ESP_IOT v2.5
IOT ESP Coding
TokenParser.cpp
Go to the documentation of this file.
1#include "TokenParser.h"
2
3#ifdef WILDCARD_DEVICE_NAME_SUPPORT
4
5/*
6<identifier> ::= {characters A-Z,a-z,0-9,_}
7<notModifier> ::= '!'
8<modifier> ::= <notModifier>
9<wildcardAny> ::= '*'
10<wildcard> ::= <wildcardAny>
11<booleanAnd> ::= '&'
12<booleanOr> ::= '|'
13<booleanOps> ::= <booleanAnd> | <booleanOr> | <null>
14<queryElement> ::= { [<modifier>] [<wildcard>] <identifier> [<wildcard>] }
15<queryPart> ::= { <queryElement> <booleanOps> }
16
17<identifierFollow> ::= {anything not <identifier> }
18eg. ! M5* -> <notModifier> <identifier> <wildcard>
19!*M5* & ! *GEN3* -> <wildcard=*> <identifier=M5> <wildcard=*> <booleanOps=&> <wildcard=*> <identifier=GEN3>
20means: any non M5* and not *GEN3*
21
22
23Precedence: AND over OR was the intent, but the current evaluator applies & and | strictly left to right
24*/
25
26//!retrieves the parsed values
27
28//!the tokens in the token array
29typedef enum
30{
38
39//!max length of an identifier..
40const int _identifierMaxLen= 30;
41//! storage for the current identifier
43// !where in the current identifier
45
46typedef struct identifierStruct
47{
51 boolean notModifier;
53
54//!so now a query is { IdentifierStruct <booleanAnd/Or> }
55
56//!the max number of identifiers
58//!index into _identifiers array
60//! array of identifiers ..
62
63const int _tokensMax = 10;
65//!index into _tokens array
67
68//!whether in an identifier parsing
70
71//!pushes token onto token stack
73{
74 SerialLots.printf("\nPushToken[%d] %d\n", _tokensStackPointer, token);
75
77 {
78 SerialMin.println(" *** Over the token max ***");
79 return;
80 }
82}
83
84//!get the current top of stack token
86{
87 //if stack empty .. return null_enum
88 if (_tokensStackPointer == 0)
89 return null_enum;
90 //! otherwise grab the top
92}
93
94//!get the current top of stack token, and move the token stack pointer
96{
97 TokensEnum result;
98 //if stack empty .. return null_enum
99 if (_tokensStackPointer == 0)
100 result = null_enum;
101 else
102 {
103 //!else pop
106 }
107 SerialLots.printf("\npop[%d] = %d\n", _tokensStackPointer, result);
108 return result;
109}
110
111//!pushes identifier onto identifier stack, while also inserting a token for identifier
112void pushIdentifier(char *identifier)
113{
114 SerialLots.printf("\nPushId %s\n", identifier);
115
117 //! now save Identifier
118 strcpy(_identifierStructs[_identifierStructsIndex++].identifier, identifier);
119}
120
121//! how many tokens
123{
124 return _tokensStackPointer;
125}
126//!how many identifiers
128{
130}
131//!get the nth token
133{
134 return _tokens[num];
135}
136//!the nth identifier
137char *getIdentifier(int num)
138{
139 return _identifierStructs[num].identifier;
140}
141
142//!grab full identifier struct (Pointer?)
144{
145 return _identifierStructs[num];
146}
147
148//!adds a valid character to an identifier
150{
153 _identifier[_identifierIndex] = (char)NULL;
154}
155//!init for each new identifier
156void initIdentifier(char c)
157{
160}
161
162//!finished parsing an identifier
164{
165 _parsingIdentifier = false;
167}
168
//! Whether `c` may appear inside an identifier: an underscore, an ASCII
//! letter (A-Z, a-z) or an ASCII digit (0-9). Everything else ends the identifier.
boolean validIdentifierChar(char c)
{
    const bool isUnderscore = (c == '_');
    const bool isUpper = ('A' <= c && c <= 'Z');
    const bool isLower = ('a' <= c && c <= 'z');
    const bool isDigit = ('0' <= c && c <= '9');
    return isUnderscore || isUpper || isLower || isDigit;
}
184
185//!push identifier .. looks at the stack and fills in the identifier object
186//!called after seeing a & or | (the follow set of the identifier, after a *)*
188{
189 //!the index is already 1 past..
190 int identifierStructsIndex = _identifierStructsIndex-1;
191 SerialLots.printf("\nprocessIdentifier[%d]\n", identifierStructsIndex);
192 //! look at stack
193 // the _identifierStructsIndex is the identifier we will modify..
195 if (top == wildcardAny_enum)
196 {
197 SerialLots.printf("wildcardAny_enum\n");
198
199 _identifierStructs[identifierStructsIndex].wildcardAfter = true;
200 top = popTokenStack();
201 }
202 top = topOfTokenStack();
203 if (top != identifier_enum)
204 {
205 SerialMin.printf("*** Syntax error: expected identifier, found: %d\n", top);
206 }
207 else
208 {
209 SerialLots.printf("identifier_enum\n");
210
211 // should be identifier we are looking for..
212 top = popTokenStack();
213 }
214 top = topOfTokenStack();
215 if (top == wildcardAny_enum)
216 {
217 SerialLots.printf("wildcardAny_enum\n");
218
219 _identifierStructs[identifierStructsIndex].wildcardBefore = true;
220 top = popTokenStack();
221 }
222 top = topOfTokenStack();
223 if (top == notModifier_enum)
224 {
225 SerialLots.printf("notModifier_enum\n");
226
227 _identifierStructs[identifierStructsIndex].notModifier = true;
228 top = popTokenStack();
229 SerialLots.printf("token = %d\n", top);
230 }
231
232 //!replace the stack with our filled out identifier
234
235}
236
237//!initialize the parser
239{
243 _parsingIdentifier = false;
244 for (int i=0; i< _identifierStructsMax; i++)
245 {
249 }
250}
251
252
253//!parses a line of text into the token and identifier arrays (nothing is returned)
255{
256
257 SerialTemp.printf("\nDevQuery: %s\n", line);
258
259 //! initialize the variables for parsing this line
260 initParser();
261 //! if no query params, then make it an identifier and exit
262 if (!stringIsQuery_mainModule(line))
263 {
264 SerialLots.println("*** Not a query, so optimze to just identifier");
265 pushIdentifier(line);
266 return;
267 }
268 while (*line)
269 {
270 char c = *line++;
271 SerialLots.print(c);
272 if (validIdentifierChar(c))
273 {
274 //! if parsing still then add to identifier
276 {
278 }
279 else
280 {
281 //! start a new identifier..
282 _parsingIdentifier = true;
283 //! init the identifier
285 }
286 }
287 //! else not valid identifier (so a follow..)
288 else
289 {
290 boolean finishIdNeeded = false;
291 //! done parsing if not a valid identifier character
293 {
295 finishIdNeeded = true;
296 //! finishIdNeeded is before pushing another token, unless a not or wildcard
297 }
298 //! now classify this char
299 //! if an identifier modifier
300 if (c == '!' || c == '*')
301 {
302 switch (c)
303 {
304 //! ! and * belong to the identifier object
305 //! if a modifier of ident (~ or *) then push the token,
306 //! and process the identifier
307 case '!':
309 break;
310 case '*':
312 break;
313 }
314 if (finishIdNeeded)
316 }
317 else
318 {
319 //! an operator or space
320 //! if inside an identifier, finish it first
321 if (finishIdNeeded)
323 //! process the token
324 switch (c)
325 {
326 //!& and | are expression modifiers
327 case '&':
329 break;
330 case '|':
332 break;
333 case ' ':
334 break;
335 default:
336 //** Invalid character **.
337 SerialTemp.println(" *** INVALID CHAR ***");
338 break;
339 }
340 } // switch
341
342 }
343 } //while
344
345 //! if end of line, and was parsing an identifier .. then finish
347 {
350 }
351
352}
353
354
355//! These are CASE SENSITIVE matches ...
356//!matches name with wildcards and the idToMatch
357//!name is what is being queried, the idQueryStruct is the QUERY
358boolean nameMatchesWildcardQuery(char *name, IdentifierStruct idQueryStruct)
359{
360 char *queryIdToMatch = idQueryStruct.identifier;
361 boolean wildcardBefore = idQueryStruct.wildcardBefore;
362 boolean wildcardAfter = idQueryStruct.wildcardAfter;
363 boolean wildcardNot = idQueryStruct.notModifier;
364 //default
365 boolean match = false;
366
367 SerialLots.printf("%s - idQuery(%s), before=%d,after=%d,not=%d\n", name, queryIdToMatch, wildcardBefore, wildcardAfter, wildcardNot);
368 //!@see https://legacy.cplusplus.com/reference/cstring/strncmp/
369 //! eg. idToMatch == M5 and wildcard after and before, then just search for idToMatch in name
370 //! If not before, then starts with ...
371 //! if non, then strmatch
372 if (wildcardBefore && wildcardAfter)
373 // contains .. ptr to first occurance of idToMatch in name
374 match = (strstr(name, queryIdToMatch)!=NULL);
375 else if (wildcardAfter)
376 // starts with ..
377 match = (strncmp(name, queryIdToMatch, strlen(queryIdToMatch))==0);
378 else if (wildcardBefore)
379 {
380 int queryIdLen = strlen(queryIdToMatch);
381 int nameLen = strlen(name);
382 // ends with..
383 //eg: *Rainier (and name == M5Rainier)
384 // and *Rainier matched Rainier
385 // *Room1 and Scooby_Room1 should match
386 // diff = *R and SR -> queryLen = 1, our len = 2, 2 -1 = 1 CORRECT
387 // just look for the last idLen characters of name
388 if (nameLen >= queryIdLen)
389 {
390 //start search at N into name
391 char *startInName = name + nameLen - queryIdLen;
392 match = strcmp(startInName, queryIdToMatch) == 0;
393 }
394 }
395 else
396 // is identical
397 match = strcmp(name, queryIdToMatch)==0;
398
399 //!see if there was a NOT (!)
400 if (wildcardNot)
401 match = !match;
402 return match;
403}
404
405
//! Whether the string is a potential query, i.e. it contains at least one
//! of the query characters '|', '*', '&' or '!'.
boolean stringIsQuery_mainModule(char *line)
{
    // strpbrk = first occurrence in line of any of the listed characters, or NULL
    char *firstQueryChar = strpbrk(line, (char*)"|*&!");
    return firstQueryChar != NULL;
}
413
414//!now need to process the token tree to see if the given name matches the query (& and | are applied left to right)
415//!eg. ! *name* & name2 ...
416//!wildcards after name make it slightly complicated
417//! SO: look for wildcards before and after, and the id. then decide if that matches
418//! before the boolean and notmodifier applied
420{
421 //!basically for each identifier in the query, the names (1&2) have to be compared each time..
422 boolean match = true;
423 int tokensIndex = 0;
424 int idIndex = 0;
425
426 //!what the last boolean_enum was..
427 TokensEnum boolean_enum = null_enum;
428 IdentifierStruct lastIdentifier;
429 // each identifier has a match..
430 boolean thisMatch = false;
431
432 while (tokensIndex < numTokens())
433 {
434 // A or B and D
435 TokensEnum token = getToken(tokensIndex++);
436 switch (token)
437 {
438 case booleanOr_enum:
439 boolean_enum = booleanOr_enum;
440 break;
441 case booleanAnd_enum:
442 boolean_enum = booleanAnd_enum;
443 break;
444 case identifier_enum:
445 lastIdentifier = getIdentifierStruct(idIndex++);
446 thisMatch = nameMatchesWildcardQuery(name, lastIdentifier);
447 if (boolean_enum == null_enum)
448 {
449 // first time, reset of times the enum will be set..
450 // good if only Identifier too.. if false .. true & false = false
451 match = match && thisMatch;
452 }
453 else
454 {
455 //! the result of A <> B
456 // at the B, eg A or B | A and B
457 if (boolean_enum == booleanOr_enum)
458 match = match || thisMatch;
459 else
460 match = match && thisMatch;
461 // reset, should never read again
462 boolean_enum = null_enum;
463 }
464 SerialLots.printf("thisMatch = %d, match = %d\n", thisMatch, match);
465
466 break;
467 // ID or ID and ID
468 // null ID or IDx
469 };
470 }
471 return match;
472}
473
474//#define TEST_PARSER
475#ifdef TEST_PARSER
476
477//!walks the tree printing it out..
478void printTokenTree()
479{
480 //SerialLots.printf("tokens=%d, id=%d\n", numTokens(), numIdentifiers());
481 SerialTemp.println("TOKEN TREE..");
482
483 int tokensIndex = 0;
484 int idIndex = 0;
485 while (tokensIndex < numTokens())
486 {
487 TokensEnum token = getToken(tokensIndex);
488 SerialTemp.printf("Token [%d]= ", tokensIndex);
489 tokensIndex++;
490 switch (token)
491 {
492 case notModifier_enum:
493 SerialTemp.println("notModifier_enum");
494 break;
495 case wildcardAny_enum:
496 SerialTemp.println("wildcardAny_enum");
497 break;
498 case booleanOr_enum:
499 SerialTemp.println("booleanOr_enum");
500 break;
501 case booleanAnd_enum:
502 SerialTemp.println("booleanAnd_enum");
503 break;
504 case identifier_enum:
505 {
506 IdentifierStruct idQueryStruct = getIdentifierStruct(idIndex++);
507 char *id = idQueryStruct.identifier;
508 SerialTemp.printf("identifier_enum: %s%s%s%s\n",
509 idQueryStruct.notModifier?"!":"",
510 idQueryStruct.wildcardBefore?"*":"",
511 id,
512 idQueryStruct.wildcardAfter?"*":""
513 );
514
515 }
516 break;
517 };
518 }
519}
520//!printout
521void testMatchResult(char*query, boolean expectedBool)
522{
523 boolean match = queryMatchesName_mainModule(query);
524 SerialTemp.printf("query '%s' ", query);
525 if (match != expectedBool)
526 SerialTemp.printf("!= expected: ");
527 else
528 SerialTemp.printf("== matched : ");
529 SerialTemp.printf("%s\n", expectedBool?(char*)"MATCH":(char*)"NO-MATCH");
530
531}
532
533boolean _firstTimeParserTest = true;
534
535//!test
536void testParser_mainModule()
537{
538 int tokens;
539 boolean match;
540 parseQueryLine_mainModule((char*)"!M5WRR");
541 //printTokenTree();
542 testMatchResult((char*)"M5WRR", false);
543 testMatchResult((char*)"M5Rainier", true);
544
545 parseQueryLine_mainModule((char*)"!M5WRR & !DukeGEN3");
546 //printTokenTree();
547 testMatchResult((char*)"M5WRR", false);
548 testMatchResult((char*)"DukeGEN3", false);
549 testMatchResult((char*)"ScoobyDoo", true);
550 testMatchResult((char*)"ScottyGEN3", true);
551
552 parseQueryLine_mainModule((char*)"M5* | DukeGEN3");
553 testMatchResult((char*)"M5Scooby", true);
554 testMatchResult((char*)"DukeGEN3", true);
555
556
557 parseQueryLine_mainModule((char*)"!*GEN3*");
558 testMatchResult((char*)"M5Scooby", true);
559 testMatchResult((char*)"DukeGEN3", false);
560
561 parseQueryLine_mainModule((char*)"!M5* ");
562 testMatchResult((char*)"M5Scooby", false);
563 testMatchResult((char*)"DukeGEN3", true);
564
565 parseQueryLine_mainModule((char*)"!M5* & !*GEN3*");
566 testMatchResult((char*)"M5Scooby", false);
567 testMatchResult((char*)"DukeGEN3", false);
568 testMatchResult((char*)"M5DukeGEN3", false);
569 testMatchResult((char*)"M5DukeGEN3", true);
570
571 parseQueryLine_mainModule((char*)"!M5* | *GEN3*");
572 testMatchResult((char*)"M5Scooby", false);
573 testMatchResult((char*)"DukeGEN3", true);
574 testMatchResult((char*)"M5DukeGEN3", false); //interesting
575
576 parseQueryLine_mainModule((char*)"!M5*");
577 testMatchResult((char*)"M5WRR", false);
578 testMatchResult((char*)"DukeGEN3", true);
579
580 parseQueryLine_mainModule((char*)"!M5WRR");
581 testMatchResult((char*)"M5WRR", false);
582 testMatchResult((char*)"M5Scooby", true);
583
584 parseQueryLine_mainModule((char*)"*M5* |*GEN3* | ScoobyDoo");
585 testMatchResult((char*)"M5Scooby", true);
586 testMatchResult((char*)"GEN3Scott", true);
587 testMatchResult((char*)"ScoobyDoo", true);
588 testMatchResult((char*)"Duke", false);
589 testMatchResult((char*)"M5Duke", true);
590
591 parseQueryLine_mainModule((char*)"ScoobyDoo");
592 testMatchResult((char*)"ScoobyDoo", true);
593 testMatchResult((char*)"Duke", false);
594
595 parseQueryLine_mainModule((char*)"!*M5* | *GEN3* &ScoobyDoo");
596 testMatchResult((char*)"ScoobyDoo", true);
597 testMatchResult((char*)"M5WRR", false);
598 testMatchResult((char*)"GEN3Scott", true);
599
600 parseQueryLine_mainModule((char*)"!*M5* | *GEN3* | ScoobyDoo");
601 testMatchResult((char*)"ScoobyDoo", true);
602 testMatchResult((char*)"M5WRR", false);
603 testMatchResult((char*)"GEN3Scott", true);
604
605 //! all Room1 which are M5. eg. Room1_M5Scooby
606 parseQueryLine_mainModule((char*)"Room1_* & *M5*");
607 testMatchResult((char*)"Room1_M5Scooby", true);
608 testMatchResult((char*)"Room1_GEN3", false);
609 testMatchResult((char*)"Room2_GEN3", false);
610 testMatchResult((char*)"M5WRR", false);
611
612 //! all Room1 which are M5. eg. Room1_M5Scooby
613 parseQueryLine_mainModule((char*)"Room1_* | *M5*");
614 testMatchResult((char*)"Room1_M5Scooby", true);
615 testMatchResult((char*)"Room1_GEN3", true);
616 testMatchResult((char*)"Room2_GEN3", false);
617 testMatchResult((char*)"Room2_M5", true);
618 testMatchResult((char*)"M5WRR", true);
619
620 //! all Room1 which are M5. eg. Room1_M5Scooby
621 parseQueryLine_mainModule((char*)"Room1_*");
622 testMatchResult((char*)"Room1_M5Scooby", true);
623 testMatchResult((char*)"Room1_GEN3", true);
624 testMatchResult((char*)"Room2_GEN3", false);
625
626 //! all Room1 which are M5. eg. Room1_M5Scooby
627 parseQueryLine_mainModule((char*)"*Room1");
628 testMatchResult((char*)"Scooby_Room1", true);
629 testMatchResult((char*)"Scooby_Room1_GEN3", false);
630 testMatchResult((char*)"Room1", true);
631 testMatchResult((char*)"Room2", false);
632
633 //! all Room1 which are M5. eg. Room1_M5Scooby
634 parseQueryLine_mainModule((char*)"*GEN3");
635 testMatchResult((char*)"Scooby_GEN3", true);
636 testMatchResult((char*)"Scooby_GEN3_Room1", false);
637 testMatchResult((char*)"GEN3", true);
638 testMatchResult((char*)"GEN2", false);
639
640 //! all Room1 which are M5. eg. Room1_M5Scooby
641 parseQueryLine_mainModule((char*)"!*GEN3");
642 testMatchResult((char*)"Scooby_GEN3", false);
643 testMatchResult((char*)"Scooby_GEN3_Room1", true);
644 testMatchResult((char*)"GEN3", false);
645 testMatchResult((char*)"GEN2", true);
646 testMatchResult((char*)"M5Scooby", true);
647 testMatchResult((char*)"DukeGEN3", false);
648 testMatchResult((char*)"M5DukeGEN3", false);
649}
650
651#endif //test parser
652
653//! setup a test ..
655{
656#ifdef TEST_PARSER
657 if (_firstTimeParserTest)
658 {
659 _firstTimeParserTest = false;
660 testParser_mainModule();
661 }
662#endif
663}
664
665#else
666
667
//! Stub used when WILDCARD_DEVICE_NAME_SUPPORT is not defined: the query
//! line is accepted and ignored. Callers still use queryMatchesName_mainModule()
//! afterwards to see if their name matches.
void parseQueryLine_mainModule(char *line)
{
    (void)line;  // intentionally unused: noop
}
673
//! Stub used when WILDCARD_DEVICE_NAME_SUPPORT is not defined: no query is
//! ever parsed, so every name is reported as matching.
boolean queryMatchesName_mainModule(char*name)
{
    (void)name;  // intentionally unused
    const boolean everyNameMatches = true;
    return everyNameMatches;
}
683
//! Whether the string is a potential query. This can be used on its own, and
//! the wildcard build of parseQueryLine also uses it to optimize out the
//! original dev:name query.
//! Stub used when WILDCARD_DEVICE_NAME_SUPPORT is not defined: nothing is
//! ever treated as a query, so this always returns false.
boolean stringIsQuery_mainModule(char *line)
{
    (void)line;  // intentionally unused
    // The original body had no return statement; flowing off the end of a
    // non-void function is undefined behaviour.
    return false;
}
690
691//! setup for the token parser (really just for testing)
693{
694 //noop
695}
696
697#endif
void initParser()
initialize the parser
TokensEnum topOfTokenStack()
get the current top of stack token
Definition: TokenParser.cpp:85
const int _identifierStructsMax
so now a query is { IdentifierStruct <booleanAnd/Or> }
Definition: TokenParser.cpp:57
int _identifierIndex
Definition: TokenParser.cpp:44
boolean stringIsQuery_mainModule(char *line)
whether the stirng is a potential query
struct identifierStruct IdentifierStruct
const int _identifierMaxLen
max length of an identifier..
Definition: TokenParser.cpp:40
TokensEnum _tokens[_tokensMax]
Definition: TokenParser.cpp:64
boolean nameMatchesWildcardQuery(char *name, IdentifierStruct idQueryStruct)
void addIdentifierChar(char c)
adds a valie character to an identifier
boolean _parsingIdentifier
whether in an identifier parsing
Definition: TokenParser.cpp:69
boolean queryMatchesName_mainModule(char *name)
void initIdentifier(char c)
initi for each new identifier
IdentifierStruct _identifierStructs[_identifierStructsMax]
array of identifiers ..
Definition: TokenParser.cpp:61
TokensEnum getToken(int num)
get the nth token
void parseQueryLine_mainModule(char *line)
parses a line of text, returning the number of tokens..
void setup_tokenParser_mainModule()
setup a test ..
void pushToken(TokensEnum token)
pushes token onto token stack
Definition: TokenParser.cpp:72
boolean validIdentifierChar(char c)
int numTokens()
how many tokens
void pushIdentifier(char *identifier)
pushes identifier onto identifier stack, while also inserting a token for identifier
char * getIdentifier(int num)
the nth identifier
const int _tokensMax
Definition: TokenParser.cpp:63
int _identifierStructsIndex
index into _identifiers array
Definition: TokenParser.cpp:59
void processIdentifier()
int numIdentifiers()
how many identifiers
TokensEnum popTokenStack()
get the current top of stack token, and move the token stack pointer
Definition: TokenParser.cpp:95
int _tokensStackPointer
index into _tokens array
Definition: TokenParser.cpp:66
void doneParsingIdentifier()
finished parsing an identifier
char _identifier[_identifierMaxLen]
storage for the current identifier
Definition: TokenParser.cpp:42
TokensEnum
retrieves the parsed values
Definition: TokenParser.cpp:30
@ identifier_enum
Definition: TokenParser.cpp:35
@ booleanOr_enum
Definition: TokenParser.cpp:33
@ notModifier_enum
Definition: TokenParser.cpp:31
@ booleanAnd_enum
Definition: TokenParser.cpp:34
@ wildcardAny_enum
Definition: TokenParser.cpp:32
@ null_enum
Definition: TokenParser.cpp:36
IdentifierStruct getIdentifierStruct(int num)
grab full identifier struct (Pointer?)
boolean wildcardAfter
Definition: TokenParser.cpp:50
boolean wildcardBefore
Definition: TokenParser.cpp:49
char identifier[_identifierMaxLen]
Definition: TokenParser.cpp:48