Package pyparsing ::
Module pyparsing
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
36
37 from pyparsing import Word, alphas
38
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
44
45 The program outputs the following::
46
47 Hello, World! -> ['Hello', ',', 'World', '!']
48
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
51
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
54
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59 """
60
61 __version__ = "1.5.1"
62 __versionTime__ = "2 October 2008 00:44"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65 import string
66 from weakref import ref as wkref
67 import copy
68 import sys
69 import warnings
70 import re
71 import sre_constants
72
73
74 __all__ = [
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91 'indentedBlock', 'originalTextFor',
92 ]
93
94
95 """
96 Detect if we are running version 3.X and make appropriate changes
97 Robert A. Clark
98 """
# Work out once whether this is a Python 3.x interpreter, then pick the
# matching "largest int" constant; Python 3 also gets a basestring alias.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    _MAX_INT = sys.maxsize
    basestring = str
else:
    _MAX_INT = sys.maxint
106
107 if not _PY3K:
109 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
110 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
111 then < returns the unicode object | encodes it with the default encoding | ... >.
112 """
113 try:
114
115
116 return str(obj)
117
118 except UnicodeEncodeError:
119
120
121
122
123
124 return unicode(obj)
125
126
127
128
129
130
131 else:
132 _ustr = str
133 unichr = chr
134
136 return dict( [(c,0) for c in strg] )
137
138
140 """Escape &, <, >, ", ', etc. in a string of data."""
141
142
143 from_symbols = '&><"\''
144 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
145 for from_,to_ in zip(from_symbols, to_symbols):
146 data = data.replace(from_, to_)
147 return data
148
151
# Module-level character-set strings (letters, digits, hex digits, ...).
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# chr(92) is the backslash character
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join( [ ch for ch in string.printable if ch not in string.whitespace ] )
161
163 """base exception class for all parsing runtime exceptions"""
164 __slots__ = ( "loc","msg","pstr","parserElement" )
165
166
167 - def __init__( self, pstr, loc=0, msg=None, elem=None ):
176
178 """supported attributes by name are:
179 - lineno - returns the line number of the exception text
180 - col - returns the column number of the exception text
181 - line - returns the line containing the exception text
182 """
183 if( aname == "lineno" ):
184 return lineno( self.loc, self.pstr )
185 elif( aname in ("col", "column") ):
186 return col( self.loc, self.pstr )
187 elif( aname == "line" ):
188 return line( self.loc, self.pstr )
189 else:
190 raise AttributeError(aname)
191
193 return "%s (at char %d), (line:%d, col:%d)" % \
194 ( self.msg, self.loc, self.lineno, self.column )
208 return "loc msg pstr parserElement lineno col line " \
209 "markInputLine __str__ __repr__".split()
210
212 """exception thrown when parse expressions don't match class;
213 supported attributes by name are:
214 - lineno - returns the line number of the exception text
215 - col - returns the column number of the exception text
216 - line - returns the line containing the exception text
217 """
218 pass
219
221 """user-throwable exception thrown when inconsistent parse content
222 is found; stops all parsing immediately"""
223 pass
224
226 """just like ParseFatalException, but thrown internally when an
227 ErrorStop indicates that parsing is to stop immediately because
228 an unbacktrackable syntax error has been found"""
232
233
234
235
236
237
238
239
240
241
242
243
244
245
247 """exception thrown by validate() if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Store the given list of parse elements as C{parseElementTrace}."""
    self.parseElementTrace = parseElementList
250
252 return "RecursiveGrammarException: %s" % self.parseElementTrace
253
260 return repr(self.tup)
262 self.tup = (self.tup[0],i)
263
265 """Structured parse results, to provide multiple means of access to the parsed data:
266 - as a list (len(results))
267 - by list index (results[0], results[1], etc.)
268 - by attribute (results.<resultsName>)
269 """
270 __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
def __new__(cls, toklist, name=None, asList=True, modal=True ):
    """Create a new instance, unless C{toklist} already is an instance of
    C{cls}, in which case that same object is handed back unchanged.
    Freshly created instances are flagged with C{__doinit = True}."""
    if not isinstance(toklist, cls):
        instance = object.__new__(cls)
        instance.__doinit = True
        return instance
    return toklist
277
278
279
280 - def __init__( self, toklist, name=None, asList=True, modal=True ):
312
321
334
336 if isinstance(i,(int,slice)):
337 mylen = len( self.__toklist )
338 del self.__toklist[i]
339
340
341 if isinstance(i, int):
342 if i < 0:
343 i += mylen
344 i = slice(i, i+1)
345
346 removed = list(range(*i.indices(mylen)))
347 removed.reverse()
348
349 for name in self.__tokdict:
350 occurrences = self.__tokdict[name]
351 for j in removed:
352 for k, (value, position) in enumerate(occurrences):
353 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
354 else:
355 del self.__tokdict[i]
356
359
362 __nonzero__ = __bool__
366 """Returns all named result keys."""
367 return self.__tokdict.keys()
368
def pop( self, index=-1 ):
    """Remove the item at C{index} (default: the last one) and return it.
    Works with either numeric indices or dict-key indices."""
    item = self[index]
    del self[index]
    return item
375
def get(self, key, defaultValue=None):
    """Look up the named result C{key}; when no such name is present,
    return C{defaultValue} instead (None unless the caller supplies one)."""
    if key not in self:
        return defaultValue
    return self[key]
384
385 - def insert( self, index, insStr ):
392
394 """Returns all named result keys and values as a list of tuples."""
395 return [(k,self[k]) for k in self.__tokdict]
396
398 """Returns all named result values."""
399 return [ v[-1][0] for v in self.__tokdict.values() ]
400
402 if name not in self.__slots__:
403 if name in self.__tokdict:
404 if name not in self.__accumNames:
405 return self.__tokdict[name][-1][0]
406 else:
407 return ParseResults([ v[0] for v in self.__tokdict[name] ])
408 else:
409 return ""
410 return None
411
413 ret = self.copy()
414 ret += other
415 return ret
416
433
436
438 out = "["
439 sep = ""
440 for i in self.__toklist:
441 if isinstance(i, ParseResults):
442 out += sep + _ustr(i)
443 else:
444 out += sep + repr(i)
445 sep = ", "
446 out += "]"
447 return out
448
459
461 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
462 out = []
463 for res in self.__toklist:
464 if isinstance(res,ParseResults):
465 out.append( res.asList() )
466 else:
467 out.append( res )
468 return out
469
471 """Returns the named parse results as dictionary."""
472 return dict( self.items() )
473
482
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Render the parse results as an XML string.

    Tokens and nested results that carry a results name use that name as
    their tag; unnamed ones are tagged "ITEM", or left out entirely when
    namedItemsOnly is true.  doctag, when given, overrides this object's
    own tag.  With formatted false, no newlines or indentation are emitted.
    """
    # token position -> results name, for every named entry
    namesByPos = dict( [ (entry[1], key)
                         for (key, entries) in self.__tokdict.items()
                         for entry in entries ] )

    if formatted:
        newline = "\n"
        childIndent = indent + " "
    else:
        newline = ""
        indent = ""
        childIndent = ""

    # an explicit doctag wins over this object's own results name
    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    # nested results keep the named-only filter only when no doctag was given
    nestedNamedOnly = namedItemsOnly and doctag is None

    pieces = [ newline, indent, "<", selfTag, ">" ]
    for pos, tok in enumerate(self.__toklist):
        tag = namesByPos.get(pos)
        if isinstance(tok, ParseResults):
            pieces.append( tok.asXML(tag, nestedNamedOnly, childIndent, formatted) )
        else:
            # plain token: emit it as an escaped leaf element
            if not tag:
                if namedItemsOnly:
                    continue
                tag = "ITEM"
            pieces += [ newline, childIndent, "<", tag, ">",
                        _xml_escape(_ustr(tok)),
                        "</", tag, ">" ]

    pieces += [ newline, indent, "</", selfTag, ">" ]
    return "".join(pieces)
542
544 for k,vlist in self.__tokdict.items():
545 for v,loc in vlist:
546 if sub is v:
547 return k
548 return None
549
551 """Returns the results name for this token expression."""
552 if self.__name:
553 return self.__name
554 elif self.__parent:
555 par = self.__parent()
556 if par:
557 return par.__lookup(self)
558 else:
559 return None
560 elif (len(self) == 1 and
561 len(self.__tokdict) == 1 and
562 self.__tokdict.values()[0][0][1] in (0,-1)):
563 return self.__tokdict.keys()[0]
564 else:
565 return None
566
def dump(self,indent='',depth=0):
    """Build a diagnostic string listing the contents of a ParseResults:
    first the token list, then one "- name: value" line per named result
    in sorted order.  The optional indent is prefixed to each line so the
    text can be embedded in a nested display of other data."""
    pieces = [ indent + _ustr(self.asList()) ]
    entries = self.items()
    entries.sort()
    for key, val in entries:
        # pieces always holds the token-list line, so every entry gets a newline
        pieces.append('\n')
        pieces.append( "%s%s- %s: " % (indent,(' '*depth), key) )
        if isinstance(val,ParseResults) and val.keys():
            # nested results with names of their own are dumped one level deeper
            pieces.append( val.dump(indent,depth+1) )
        else:
            pieces.append( _ustr(val) )
    return "".join(pieces)
590
591
598
611
614
616 """Returns current column within a string, counting newlines as line separators.
617 The first column is number 1.
618
619 Note: the default parsing behavior is to expand tabs in the input string
620 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
621 on parsing strings containing <TAB>s, and suggested methods to maintain a
622 consistent view of the parsed string, the parse location, and line and column
623 positions within the parsed string.
624 """
625 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
626
628 """Returns current line number within a string, counting newlines as line separators.
629 The first line is number 1.
630
631 Note: the default parsing behavior is to expand tabs in the input string
632 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
633 on parsing strings containing <TAB>s, and suggested methods to maintain a
634 consistent view of the parsed string, the parse location, and line and column
635 positions within the parsed string.
636 """
637 return strg.count("\n",0,loc) + 1
638
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    The returned text does not include the terminating newline.  When loc
    sits on a newline character, the line ended by that newline is returned.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "no newline" as -1; index 0 is a genuine hit (the string
    # starts with a newline), so the comparison has to include 0.
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
648
651
654
656 print ("Exception raised:" + _ustr(exc))
657
659 """'Do-nothing' debug action, to suppress debugging output during parsing."""
660 pass
661
663 """Abstract base level parser element class."""
664 DEFAULT_WHITE_CHARS = " \n\t\r"
665
670 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
671
673 self.parseAction = list()
674 self.failAction = None
675
676 self.strRepr = None
677 self.resultsName = None
678 self.saveAsList = savelist
679 self.skipWhitespace = True
680 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
681 self.copyDefaultWhiteChars = True
682 self.mayReturnEmpty = False
683 self.keepTabs = False
684 self.ignoreExprs = list()
685 self.debug = False
686 self.streamlined = False
687 self.mayIndexError = True
688 self.errmsg = ""
689 self.modalResults = True
690 self.debugActions = ( None, None, None )
691 self.re = None
692 self.callPreparse = True
693 self.callDuringTry = False
694
696 """Make a copy of this ParserElement. Useful for defining different parse actions
697 for the same parsing pattern, using copies of the original parse element."""
698 cpy = copy.copy( self )
699 cpy.parseAction = self.parseAction[:]
700 cpy.ignoreExprs = self.ignoreExprs[:]
701 if self.copyDefaultWhiteChars:
702 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
703 return cpy
704
706 """Define name for this expression, for use in debugging."""
707 self.name = name
708 self.errmsg = "Expected " + self.name
709 if hasattr(self,"exception"):
710 self.exception.msg = self.errmsg
711 return self
712
714 """Define name for referencing matching tokens as a nested attribute
715 of the returned parse results.
716 NOTE: this returns a *copy* of the original ParserElement object;
717 this is so that the client can define a basic element, such as an
718 integer, and reference it in multiple places with different names.
719 """
720 newself = self.copy()
721 newself.resultsName = name
722 newself.modalResults = not listAllMatches
723 return newself
724
726 """Method to invoke the Python pdb debugger when this element is
727 about to be parsed. Set breakFlag to True to enable, False to
728 disable.
729 """
730 if breakFlag:
731 _parseMethod = self._parse
732 def breaker(instring, loc, doActions=True, callPreParse=True):
733 import pdb
734 pdb.set_trace()
735 return _parseMethod( instring, loc, doActions, callPreParse )
736 breaker._originalParseMethod = _parseMethod
737 self._parse = breaker
738 else:
739 if hasattr(self._parse,"_originalParseMethod"):
740 self._parse = self._parse._originalParseMethod
741 return self
742
744 """Internal method used to decorate parse actions that take fewer than 3 arguments,
745 so that all parse actions can be called as f(s,l,t)."""
746 STAR_ARGS = 4
747
748 try:
749 restore = None
750 if isinstance(f,type):
751 restore = f
752 f = f.__init__
753 if not _PY3K:
754 codeObj = f.func_code
755 else:
756 codeObj = f.code
757 if codeObj.co_flags & STAR_ARGS:
758 return f
759 numargs = codeObj.co_argcount
760 if not _PY3K:
761 if hasattr(f,"im_self"):
762 numargs -= 1
763 else:
764 if hasattr(f,"__self__"):
765 numargs -= 1
766 if restore:
767 f = restore
768 except AttributeError:
769 try:
770 if not _PY3K:
771 call_im_func_code = f.__call__.im_func.func_code
772 else:
773 call_im_func_code = f.__code__
774
775
776
777 if call_im_func_code.co_flags & STAR_ARGS:
778 return f
779 numargs = call_im_func_code.co_argcount
780 if not _PY3K:
781 if hasattr(f.__call__,"im_self"):
782 numargs -= 1
783 else:
784 if hasattr(f.__call__,"__self__"):
785 numargs -= 0
786 except AttributeError:
787 if not _PY3K:
788 call_func_code = f.__call__.func_code
789 else:
790 call_func_code = f.__call__.__code__
791
792 if call_func_code.co_flags & STAR_ARGS:
793 return f
794 numargs = call_func_code.co_argcount
795 if not _PY3K:
796 if hasattr(f.__call__,"im_self"):
797 numargs -= 1
798 else:
799 if hasattr(f.__call__,"__self__"):
800 numargs -= 1
801
802
803
804 if numargs == 3:
805 return f
806 else:
807 if numargs > 3:
808 def tmp(s,l,t):
809 return f(f.__call__.__self__, s,l,t)
810 if numargs == 2:
811 def tmp(s,l,t):
812 return f(l,t)
813 elif numargs == 1:
814 def tmp(s,l,t):
815 return f(t)
816 else:
817 def tmp(s,l,t):
818 return f()
819 try:
820 tmp.__name__ = f.__name__
821 except (AttributeError,TypeError):
822
823 pass
824 try:
825 tmp.__doc__ = f.__doc__
826 except (AttributeError,TypeError):
827
828 pass
829 try:
830 tmp.__dict__.update(f.__dict__)
831 except (AttributeError,TypeError):
832
833 pass
834 return tmp
835 _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
836
838 """Define action to perform when successfully matching parse element definition.
839 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
840 fn(loc,toks), fn(toks), or just fn(), where:
841 - s = the original string being parsed (see note below)
842 - loc = the location of the matching substring
843 - toks = a list of the matched tokens, packaged as a ParseResults object
844 If the functions in fns modify the tokens, they can return them as the return
845 value from fn, and the modified list of tokens will replace the original.
846 Otherwise, fn does not need to return any value.
847
848 Note: the default parsing behavior is to expand tabs in the input string
849 before starting the parsing process. See L{I{parseString}<parseString>} for more information
850 on parsing strings containing <TAB>s, and suggested methods to maintain a
851 consistent view of the parsed string, the parse location, and line and column
852 positions within the parsed string.
853 """
854 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
855 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
856 return self
857
859 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
860 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
861 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
862 return self
863
865 """Define action to perform if parsing fails at this expression.
866 Fail action fn is a callable function that takes the arguments
867 fn(s,loc,expr,err) where:
868 - s = string being parsed
869 - loc = location where expression match was attempted and failed
870 - expr = the parse expression that failed
871 - err = the exception thrown
872 The function returns no value. It may throw ParseFatalException
873 if it is desired to stop parsing immediately."""
874 self.failAction = fn
875 return self
876
878 exprsFound = True
879 while exprsFound:
880 exprsFound = False
881 for e in self.ignoreExprs:
882 try:
883 while 1:
884 loc,dummy = e._parse( instring, loc )
885 exprsFound = True
886 except ParseException:
887 pass
888 return loc
889
891 if self.ignoreExprs:
892 loc = self._skipIgnorables( instring, loc )
893
894 if self.skipWhitespace:
895 wt = self.whiteChars
896 instrlen = len(instring)
897 while loc < instrlen and instring[loc] in wt:
898 loc += 1
899
900 return loc
901
902 - def parseImpl( self, instring, loc, doActions=True ):
904
905 - def postParse( self, instring, loc, tokenlist ):
907
908
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core parse routine; no result caching is done here.

    Calls preParse (only when both callPreParse and self.callPreparse are
    true), then parseImpl and postParse, wraps the tokens in a ParseResults
    and finally runs the parse actions when doActions or self.callDuringTry
    is set.  A parse action that returns a non-None value replaces the
    current results.

    When self.debug is set or a failAction is installed, the start/exception
    debug actions and the failAction are invoked around the parse attempt;
    the success debug action is invoked only when self.debug is set.

    Returns a (loc, ParseResults) tuple.  An IndexError raised by parseImpl
    is converted into a ParseException located at the end of instring.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            # sys.exc_info() rather than "except X, err" / "except X as err":
            # this spelling is valid on both Python 2 and Python 3
            err = sys.exc_info()[1]
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # only pay for the IndexError guard when the element asks for it
        # or we are already at/past the end of the input
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                err = sys.exc_info()[1]
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
980
986
987
988
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around _parseNoCache.

    Results are stored in ParserElement._exprArgCache under the key
    (self, instring, loc, callPreParse, doActions).  Both successful
    (loc, ParseResults) values and raised ParseBaseExceptions are cached;
    a cached exception is re-raised on a later hit.

    Returns a (loc, ParseResults) tuple.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in ParserElement._exprArgCache:
        value = ParserElement._exprArgCache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # hand out a copy, so that a caller modifying the returned results
        # cannot alter what later cache hits will see
        return (value[0],value[1].copy())
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException:
            # sys.exc_info() keeps this valid on both Python 2 and Python 3
            pe = sys.exc_info()[1]
            ParserElement._exprArgCache[ lookup ] = pe
            raise
1004
1005 _parse = _parseNoCache
1006
1007
1008 _exprArgCache = {}
1011 resetCache = staticmethod(resetCache)
1012
1013 _packratEnabled = False
1015 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1016 Repeated parse attempts at the same string location (which happens
1017 often in many complex grammars) can immediately return a cached value,
1018 instead of re-executing parsing/validating code. Memoizing is done of
1019 both valid results and parsing exceptions.
1020
1021 This speedup may break existing programs that use parse actions that
1022 have side-effects. For this reason, packrat parsing is disabled when
1023 you first import pyparsing. To activate the packrat feature, your
1024 program must call the class method ParserElement.enablePackrat(). If
1025 your program uses psyco to "compile as you go", you must call
1026 enablePackrat before calling psyco.full(). If you do not do this,
1027 Python will crash. For best results, call enablePackrat() immediately
1028 after importing pyparsing.
1029 """
1030 if not ParserElement._packratEnabled:
1031 ParserElement._packratEnabled = True
1032 ParserElement._parse = ParserElement._parseCache
1033 enablePackrat = staticmethod(enablePackrat)
1034
1036 """Execute the parse expression with the given string.
1037 This is the main interface to the client code, once the complete
1038 expression has been built.
1039
1040 If you want the grammar to require that the entire input string be
1041 successfully parsed, then set parseAll to True (equivalent to ending
1042 the grammar with StringEnd()).
1043
1044 Note: parseString implicitly calls expandtabs() on the input string,
1045 in order to report proper column numbers in parse actions.
1046 If the input string contains tabs and
1047 the grammar uses parse actions that use the loc argument to index into the
1048 string being parsed, you can ensure you have a consistent view of the input
1049 string by:
1050 - calling parseWithTabs on your grammar before calling parseString
1051 (see L{I{parseWithTabs}<parseWithTabs>})
1052 - define your parse action using the full (s,loc,toks) signature, and
1053 reference the input string using the parse action's s argument
1054 - explicitly expand the tabs in your input string before calling
1055 parseString
1056 """
1057 ParserElement.resetCache()
1058 if not self.streamlined:
1059 self.streamline()
1060
1061 for e in self.ignoreExprs:
1062 e.streamline()
1063 if not self.keepTabs:
1064 instring = instring.expandtabs()
1065 loc, tokens = self._parse( instring, 0 )
1066 if parseAll:
1067 loc = self.preParse( instring, loc )
1068 StringEnd()._parse( instring, loc )
1069 return tokens
1070
1072 """Scan the input string for expression matches. Each match will return the
1073 matching tokens, start location, and end location. May be called with optional
1074 maxMatches argument, to clip scanning after 'n' matches are found.
1075
1076 Note that the start and end locations are reported relative to the string
1077 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1078 strings with embedded tabs."""
1079 if not self.streamlined:
1080 self.streamline()
1081 for e in self.ignoreExprs:
1082 e.streamline()
1083
1084 if not self.keepTabs:
1085 instring = _ustr(instring).expandtabs()
1086 instrlen = len(instring)
1087 loc = 0
1088 preparseFn = self.preParse
1089 parseFn = self._parse
1090 ParserElement.resetCache()
1091 matches = 0
1092 while loc <= instrlen and matches < maxMatches:
1093 try:
1094 preloc = preparseFn( instring, loc )
1095 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1096 except ParseException:
1097 loc = preloc+1
1098 else:
1099 matches += 1
1100 yield tokens, preloc, nextLoc
1101 loc = nextLoc
1102
1127
1129 """Another extension to scanString, simplifying the access to the tokens found
1130 to match the given parse expression. May be called with optional
1131 maxMatches argument, to clip searching after 'n' matches are found.
1132 """
1133 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1134
1136 """Implementation of + operator - returns And"""
1137 if isinstance( other, basestring ):
1138 other = Literal( other )
1139 if not isinstance( other, ParserElement ):
1140 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1141 SyntaxWarning, stacklevel=2)
1142 return None
1143 return And( [ self, other ] )
1144
1146 """Implementation of + operator when left operand is not a ParserElement"""
1147 if isinstance( other, basestring ):
1148 other = Literal( other )
1149 if not isinstance( other, ParserElement ):
1150 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1151 SyntaxWarning, stacklevel=2)
1152 return None
1153 return other + self
1154
1156 """Implementation of - operator, returns And with error stop"""
1157 if isinstance( other, basestring ):
1158 other = Literal( other )
1159 if not isinstance( other, ParserElement ):
1160 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1161 SyntaxWarning, stacklevel=2)
1162 return None
1163 return And( [ self, And._ErrorStop(), other ] )
1164
1166 """Implementation of - operator when left operand is not a ParserElement"""
1167 if isinstance( other, basestring ):
1168 other = Literal( other )
1169 if not isinstance( other, ParserElement ):
1170 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1171 SyntaxWarning, stacklevel=2)
1172 return None
1173 return other - self
1174
1176 if isinstance(other,int):
1177 minElements, optElements = other,0
1178 elif isinstance(other,tuple):
1179 other = (other + (None, None))[:2]
1180 if other[0] is None:
1181 other = (0, other[1])
1182 if isinstance(other[0],int) and other[1] is None:
1183 if other[0] == 0:
1184 return ZeroOrMore(self)
1185 if other[0] == 1:
1186 return OneOrMore(self)
1187 else:
1188 return self*other[0] + ZeroOrMore(self)
1189 elif isinstance(other[0],int) and isinstance(other[1],int):
1190 minElements, optElements = other
1191 optElements -= minElements
1192 else:
1193 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1194 else:
1195 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1196
1197 if minElements < 0:
1198 raise ValueError("cannot multiply ParserElement by negative value")
1199 if optElements < 0:
1200 raise ValueError("second tuple value must be greater or equal to first tuple value")
1201 if minElements == optElements == 0:
1202 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1203
1204 if (optElements):
1205 def makeOptionalList(n):
1206 if n>1:
1207 return Optional(self + makeOptionalList(n-1))
1208 else:
1209 return Optional(self)
1210 if minElements:
1211 if minElements == 1:
1212 ret = self + makeOptionalList(optElements)
1213 else:
1214 ret = And([self]*minElements) + makeOptionalList(optElements)
1215 else:
1216 ret = makeOptionalList(optElements)
1217 else:
1218 if minElements == 1:
1219 ret = self
1220 else:
1221 ret = And([self]*minElements)
1222 return ret
1223
1226
1228 """Implementation of | operator - returns MatchFirst"""
1229 if isinstance( other, basestring ):
1230 other = Literal( other )
1231 if not isinstance( other, ParserElement ):
1232 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1233 SyntaxWarning, stacklevel=2)
1234 return None
1235 return MatchFirst( [ self, other ] )
1236
1238 """Implementation of | operator when left operand is not a ParserElement"""
1239 if isinstance( other, basestring ):
1240 other = Literal( other )
1241 if not isinstance( other, ParserElement ):
1242 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1243 SyntaxWarning, stacklevel=2)
1244 return None
1245 return other | self
1246
1248 """Implementation of ^ operator - returns Or"""
1249 if isinstance( other, basestring ):
1250 other = Literal( other )
1251 if not isinstance( other, ParserElement ):
1252 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1253 SyntaxWarning, stacklevel=2)
1254 return None
1255 return Or( [ self, other ] )
1256
1258 """Implementation of ^ operator when left operand is not a ParserElement"""
1259 if isinstance( other, basestring ):
1260 other = Literal( other )
1261 if not isinstance( other, ParserElement ):
1262 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1263 SyntaxWarning, stacklevel=2)
1264 return None
1265 return other ^ self
1266
1268 """Implementation of & operator - returns Each"""
1269 if isinstance( other, basestring ):
1270 other = Literal( other )
1271 if not isinstance( other, ParserElement ):
1272 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1273 SyntaxWarning, stacklevel=2)
1274 return None
1275 return Each( [ self, other ] )
1276
1278 """Implementation of & operator when left operand is not a ParserElement"""
1279 if isinstance( other, basestring ):
1280 other = Literal( other )
1281 if not isinstance( other, ParserElement ):
1282 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1283 SyntaxWarning, stacklevel=2)
1284 return None
1285 return other & self
1286
1288 """Implementation of ~ operator - returns NotAny"""
1289 return NotAny( self )
1290
1292 """Shortcut for setResultsName, with listAllMatches=default::
1293 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1294 could be written as::
1295 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1296 """
1297 return self.setResultsName(name)
1298
1300 """Suppresses the output of this ParserElement; useful to keep punctuation from
1301 cluttering up returned output.
1302 """
1303 return Suppress( self )
1304
1306 """Disables the skipping of whitespace before matching the characters in the
1307 ParserElement's defined pattern. This is normally only used internally by
1308 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1309 """
1310 self.skipWhitespace = False
1311 return self
1312
1314 """Overrides the default whitespace chars
1315 """
1316 self.skipWhitespace = True
1317 self.whiteChars = chars
1318 self.copyDefaultWhiteChars = False
1319 return self
1320
1322 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1323 Must be called before parseString when the input grammar contains elements that
1324 match <TAB> characters."""
1325 self.keepTabs = True
1326 return self
1327
1329 """Define expression to be ignored (e.g., comments) while doing pattern
1330 matching; may be called repeatedly, to define multiple comment or other
1331 ignorable patterns.
1332 """
1333 if isinstance( other, Suppress ):
1334 if other not in self.ignoreExprs:
1335 self.ignoreExprs.append( other )
1336 else:
1337 self.ignoreExprs.append( Suppress( other ) )
1338 return self
1339
1340 - def setDebugActions( self, startAction, successAction, exceptionAction ):
1347
1356
1359
1362
1364 self.streamlined = True
1365 self.strRepr = None
1366 return self
1367
1370
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    This base implementation only runs the recursion check, starting from an
    empty list of enclosing elements; validateTrace is accepted but not used.
    """
    enclosing = []
    self.checkRecursion( enclosing )
1374
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       file_or_filename - an object with a read() method, or a path to open
       parseAll - passed through unchanged to parseString

       Returns whatever parseString returns for the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no usable read() method, so treat the argument as a filename
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # always release the handle, even if read() fails
            f.close()
    return self.parseString(file_contents, parseAll)
1387
1390
1392 if aname == "myException":
1393 self.myException = ret = self.getException();
1394 return ret;
1395 else:
1396 raise AttributeError("no such attribute " + aname)
1397
1407
1409 return not (self == other)
1410
1412 return hash(id(self))
1413
1415 return self == other
1416
1418 return not (self == other)
1419
1420
1421 -class Token(ParserElement):
1422 """Abstract ParserElement subclass, for defining atomic matching patterns."""
1425
1426
1428 s = super(Token,self).setName(name)
1429 self.errmsg = "Expected " + self.name
1430
1431 return s
1432
1433
1435 """An empty token, will always match."""
1437 super(Empty,self).__init__()
1438 self.name = "Empty"
1439 self.mayReturnEmpty = True
1440 self.mayIndexError = False
1441
1442
1444 """A token that will never match."""
1446 super(NoMatch,self).__init__()
1447 self.name = "NoMatch"
1448 self.mayReturnEmpty = True
1449 self.mayIndexError = False
1450 self.errmsg = "Unmatchable token"
1451
1452
def parseImpl( self, instring, loc, doActions=True ):
    """Always fail: raise this element's myException, stamped with the
       current location and input string."""
    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
1458
1459
1461 """Token to exactly match a specified string."""
1463 super(Literal,self).__init__()
1464 self.match = matchString
1465 self.matchLen = len(matchString)
1466 try:
1467 self.firstMatchChar = matchString[0]
1468 except IndexError:
1469 warnings.warn("null string passed to Literal; use Empty() instead",
1470 SyntaxWarning, stacklevel=2)
1471 self.__class__ = Empty
1472 self.name = '"%s"' % _ustr(self.match)
1473 self.errmsg = "Expected " + self.name
1474 self.mayReturnEmpty = False
1475
1476 self.mayIndexError = False
1477
1478
1479
1480
1481
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal text at loc.

       Returns (loc + matchLen, match) on success; otherwise raises this
       element's myException with loc and pstr set.
    """
    # cheap first-character comparison before the full startswith test
    if instring[loc] == self.firstMatchChar:
        if self.matchLen == 1 or instring.startswith( self.match, loc ):
            return loc + self.matchLen, self.match

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1491 _L = Literal
1492
1494 """Token to exactly match a specified string as a keyword, that is, it must be
1495 immediately followed by a non-keyword character. Compare with Literal::
1496 Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
1497 Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
1498 Accepts two optional constructor arguments in addition to the keyword string:
1499 identChars is a string of characters that would be valid identifier characters,
1500 defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
1501 matching, default is False.
1502 """
1503 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1504
1506 super(Keyword,self).__init__()
1507 self.match = matchString
1508 self.matchLen = len(matchString)
1509 try:
1510 self.firstMatchChar = matchString[0]
1511 except IndexError:
1512 warnings.warn("null string passed to Keyword; use Empty() instead",
1513 SyntaxWarning, stacklevel=2)
1514 self.name = '"%s"' % self.match
1515 self.errmsg = "Expected " + self.name
1516 self.mayReturnEmpty = False
1517
1518 self.mayIndexError = False
1519 self.caseless = caseless
1520 if caseless:
1521 self.caselessmatch = matchString.upper()
1522 identChars = identChars.upper()
1523 self.identChars = _str2dict(identChars)
1524
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at loc, requiring that it is neither preceded nor
       followed by a character in identChars.

       In caseless mode the input text and the neighbouring characters are
       upper-cased before comparison. Returns (loc + matchLen, match) on
       success; otherwise raises this element's myException.
    """
    endloc = loc + self.matchLen
    # True when nothing follows the candidate keyword in the input
    atInputEnd = loc >= len(instring) - self.matchLen

    if self.caseless:
        matched = ( instring[ loc:endloc ].upper() == self.caselessmatch and
                    ( atInputEnd or instring[endloc].upper() not in self.identChars ) and
                    ( loc == 0 or instring[loc-1].upper() not in self.identChars ) )
    else:
        matched = ( instring[loc] == self.firstMatchChar and
                    ( self.matchLen == 1 or instring.startswith( self.match, loc ) ) and
                    ( atInputEnd or instring[endloc] not in self.identChars ) and
                    ( loc == 0 or instring[loc-1] not in self.identChars ) )

    if matched:
        return endloc, self.match

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1542
1547
1552 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1553
1555 """Token to match a specified string, ignoring case of letters.
1556 Note: the matched results will always be in the case of the given
1557 match string, NOT the case of the input text.
1558 """
1560 super(CaselessLiteral,self).__init__( matchString.upper() )
1561
1562 self.returnString = matchString
1563 self.name = "'%s'" % self.returnString
1564 self.errmsg = "Expected " + self.name
1565
1566
def parseImpl( self, instring, loc, doActions=True ):
    """Compare the upper-cased input slice at loc against self.match.

       On success returns (loc + matchLen, returnString), i.e. the text is
       reported in the case of returnString rather than of the input.
       Otherwise raises this element's myException.
    """
    endloc = loc + self.matchLen
    if instring[ loc:endloc ].upper() != self.match:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return endloc, self.returnString
1575
1579
def parseImpl( self, instring, loc, doActions=True ):
    """Case-insensitive keyword match at loc.

       The upper-cased input slice must equal caselessmatch and must not be
       followed by a character whose upper-case form is in identChars.
       Returns (loc + matchLen, match); otherwise raises myException.
    """
    endloc = loc + self.matchLen
    if instring[ loc:endloc ].upper() == self.caselessmatch:
        # accept when the keyword runs to the end of input or is followed
        # by a non-identifier character
        if loc >= len(instring) - self.matchLen or \
           instring[endloc].upper() not in self.identChars:
            return endloc, self.match

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
1589
1591 """Token for matching words composed of allowed character sets.
1592 Defined with string containing all allowed initial characters,
1593 an optional string containing allowed body characters (if omitted,
1594 defaults to the initial character set), and an optional minimum,
1595 maximum, and/or exact length. The default value for min is 1 (a
1596 minimum value < 1 is not valid); the default values for max and exact
1597 are 0, meaning no maximum or exact length restriction.
1598 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
    """Define a Word from its allowed character sets and length limits.

       initChars - characters allowed in the first position
       bodyChars - characters allowed after the first; falls back to
                   initChars when omitted or empty
       min       - minimum length; values < 1 raise ValueError
       max       - maximum length, 0 meaning no maximum
       exact     - exact length, 0 meaning unrestricted; when > 0 it
                   overrides both min and max
       asKeyword - if true, the word may not be adjacent to other body
                   characters
    """
    super(Word,self).__init__()
    self.initCharsOrig = initChars
    self.initChars = _str2dict(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = _str2dict(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = _str2dict(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.asKeyword = asKeyword

    # With default length limits and no space in the character sets, the
    # word can be matched by an equivalent precompiled regular expression.
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            # The literal-prefix form is only valid when there is exactly
            # one possible initial character; a multi-character initChars
            # must go through the character-class form below.
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: if the regex cannot be built, parseImpl falls
            # back to the character-by-character scan
            self.re = None
1649
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at loc and return (end location, matched text).

       Uses self.re when one is set; otherwise scans one initial character
       from initChars followed by bodyChars characters, then applies the
       minLen / maxLen / asKeyword checks. Failure raises this element's
       myException.
    """
    if self.re:
        found = self.re.match( instring, loc )
        if found:
            return found.end(), found.group()
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    if instring[ loc ] not in self.initChars:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    start = loc
    body = self.bodyChars
    total = len(instring)
    limit = min( start + self.maxLen, total )
    loc += 1
    while loc < limit and instring[loc] in body:
        loc += 1

    tooShort = loc - start < self.minLen
    # with an explicit maximum, the word must not continue past it
    overlong = self.maxSpecified and loc < total and instring[loc] in body
    # as a keyword, the word may not touch body characters on either side
    notStandalone = self.asKeyword and (
        ( start > 0 and instring[start-1] in body ) or
        ( loc < total and instring[loc] in body ) )

    if tooShort or overlong or notStandalone:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[start:loc]
1694
1696 try:
1697 return super(Word,self).__str__()
1698 except:
1699 pass
1700
1701
1702 if self.strRepr is None:
1703
1704 def charsAsStr(s):
1705 if len(s)>4:
1706 return s[:4]+"..."
1707 else:
1708 return s
1709
1710 if ( self.initCharsOrig != self.bodyCharsOrig ):
1711 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1712 else:
1713 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1714
1715 return self.strRepr
1716
1717
1719 """Token for matching strings that match a given regular expression.
1720 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1721 """
def __init__( self, pattern, flags=0):
    """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
    super(Regex,self).__init__()

    if not len(pattern):
        warnings.warn("null string passed to Regex; use Empty() instead",
                SyntaxWarning, stacklevel=2)

    self.pattern, self.flags = pattern, flags

    try:
        compiled = re.compile(self.pattern, self.flags)
    except sre_constants.error:
        # warn about the bad pattern, then let the original error propagate
        warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
            SyntaxWarning, stacklevel=2)
        raise
    self.re = compiled
    self.reString = self.pattern

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.mayReturnEmpty = True
1746
def parseImpl( self, instring, loc, doActions=True ):
    """Apply the compiled regex at loc.

       Returns (match end, ParseResults) where the results hold the whole
       matched text and one named entry per named group of the pattern.
       Raises this element's myException when the regex does not match.
    """
    found = self.re.match( instring, loc )
    if not found:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    named = found.groupdict()
    tokens = ParseResults( found.group() )
    for key in named:
        tokens[key] = named[key]
    return found.end(), tokens
1762
1764 try:
1765 return super(Regex,self).__str__()
1766 except:
1767 pass
1768
1769 if self.strRepr is None:
1770 self.strRepr = "Re:(%s)" % repr(self.pattern)
1771
1772 return self.strRepr
1773
1774
1776 """Token for matching strings that are delimited by quoting characters.
1777 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=False)
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

       Raises SyntaxError (after issuing a SyntaxWarning) when quoteChar or
       endQuoteChar is empty once surrounding whitespace is stripped, and
       re-raises the regex compile error if the assembled pattern is invalid.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace is stripped from the quote strings before use
    quoteChar = quoteChar.strip()
    if len(quoteChar) == 0:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if len(endQuoteChar) == 0:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # Start of the pattern: the opening quote, then a group whose first
    # alternative is any single character that is not the first end-quote
    # character or the escape character (and, unless multiline, not a
    # line break).
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        self.pattern = r'%s(?:[^%s%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
    else:
        self.flags = 0
        self.pattern = r'%s(?:[^%s\n\r%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
    # For a multi-character end quote, also accept any proper prefix of it
    # followed by a character that breaks the end-quote sequence.
    if len(self.endQuoteChar) > 1:
        self.pattern += (
            '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                           _escapeRegexRangeChars(self.endQuoteChar[i]))
                                for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
            )
    # alternative: the escaped-quote sequence itself
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    # alternative: the escape character followed by any one character;
    # escCharReplacePattern is what parseImpl uses to drop the escape char
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    # close the repeated group and require the full end quote
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name

    self.mayIndexError = False
    self.mayReturnEmpty = True
1851
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string at loc.

       Returns (match end, text). When unquoteResults is set, the text has
       the opening and closing quotes removed, escape characters dropped
       (if escChar is set) and escQuote sequences replaced by endQuoteChar
       (if escQuote is set); otherwise it is the raw matched text.
       Raises this element's myException when there is no match.
    """
    # quick reject on the first quote character before running the regex
    result = None
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    if not result:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off the surrounding quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace each escaped character by the character itself; the
            # replacement template is a raw string so that \g is not read
            # as a (invalid) string escape
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

            # replace escaped quote sequences by the end quote
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
1878
1880 try:
1881 return super(QuotedString,self).__str__()
1882 except:
1883 pass
1884
1885 if self.strRepr is None:
1886 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
1887
1888 return self.strRepr
1889
1890
1892 """Token for matching words composed of characters *not* in a given set.
1893 Defined with string containing all disallowed characters, and an optional
1894 minimum, maximum, and/or exact length. The default value for min is 1 (a
1895 minimum value < 1 is not valid); the default values for max and exact
1896 are 0, meaning no maximum or exact length restriction.
1897 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Define a run of characters that are NOT in notChars.

       min   - minimum length; values < 1 raise ValueError
       max   - maximum length, 0 meaning no maximum
       exact - exact length, 0 meaning unrestricted; when > 0 it overrides
               both min and max
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    # work out the effective length bounds before storing them
    lower, upper = min, _MAX_INT
    if max > 0:
        upper = max
    if exact > 0:
        lower = upper = exact
    self.minLen = lower
    self.maxLen = upper

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
1922
def parseImpl( self, instring, loc, doActions=True ):
    """Consume characters from loc while they are not in notChars.

       Stops at maxLen characters or the end of input. Returns
       (end location, matched text); raises this element's myException if
       the first character is excluded or fewer than minLen were consumed.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    start = loc
    limit = min( start + self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - start < self.minLen:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[start:loc]
1947
1949 try:
1950 return super(CharsNotIn, self).__str__()
1951 except:
1952 pass
1953
1954 if self.strRepr is None:
1955 if len(self.notChars) > 4:
1956 self.strRepr = "!W:(%s...)" % self.notChars[:4]
1957 else:
1958 self.strRepr = "!W:(%s)" % self.notChars
1959
1960 return self.strRepr
1961
1963 """Special matching class for matching whitespace. Normally, whitespace is ignored
1964 by pyparsing grammars. This class is included when some whitespace structures
1965 are significant. Define with a string containing the whitespace characters to be
1966 matched; default is " \\t\\n". Also takes optional min, max, and exact arguments,
1967 as defined for the Word class."""
1968 whiteStrs = {
1969 " " : "<SPC>",
1970 "\t": "<TAB>",
1971 "\n": "<LF>",
1972 "\r": "<CR>",
1973 "\f": "<FF>",
1974 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Define a matcher for the whitespace characters in ws.

       The characters in ws are removed from this element's own skippable
       whitespace set, and the element is named from the whiteStrs labels
       of those characters. min, max and exact bound the match length
       (max == 0 means no maximum; exact > 0 overrides min and max).
    """
    super(White,self).__init__()
    self.matchWhite = ws

    # do not skip over the very characters this element has to match
    skippable = [ ch for ch in self.whiteChars if ch not in ws ]
    self.setWhitespaceChars( "".join(skippable) )

    labels = [ White.whiteStrs[ch] for ch in ws ]
    self.name = "".join(labels)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    lower, upper = min, _MAX_INT
    if max > 0:
        upper = max
    if exact > 0:
        lower = upper = exact
    self.minLen = lower
    self.maxLen = upper
1995
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of matchWhite characters starting at loc.

       Stops at maxLen characters or the end of input. Returns
       (end location, matched text); raises this element's myException if
       the first character is not in matchWhite or fewer than minLen
       characters were consumed.
    """
    wanted = self.matchWhite
    if instring[ loc ] not in wanted:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    start = loc
    limit = min( start + self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] in wanted:
        loc += 1

    if loc - start < self.minLen:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[start:loc]
2018
2019
2022 super(_PositionToken,self).__init__()
2023 self.name=self.__class__.__name__
2024 self.mayReturnEmpty = True
2025 self.mayIndexError = False
2026
2028 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2032
2034 if col(loc,instring) != self.col:
2035 instrlen = len(instring)
2036 if self.ignoreExprs:
2037 loc = self._skipIgnorables( instring, loc )
2038 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2039 loc += 1
2040 return loc
2041
def parseImpl( self, instring, loc, doActions=True ):
    """Advance to the target column self.col.

       Raises ParseException if the current column is already past the
       target; otherwise returns (new location, text skipped over).
    """
    currentCol = col( loc, instring )
    if currentCol > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + self.col - currentCol
    return target, instring[ loc: target ]
2049
2051 """Matches if current position is at the beginning of a line within the parse string"""
2056
2057
2063
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, returning (loc, []), when loc is 0, equals the location
       preParse reports for the start of the string, or directly follows a
       newline; otherwise raise this element's myException."""
    atLineStart = ( loc == 0 or
                    loc == self.preParse( instring, 0 ) or
                    instring[loc-1] == "\n" )
    if atLineStart:
        return loc, []

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2074
2076 """Matches if current position is at the end of a line within the parse string"""
2081
2082
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at loc, or the end of the input.

       Returns (loc+1, "\\n") on a newline and (loc+1, []) exactly at the
       end of the string; any other position raises myException.
    """
    inputLen = len(instring)
    if loc == inputLen:
        return loc+1, []
    if loc < inputLen and instring[loc] == "\n":
        return loc+1, "\n"

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2100
2102 """Matches if current position is at the beginning of the parse string"""
2106
2107
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, returning (loc, []), when loc is 0 or equals the location
       preParse reports for the start of the string; otherwise raise this
       element's myException."""
    if loc != 0 and loc != self.preParse( instring, 0 ):
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return loc, []
2118
2120 """Matches if current position is at the end of the parse string"""
2124
2125
def parseImpl( self, instring, loc, doActions=True ):
    """Match only when no input remains at loc.

       Returns (loc+1, []) when loc is exactly the end of the string and
       (loc, []) when loc is already past it; raises this element's
       myException while there is still input left.
    """
    inputLen = len(instring)
    if loc < inputLen:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
    if loc == inputLen:
        return loc+1, []
    # loc > inputLen is the only remaining case, so no further branch is needed
    return loc, []
2142
2144 """Matches if the current position is at the beginning of a Word, and
2145 is not preceded by any character in a given set of wordChars
2146 (default=printables). To emulate the \b behavior of regular expressions,
2147 use WordStart(alphanums). WordStart will also match at the beginning of
2148 the string being parsed, or at the beginning of a line.
2149 """
2154
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, returning (loc, []), at location 0, or when the previous
       character is not in wordChars and the current one is; otherwise
       raise this element's myException."""
    if loc != 0:
        chars = self.wordChars
        if instring[loc-1] in chars or instring[loc] not in chars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
    return loc, []
2164
2166 """Matches if the current position is at the end of a Word, and
2167 is not followed by any character in a given set of wordChars
2168 (default=printables). To emulate the \b behavior of regular expressions,
2169 use WordEnd(alphanums). WordEnd will also match at the end of
2170 the string being parsed, or at the end of a line.
2171 """
2173 super(WordEnd,self).__init__()
2174 self.wordChars = _str2dict(wordChars)
2175 self.skipWhitespace = False
2176 self.errmsg = "Not at the end of a word"
2177
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, returning (loc, []), when loc is at or past the end of the
       input, or when the previous character is in wordChars and the
       current one is not; otherwise raise this element's myException.

       Location 0 of a non-empty string is never the end of a word, since
       no character precedes it.
    """
    instrlen = len(instring)
    if instrlen>0 and loc<instrlen:
        # loc == 0 is tested explicitly: instring[loc-1] would otherwise
        # wrap around to the last character of the input
        if (instring[loc] in self.wordChars or
            loc == 0 or
            instring[loc-1] not in self.wordChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
    return loc, []
2189
2190
2192 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions as the list self.exprs.

       A list is kept as given, a string is wrapped as a single Literal,
       and any other object becomes a one-element list.
    """
    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, basestring ):
        contained = [ Literal( exprs ) ]
    elif isinstance( exprs, list ):
        contained = exprs
    else:
        contained = [ exprs ]
    self.exprs = contained
    self.callPreparse = False
2202
2204 return self.exprs[i]
2205
2207 self.exprs.append( other )
2208 self.strRepr = None
2209 return self
2210
2212 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
2213 all contained expressions."""
2214 self.skipWhitespace = False
2215 self.exprs = [ e.copy() for e in self.exprs ]
2216 for e in self.exprs:
2217 e.leaveWhitespace()
2218 return self
2219
2221 if isinstance( other, Suppress ):
2222 if other not in self.ignoreExprs:
2223 super( ParseExpression, self).ignore( other )
2224 for e in self.exprs:
2225 e.ignore( self.ignoreExprs[-1] )
2226 else:
2227 super( ParseExpression, self).ignore( other )
2228 for e in self.exprs:
2229 e.ignore( self.ignoreExprs[-1] )
2230 return self
2231
2233 try:
2234 return super(ParseExpression,self).__str__()
2235 except:
2236 pass
2237
2238 if self.strRepr is None:
2239 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2240 return self.strRepr
2241
2243 super(ParseExpression,self).streamline()
2244
2245 for e in self.exprs:
2246 e.streamline()
2247
2248
2249
2250
2251 if ( len(self.exprs) == 2 ):
2252 other = self.exprs[0]
2253 if ( isinstance( other, self.__class__ ) and
2254 not(other.parseAction) and
2255 other.resultsName is None and
2256 not other.debug ):
2257 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2258 self.strRepr = None
2259 self.mayReturnEmpty |= other.mayReturnEmpty
2260 self.mayIndexError |= other.mayIndexError
2261
2262 other = self.exprs[-1]
2263 if ( isinstance( other, self.__class__ ) and
2264 not(other.parseAction) and
2265 other.resultsName is None and
2266 not other.debug ):
2267 self.exprs = self.exprs[:-1] + other.exprs[:]
2268 self.strRepr = None
2269 self.mayReturnEmpty |= other.mayReturnEmpty
2270 self.mayIndexError |= other.mayIndexError
2271
2272 return self
2273
2277
def validate( self, validateTrace=[] ):
    """Validate every contained expression, then run this expression's own
       recursion check.

       Each contained expression receives a copy of validateTrace extended
       with this expression; the caller's list is left untouched.
    """
    extendedTrace = validateTrace[:] + [ self ]
    for contained in self.exprs:
        contained.validate( extendedTrace )
    self.checkRecursion( [] )
2283
2284 -class And(ParseExpression):
2285 """Requires all given ParseExpressions to be found in the given order.
2286 Expressions may be separated by whitespace.
2287 May be constructed using the '+' operator.
2288 """
2289
2294
def __init__( self, exprs, savelist = True ):
    """Build a sequence of expressions that must all match, in order.

       mayReturnEmpty is true only if every contained expression may
       return empty. Whitespace handling (whiteChars and skipWhitespace)
       is taken from the first contained expression.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = True
    for e in self.exprs:
        if not e.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    # Use the normalised self.exprs rather than the raw argument: exprs may
    # be a string or a single expression, which the base class has already
    # wrapped into a list.
    first = self.exprs[0]
    self.setWhitespaceChars( first.whiteChars )
    self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
2305
def parseImpl( self, instring, loc, doActions=True ):
    """Parse each contained expression in turn, starting at loc.

       Returns (end location, accumulated results). Once an
       And._ErrorStop element has been passed, any parse failure of a
       later expression is re-raised as a ParseSyntaxException.
    """
    # the first expression is parsed with callPreParse=False
    # NOTE(review): presumably because this And has already been
    # pre-parsed at loc - confirm against _parse
    loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
    errorStop = False
    for e in self.exprs[1:]:
        # an _ErrorStop is only a marker: it flips the flag and is not parsed
        if isinstance(e, And._ErrorStop):
            errorStop = True
            continue
        if errorStop:
            try:
                loc, exprtokens = e._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException, pe:
                raise ParseSyntaxException(pe)
            except IndexError, ie:
                # running off the end of the input is reported at len(instring)
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        else:
            loc, exprtokens = e._parse( instring, loc, doActions )
        # keep results that carry either tokens or named entries
        if exprtokens or exprtokens.keys():
            resultlist += exprtokens
    return loc, resultlist
2329
2331 if isinstance( other, basestring ):
2332 other = Literal( other )
2333 return self.append( other )
2334
2336 subRecCheckList = parseElementList[:] + [ self ]
2337 for e in self.exprs:
2338 e.checkRecursion( subRecCheckList )
2339 if not e.mayReturnEmpty:
2340 break
2341
2343 if hasattr(self,"name"):
2344 return self.name
2345
2346 if self.strRepr is None:
2347 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2348
2349 return self.strRepr
2350
2351
2352 -class Or(ParseExpression):
2353 """Requires that at least one ParseExpression is found.
2354 If two expressions match, the expression that matches the longest string will be used.
2355 May be constructed using the '^' operator.
2356 """
def __init__( self, exprs, savelist = False ):
    """Build a longest-match alternation; mayReturnEmpty is true as soon as
       any alternative may return empty."""
    super(Or,self).__init__(exprs, savelist)
    emptyAllowed = False
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            emptyAllowed = True
            break
    self.mayReturnEmpty = emptyAllowed
2364
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative at loc and parse with the one that gets furthest.

       Each alternative is first probed with tryParse. If none matches,
       the exception reported at the furthest location is raised, or a
       ParseException when no alternative produced one.
    """
    maxExcLoc = -1       # furthest location at which an alternative failed
    maxMatchLoc = -1     # furthest end location of a successful probe
    maxException = None
    for e in self.exprs:
        try:
            loc2 = e.tryParse( instring, loc )
        except ParseException, err:
            if err.loc > maxExcLoc:
                maxException = err
                maxExcLoc = err.loc
        except IndexError:
            # running off the end of the input counts as a failure at len(instring)
            if len(instring) > maxExcLoc:
                maxException = ParseException(instring,len(instring),e.errmsg,self)
                maxExcLoc = len(instring)
        else:
            # strict comparison: on a tie the earlier alternative is kept
            if loc2 > maxMatchLoc:
                maxMatchLoc = loc2
                maxMatchExp = e

    if maxMatchLoc < 0:
        if maxException is not None:
            raise maxException
        else:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

    # parse again with the winning alternative, this time honouring doActions
    return maxMatchExp._parse( instring, loc, doActions )
2392
2394 if isinstance( other, basestring ):
2395 other = Literal( other )
2396 return self.append( other )
2397
2399 if hasattr(self,"name"):
2400 return self.name
2401
2402 if self.strRepr is None:
2403 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2404
2405 return self.strRepr
2406
2408 subRecCheckList = parseElementList[:] + [ self ]
2409 for e in self.exprs:
2410 e.checkRecursion( subRecCheckList )
2411
2412
2414 """Requires that at least one ParseExpression is found.
2415 If two expressions match, the first one listed is the one that will match.
2416 May be constructed using the '|' operator.
2417 """
def __init__( self, exprs, savelist = False ):
    """Build a MatchFirst over the alternatives in exprs.

    With no alternatives given the expression is flagged as possibly
    matching empty input; otherwise it is flagged that way only if at
    least one alternative is.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not exprs:
        self.mayReturnEmpty = True
        return
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
    else:
        self.mayReturnEmpty = False
2428
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses at loc.

    Alternatives are tried in their listed order.  If all of them fail,
    the failure recorded at the greatest location is raised, or a generic
    ParseException if no failure was recorded at all.
    """
    furthestErrLoc = -1
    furthestErr = None
    for alternative in self.exprs:
        try:
            return alternative._parse( instring, loc, doActions )
        except ParseException:
            # fetch the active exception without version-specific syntax
            err = sys.exc_info()[1]
            if err.loc > furthestErrLoc:
                furthestErr = err
                furthestErrLoc = err.loc
        except IndexError:
            # running off the end of the input counts as a failure there
            if len(instring) > furthestErrLoc:
                furthestErr = ParseException(instring,len(instring),alternative.errmsg,self)
                furthestErrLoc = len(instring)

    # reaching this point means every alternative failed
    if furthestErr is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    raise furthestErr
2451
2453 if isinstance( other, basestring ):
2454 other = Literal( other )
2455 return self.append( other )
2456
2458 if hasattr(self,"name"):
2459 return self.name
2460
2461 if self.strRepr is None:
2462 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2463
2464 return self.strRepr
2465
2467 subRecCheckList = parseElementList[:] + [ self ]
2468 for e in self.exprs:
2469 e.checkRecursion( subRecCheckList )
2470
2471
2472 -class Each(ParseExpression):
2473 """Requires all given ParseExpressions to be found, but in any order.
2474 Expressions may be separated by whitespace.
2475 May be constructed using the '&' operator.
2476 """
def __init__( self, exprs, savelist = True ):
    """Build an Each over exprs.

    The expression is flagged as possibly matching empty input only when
    every member is.  The grouping of members used by parseImpl is
    deferred until the first parse (initExprGroups).
    """
    super(Each,self).__init__(exprs, savelist)
    for member in self.exprs:
        if not member.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    else:
        self.mayReturnEmpty = True
    self.skipWhitespace = True
    self.initExprGroups = True
2486
def parseImpl( self, instring, loc, doActions=True ):
    """Match all member expressions at loc, in whatever order they occur.

    A first pass uses tryParse repeatedly to discover the order in which
    the members match; a second pass runs _parse on the members in that
    order and merges their results.  Raises ParseException if a required
    member was never matched.
    """
    if self.initExprGroups:
        # sort the members into groups once, on the first parse
        wrappers = (Optional,ZeroOrMore,OneOrMore)
        self.optionals = [ m.expr for m in self.exprs if isinstance(m,Optional) ]
        self.multioptionals = [ m.expr for m in self.exprs if isinstance(m,ZeroOrMore) ]
        self.multirequired = [ m.expr for m in self.exprs if isinstance(m,OneOrMore) ]
        self.required = [ m for m in self.exprs if not isinstance(m,wrappers) ] + self.multirequired
        self.initExprGroups = False

    scanLoc = loc
    pendingReqd = self.required[:]
    pendingOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
        failCount = 0
        for cand in candidates:
            try:
                scanLoc = cand.tryParse( instring, scanLoc )
            except ParseException:
                failCount += 1
            else:
                matchOrder.append(cand)
                if cand in pendingReqd:
                    pendingReqd.remove(cand)
                elif cand in pendingOpt:
                    pendingOpt.remove(cand)
        if failCount == len(candidates):
            break

    if pendingReqd:
        missing = ", ".join( [ _ustr(m) for m in pendingReqd ] )
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # append the Optional members whose expression never matched
    matchOrder += [ m for m in self.exprs if isinstance(m,Optional) and m.expr in pendingOpt ]

    parsedResults = []
    for matched in matchOrder:
        loc,results = matched._parse(instring,loc,doActions)
        parsedResults.append(results)

    finalResults = ParseResults([])
    for res in parsedResults:
        # names already present are combined rather than overwritten
        combined = {}
        for name in res.keys():
            if name in finalResults.keys():
                merged = ParseResults(finalResults[name])
                merged += ParseResults(res[name])
                combined[name] = merged
        finalResults += ParseResults(res)
        for name,value in combined.items():
            finalResults[name] = value
    return loc, finalResults
2542
2544 if hasattr(self,"name"):
2545 return self.name
2546
2547 if self.strRepr is None:
2548 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2549
2550 return self.strRepr
2551
2553 subRecCheckList = parseElementList[:] + [ self ]
2554 for e in self.exprs:
2555 e.checkRecursion( subRecCheckList )
2556
2557
2559 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap expr, converting a plain string into a Literal first.

    When expr is not None, the wrapper copies its parsing flags,
    whitespace characters and ignore expressions from it.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return
    # mirror the wrapped expression's settings on the wrapper
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2574
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression.

    Raises ParseException (carrying self.errmsg) when no expression has
    been set.
    """
    wrapped = self.expr
    if wrapped is None:
        raise ParseException("",loc,self.errmsg,self)
    return wrapped._parse( instring, loc, doActions, callPreParse=False )
2580
2582 self.skipWhitespace = False
2583 self.expr = self.expr.copy()
2584 if self.expr is not None:
2585 self.expr.leaveWhitespace()
2586 return self
2587
2589 if isinstance( other, Suppress ):
2590 if other not in self.ignoreExprs:
2591 super( ParseElementEnhance, self).ignore( other )
2592 if self.expr is not None:
2593 self.expr.ignore( self.ignoreExprs[-1] )
2594 else:
2595 super( ParseElementEnhance, self).ignore( other )
2596 if self.expr is not None:
2597 self.expr.ignore( self.ignoreExprs[-1] )
2598 return self
2599
2605
2607 if self in parseElementList:
2608 raise RecursiveGrammarException( parseElementList+[self] )
2609 subRecCheckList = parseElementList[:] + [ self ]
2610 if self.expr is not None:
2611 self.expr.checkRecursion( subRecCheckList )
2612
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression, then check self for recursion.

    validateTrace is only copied, never modified, so the shared default
    list is not altered between calls.
    """
    trace = validateTrace[:] + [ self ]
    wrapped = self.expr
    if wrapped is not None:
        wrapped.validate(trace)
    self.checkRecursion( [] )
2618
2620 try:
2621 return super(ParseElementEnhance,self).__str__()
2622 except:
2623 pass
2624
2625 if self.strRepr is None and self.expr is not None:
2626 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2627 return self.strRepr
2628
2629
2631 """Lookahead matching of the given parse expression. FollowedBy
2632 does *not* advance the parsing position within the input string, it only
2633 verifies that the specified parse expression matches at the current
2634 position. FollowedBy always returns a null token list."""
2638
2639 - def parseImpl( self, instring, loc, doActions=True ):
2642
2643
2644 -class NotAny(ParseElementEnhance):
2645 """Lookahead to disallow matching with the given parse expression. NotAny
2646 does *not* advance the parsing position within the input string, it only
2647 verifies that the specified parse expression does *not* match at the current
2648 position. Also, NotAny does *not* skip over leading whitespace. NotAny
2649 always returns a null token list. May be constructed using the '~' operator."""
2651 super(NotAny,self).__init__(expr)
2652
2653 self.skipWhitespace = False
2654 self.mayReturnEmpty = True
2655 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2656
2657
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, only if self.expr does not match at loc.

    When the expression does match, the instance's stored exception
    (self.myException) is updated with the current location and input
    and raised.
    """
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        # the unwanted expression is absent: succeed with no tokens
        return loc, []

    exc = self.myException
    exc.loc = loc
    exc.pstr = instring
    raise exc
2670
2672 if hasattr(self,"name"):
2673 return self.name
2674
2675 if self.strRepr is None:
2676 self.strRepr = "~{" + _ustr(self.expr) + "}"
2677
2678 return self.strRepr
2679
2680
2682 """Optional repetition of zero or more of the given expression."""
2686
def parseImpl( self, instring, loc, doActions=True ):
    """Match self.expr as many times as it will match, possibly never.

    Returns the location after the last successful match together with
    the accumulated tokens (an empty list if there was no match at all).
    """
    tokens = []
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        mustSkipIgnorables = len(self.ignoreExprs) > 0
        while True:
            nextLoc = loc
            if mustSkipIgnorables:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        # the first failed repetition ends the match
        pass

    return loc, tokens
2704
2706 if hasattr(self,"name"):
2707 return self.name
2708
2709 if self.strRepr is None:
2710 self.strRepr = "[" + _ustr(self.expr) + "]..."
2711
2712 return self.strRepr
2713
2718
2719
2721 """Repetition of one or more of the given expression."""
def parseImpl( self, instring, loc, doActions=True ):
    """Match self.expr once, then as many more times as it will match.

    A failure of the first match propagates to the caller; a failure of
    any later repetition simply ends the match.
    """
    # the first occurrence is mandatory, so it is parsed outside the try
    loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    try:
        mustSkipIgnorables = len(self.ignoreExprs) > 0
        while True:
            nextLoc = loc
            if mustSkipIgnorables:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        pass

    return loc, tokens
2739
2741 if hasattr(self,"name"):
2742 return self.name
2743
2744 if self.strRepr is None:
2745 self.strRepr = "{" + _ustr(self.expr) + "}..."
2746
2747 return self.strRepr
2748
2753
2760
2761 _optionalNotMatched = _NullToken()
2763 """Optional matching of the given expression.
2764 A default return string can also be specified, if the optional expression
2765 is not found.
2766 """
2768 super(Optional,self).__init__( exprs, savelist=False )
2769 self.defaultValue = default
2770 self.mayReturnEmpty = True
2771
def parseImpl( self, instring, loc, doActions=True ):
    """Match self.expr if possible; otherwise succeed without consuming input.

    On a failed match the tokens are empty, unless a default value was
    supplied: then the default is returned as the single token, and also
    stored under the wrapped expression's results name when it has one.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        if self.defaultValue is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ self.defaultValue ])
            tokens[self.expr.resultsName] = self.defaultValue
        else:
            tokens = [ self.defaultValue ]
    return loc, tokens
2785
2787 if hasattr(self,"name"):
2788 return self.name
2789
2790 if self.strRepr is None:
2791 self.strRepr = "[" + _ustr(self.expr) + "]"
2792
2793 return self.strRepr
2794
2795
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
    If include is set to true, the matched expression is also consumed.  The ignore
    argument is used to define grammars (typically quoted strings and comments) that
    might contain false matches.  If failOn is given (an expression, or a string that
    is converted to a Literal), the skip is abandoned with a ParseException as soon as
    failOn matches at a scanned position before the target expression has been found.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # attach the ignore expression to a copy of the target expression
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from loc until self.expr matches.

        Returns the end location and a one-element token list holding the
        skipped text (followed by the target's own tokens when include was
        requested and the target produced any).  Raises ParseException if
        failOn matches first, or if the target is never found.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        while loc <= instrlen:
            if self.failOn is not None:
                # A failOn expression that does NOT match here is the normal
                # case and scanning continues; only an actual match aborts
                # the skip.  This check sits outside the try block below so
                # that the abort is not mistaken for "target not found yet".
                try:
                    self.failOn.tryParse(instring, loc)
                except (ParseException,IndexError):
                    pass
                else:
                    raise ParseException(instring, loc,
                                         "Found expression " + _ustr(self.failOn), self)
            try:
                loc = expr._skipIgnorables( instring, loc )
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                # target not found at this position: move on by one character
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2851
2852 -class Forward(ParseElementEnhance):
2853 """Forward declaration of an expression to be defined later -
2854 used for recursive grammars, such as algebraic infix notation.
2855 When the expression is known, it is assigned to the Forward variable using the '<<' operator.
2856
2857 Note: take care when assigning to Forward not to overlook precedence of operators.
2858 Specifically, '|' has a lower precedence than '<<', so that::
2859 fwdExpr << a | b | c
2860 will actually be evaluated as::
2861 (fwdExpr << a) | b | c
2862 thereby leaving b and c out as parseable alternatives. It is recommended that you
2863 explicitly group the values inserted into the Forward::
2864 fwdExpr << (a | b | c)
2865 """
2868
2870 if isinstance( other, basestring ):
2871 other = Literal(other)
2872 self.expr = other
2873 self.mayReturnEmpty = other.mayReturnEmpty
2874 self.strRepr = None
2875 self.mayIndexError = self.expr.mayIndexError
2876 self.mayReturnEmpty = self.expr.mayReturnEmpty
2877 self.setWhitespaceChars( self.expr.whiteChars )
2878 self.skipWhitespace = self.expr.skipWhitespace
2879 self.saveAsList = self.expr.saveAsList
2880 self.ignoreExprs.extend(self.expr.ignoreExprs)
2881 return None
2882
2884 self.skipWhitespace = False
2885 return self
2886
2888 if not self.streamlined:
2889 self.streamlined = True
2890 if self.expr is not None:
2891 self.expr.streamline()
2892 return self
2893
def validate( self, validateTrace=[] ):
    """Validate the assigned expression, then check self for recursion.

    The assigned expression is only visited when self is not already in
    validateTrace, which keeps recursive grammars from looping forever.
    validateTrace itself is never modified.
    """
    alreadyVisited = self in validateTrace
    if not alreadyVisited:
        trace = validateTrace[:] + [ self ]
        target = self.expr
        if target is not None:
            target.validate(trace)
    self.checkRecursion([])
2900
2902 if hasattr(self,"name"):
2903 return self.name
2904
2905 self._revertClass = self.__class__
2906 self.__class__ = _ForwardNoRecurse
2907 try:
2908 if self.expr is not None:
2909 retString = _ustr(self.expr)
2910 else:
2911 retString = "None"
2912 finally:
2913 self.__class__ = self._revertClass
2914 return self.__class__.__name__ + ": " + retString
2915
2917 if self.expr is not None:
2918 return super(Forward,self).copy()
2919 else:
2920 ret = Forward()
2921 ret << self
2922 return ret
2923
2927
2929 """Abstract subclass of ParseExpression, for converting parsed results."""
2930 - def __init__( self, expr, savelist=False ):
2933
2934 -class Upcase(TokenConverter):
2935 """Converter to upper case all matching tokens."""
2937 super(Upcase,self).__init__(*args)
2938 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
2939 DeprecationWarning,stacklevel=2)
2940
def postParse( self, instring, loc, tokenlist ):
    """Return a list holding the upper-cased form of every token."""
    return [ string.upper(token) for token in tokenlist ]
2943
2944
2946 """Converter to concatenate all matching tokens to a single string.
2947 By default, the matching patterns must also be contiguous in the input string;
2948 this can be disabled by specifying 'adjacent=False' in the constructor.
2949 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap expr so that its tokens are joined into a single string.

    joinString is the separator used when joining.  With adjacent set,
    leaveWhitespace() is applied; skipWhitespace is then switched back on
    for the Combine itself.
    """
    super(Combine,self).__init__( expr )
    self.joinString = joinString
    self.adjacent = adjacent
    if adjacent:
        self.leaveWhitespace()
    # leaveWhitespace() cleared this flag; restore it on the Combine
    self.skipWhitespace = True
2958
2965
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with their concatenation.

    The result starts as a copy of tokenlist with its tokens removed, to
    which the single joined string is added.  It is wrapped in a list
    when this expression has a results name and the result has any keys.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined = tokenlist.copy()
    del combined[:]
    combined += ParseResults([ joined ], modal=self.modalResults)

    hasKeys = len(combined.keys())>0
    if self.resultsName and hasKeys:
        return [ combined ]
    return combined
2975
2976 -class Group(TokenConverter):
2977 """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
2979 super(Group,self).__init__( expr )
2980 self.saveAsList = True
2981
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens nested inside a one-element list."""
    grouped = [ tokenlist ]
    return grouped
2984
2985 -class Dict(TokenConverter):
2986 """Converter to return a repetitive expression as a list, but also as a dictionary.
2987 Each element can also be referenced using the first token in the expression as its key.
2988 Useful for tabular report scraping when the first column can be used as a item key.
2989 """
2991 super(Dict,self).__init__( exprs )
2992 self.saveAsList = True
2993
def postParse( self, instring, loc, tokenlist ):
    """Add a named entry to tokenlist for every non-empty token group.

    The first token of each group is the key (integer keys are converted
    to stripped strings).  The value is "" for a lone key, the second
    token for a simple key/value pair, and otherwise the remaining tokens
    (unwrapped when exactly one keyless token is left).  The token list
    is returned wrapped in a list when this expression has a results name.
    """
    for position,entry in enumerate(tokenlist):
        entryLen = len(entry)
        if entryLen == 0:
            continue
        key = entry[0]
        if isinstance(key,int):
            key = _ustr(entry[0]).strip()

        if entryLen == 1:
            value = ""
        elif entryLen == 2 and not isinstance(entry[1],ParseResults):
            value = entry[1]
        else:
            value = entry.copy()
            del value[0]
            hasNames = isinstance(value,ParseResults) and value.keys()
            if len(value) == 1 and not hasNames:
                # a single plain value is stored directly, not as a list
                value = value[0]
        tokenlist[key] = _ParseResultsWithOffset(value,position)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3017
3018
3020 """Converter for ignoring the results of a parsed expression."""
3021 - def postParse( self, instring, loc, tokenlist ):
3023
3026
3027
3029 """Wrapper for parse actions, to ensure they are only called once."""
3034 if not self.called:
3035 results = self.callable(s,l,t)
3036 self.called = True
3037 return results
3038 raise ParseException(s,l,"")
3041
3043 """Decorator for debugging parse actions."""
3044 f = ParserElement._normalizeParseActionArgs(f)
3045 def z(*paArgs):
3046 thisFunc = f.func_name
3047 s,l,t = paArgs[-3:]
3048 if len(paArgs)>3:
3049 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3050 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3051 try:
3052 ret = f(*paArgs)
3053 except Exception, exc:
3054 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3055 raise
3056 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3057 return ret
3058 try:
3059 z.__name__ = f.__name__
3060 except AttributeError:
3061 pass
3062 return z
3063
3064
3065
3066
3068 """Helper to define a delimited list of expressions - the delimiter defaults to ','.
3069 By default, the list elements and delimiters can have intervening whitespace, and
3070 comments, but this can be overridden by passing 'combine=True' in the constructor.
3071 If combine is set to True, the matching tokens are returned as a single token
3072 string, with the delimiters included; otherwise, the matching tokens are returned
3073 as a list of tokens, with the delimiters suppressed.
3074 """
3075 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
3076 if combine:
3077 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3078 else:
3079 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
3080
3082 """Helper to define a counted list of expressions.
3083 This helper defines a pattern of the form::
3084 integer expr expr expr...
3085 where the leading integer tells how many expr expressions follow.
3086 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3087 """
3088 arrayExpr = Forward()
3089 def countFieldParseAction(s,l,t):
3090 n = int(t[0])
3091 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3092 return []
3093 return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr )
3094
3096 if type(L) is not list: return [L]
3097 if L == []: return L
3098 return _flatten(L[0]) + _flatten(L[1:])
3099
3101 """Helper to define an expression that is indirectly defined from
3102 the tokens matched in a previous expression, that is, it looks
3103 for a 'repeat' of a previous expression. For example::
3104 first = Word(nums)
3105 second = matchPreviousLiteral(first)
3106 matchExpr = first + ":" + second
3107 will match "1:1", but not "1:2". Because this matches a
3108 previous literal, will also match the leading "1:1" in "1:10".
3109 If this is not desired, use matchPreviousExpr.
3110 Do *not* use with packrat parsing enabled.
3111 """
3112 rep = Forward()
3113 def copyTokenToRepeater(s,l,t):
3114 if t:
3115 if len(t) == 1:
3116 rep << t[0]
3117 else:
3118
3119 tflat = _flatten(t.asList())
3120 rep << And( [ Literal(tt) for tt in tflat ] )
3121 else:
3122 rep << Empty()
3123 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3124 return rep
3125
3127 """Helper to define an expression that is indirectly defined from
3128 the tokens matched in a previous expression, that is, it looks
3129 for a 'repeat' of a previous expression. For example::
3130 first = Word(nums)
3131 second = matchPreviousExpr(first)
3132 matchExpr = first + ":" + second
3133 will match "1:1", but not "1:2". Because this matches by
3134 expressions, will *not* match the leading "1:1" in "1:10";
3135 the expressions are evaluated first, and then compared, so
3136 "1" is compared with "10".
3137 Do *not* use with packrat parsing enabled.
3138 """
3139 rep = Forward()
3140 e2 = expr.copy()
3141 rep << e2
3142 def copyTokenToRepeater(s,l,t):
3143 matchTokens = _flatten(t.asList())
3144 def mustMatchTheseTokens(s,l,t):
3145 theseTokens = _flatten(t.asList())
3146 if theseTokens != matchTokens:
3147 raise ParseException("",0,"")
3148 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3149 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3150 return rep
3151
3153
3154 for c in r"\^-]":
3155 s = s.replace(c,_bslash+c)
3156 s = s.replace("\n",r"\n")
3157 s = s.replace("\t",r"\t")
3158 return _ustr(s)
3159
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list (or tuple) of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if caseless=True, or
       if creating a Regex raises an exception)

    Raises TypeError (after issuing a SyntaxWarning) if strs is neither a string
    nor a list/tuple.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a real list: the reordering below edits it in place,
        # which a tuple does not support, and the caller's sequence must not change
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # nothing sensible can be built; fail with a clear error
        raise TypeError("Invalid argument to oneOf, expected string or list")

    # drop duplicates, and move any symbol ahead of a shorter symbol that is
    # its prefix, so the longer alternative is always tested first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class will do
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3218
3220 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3221 for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens
3222 in the proper order. The key pattern can include delimiting markers or punctuation,
3223 as long as they are suppressed, thereby leaving the significant key text. The value
3224 pattern can include named results, so that the Dict results can include named token
3225 fields.
3226 """
3227 return Dict( ZeroOrMore( Group ( key + value ) ) )
3228
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text.  Simpler to use than the parse action keepOriginalText, and does not
    require the inspect module to chase up the call stack.  By default, returns a
    string containing the original parsed text.

    If the optional asString argument is passed as False, then the return value is a
    ParseResults containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to originalTextFor contains expressions with defined
    results names, you must set asString to False if you want to preserve those
    results name values."""
    def reportLocation(s,loc,t):
        return loc

    def textOnly(s,l,t):
        return s[t._original_start:t._original_end]

    def textKeepingNames(s,l,t):
        # swap the tokens for the raw text, then drop the two marker names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    # empty markers on either side of expr record where its match began and ended
    locMarker = Empty().setParseAction(reportLocation)
    startMarker = locMarker("_original_start")
    endMarker = locMarker("_original_end")
    matchExpr = startMarker + expr + endMarker

    if asString:
        matchExpr.setParseAction(textOnly)
    else:
        matchExpr.setParseAction(textKeepingNames)
    return matchExpr
3255
3256
# Ready-made, named instances of the position/empty matching expressions.
3257 empty = Empty().setName("empty")
3258 lineStart = LineStart().setName("lineStart")
3259 lineEnd = LineEnd().setName("lineEnd")
3260 stringStart = StringStart().setName("stringStart")
3261 stringEnd = StringEnd().setName("stringEnd")
3262
# Grammar for a regex-style "[...]" bracket expression.
# A backslash followed by one character from the listed punctuation set;
# the parse action keeps only that second character.
3263 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# All printables except the backslash and the closing bracket.
3264 _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# Backslash + "0x" + hex digits, converted to the character with that code.
3265 _escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# Backslash + a word starting with "0" made of octal digits, converted to
# the character with that code.
3266 _escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# One character in any of the escaped forms above, or a single plain printable.
3267 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# Two single characters separated by a dash, grouped as a (first, last) pair.
3268 _charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[", an optional "^" (results name "negate"), one or more ranges or single
# characters (results name "body"), then "]".
3269 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3270
# Expand one parsed element: a ParseResults pair becomes the string of every
# character from its first to its last element inclusive; anything else is
# returned unchanged.  NOTE: because of the and/or idiom, a pair whose
# expansion is empty (first > last) is also returned unchanged.
3271 _expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3272
3274 r"""Helper to easily define string ranges for use in Word construction. Borrows
3275 syntax from regexp '[]' string range definitions::
3276 srange("[0-9]") -> "0123456789"
3277 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
3278 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
3279 The input string must be enclosed in []'s, and the returned string is the expanded
3280 character set joined into a single string.
3281 The values enclosed in the []'s may be::
3282 a single character
3283 an escaped character with a leading backslash (such as \- or \])
3284 an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
3285 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3286 a range of any of the above, separated by a dash ('a-z', etc.)
3287 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3288 """
3289 try:
3290 return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
3291 except:
3292 return ""
3293
3295 """Helper method for defining parse actions that require matching at a specific
3296 column in the input text.
3297 """
3298 def verifyCol(strg,locn,toks):
3299 if col(locn,strg) != n:
3300 raise ParseException(strg,locn,"matched token not at column %d" % n)
3301 return verifyCol
3302
3304 """Helper method for common parse actions that simply return a literal value. Especially
3305 useful when used with transformString().
3306 """
3307 def _replFunc(*args):
3308 return [replStr]
3309 return _replFunc
3310
3312 """Helper parse action for removing quotation marks from parsed quoted strings.
3313 To use, add this parse action to quoted string using::
3314 quotedString.setParseAction( removeQuotes )
3315 """
3316 return t[0][1:-1]
3317
3319 """Helper parse action to convert tokens to upper case."""
3320 return [ tt.upper() for tt in map(_ustr,t) ]
3321
3323 """Helper parse action to convert tokens to lower case."""
3324 return [ tt.lower() for tt in map(_ustr,t) ]
3325
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    # getTokensEndLoc() is called directly from this function, not from a
    # nested helper, so the call-stack depth it sees stays as before.
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:stopLoc]
    # replace the tokens in place with the raw matched text
    del t[:]
    t += ParseResults(originalText)
    return t
3336
3338 """Method to be called from within a parse action to determine the end
3339 location of the parsed tokens."""
3340 import inspect
3341 fstack = inspect.stack()
3342 try:
3343
3344 for f in fstack[2:]:
3345 if f[3] == "_parseNoCache":
3346 endloc = f[0].f_locals["loc"]
3347 return endloc
3348 else:
3349 raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
3350 finally:
3351 del fstack
3352
3380
3384
3388
3390 """Helper to create a validating parse action to be used with start tags created
3391 with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
3392 with a required attribute value, to avoid false matches on common tags such as
3393 <TD> or <DIV>.
3394
3395 Call withAttribute with a series of attribute names and values. Specify the list
3396 of filter attributes names and values as:
3397 - keyword arguments, as in (class="Customer",align="right"), or
3398 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
3399 For attribute names with a namespace prefix, you must use the second form. Attribute
3400 names are matched insensitive to upper/lower case.
3401
3402 To verify that the attribute exists, but without specifying a value, pass
3403 withAttribute.ANY_VALUE as the value.
3404 """
3405 if args:
3406 attrs = args[:]
3407 else:
3408 attrs = attrDict.items()
3409 attrs = [(k,v) for k,v in attrs]
3410 def pa(s,l,tokens):
3411 for attrName,attrValue in attrs:
3412 if attrName not in tokens:
3413 raise ParseException(s,l,"no matching attribute " + attrName)
3414 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
3415 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
3416 (attrName, tokens[attrName], attrValue))
3417 return pa
3418 withAttribute.ANY_VALUE = object()
3419
3420 opAssoc = _Constants()
3421 opAssoc.LEFT = object()
3422 opAssoc.RIGHT = object()
3423
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar, tightest-binding level first (each level is built
          on top of the previous one); each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
             may also be a string, which will be converted to a Literal;
             if numTerms is 2, opExpr may be None, in which case the terms
             are simply adjacent with no operator between them;
             if numTerms is 3, opExpr is a tuple of two expressions, for the
             two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
             be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
             right or left associative, using the pyparsing-defined
             constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
             expressions matching this operator expression (the
             parse action tuple member may be omitted)
          A level may be given as a list instead of a tuple.

       Returns a Forward expression matching the complete grammar, including
       parenthesized sub-expressions.

       Raises ValueError if numTerms is not 1, 2 or 3, if numTerms is 3 and
       opExpr is not a pair of expressions, or if rightLeftAssoc is neither
       opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    # an operand is either the base expression or a parenthesized full expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so the optional parseAction member may be left out;
        # tuple() lets a level be supplied as a list as well as a tuple
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        # each matchExpr is guarded by a FollowedBy lookahead of the same shape,
        # so the Group (and its parse action) is only attempted when an operator
        # of this level is really present; otherwise the level falls through
        # to lastExpr below
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # the Group uses an Optional operator, while the lookahead
                # requires the bare operator (opExpr.expr) to actually be there
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or else any tighter-binding level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3494
# Pre-built expressions for quoted strings.  A quoted string may not contain a
# raw newline or carriage return; inside the quotes a doubled quote character,
# a \x hex escape, or any other backslash-escaped character is accepted.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, as a single regular expression
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u', joined into one token by Combine
# (_L is presumably the module's short alias for Literal - confirm)
unicodeString = Combine(_L('u') + quotedString.copy())
3499
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for building an expression that matches nested lists
       enclosed in opening and closing delimiters ("(" and ")" by default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       When no content expression is given, opener and closer must both be
       strings, and the nested expression captures all whitespace-delimited
       content between the delimiters as a list of separate values.

       Use ignoreExpr for expressions that may themselves contain the opening
       or closing characters without those characters counting as nesting,
       such as quotedString or a comment expression.  Combine several such
       expressions with an Or or MatchFirst.  Pass None if nothing is to be
       ignored.

       Raises ValueError if opener and closer are equal, or if content is
       omitted and opener/closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        singleCharDelims = (len(opener) == 1 and len(closer)==1)
        if singleCharDelims:
            # one-character delimiters can simply be excluded from the character set
            if ignoreExpr is None:
                content = empty + CharsNotIn(opener+closer+whiteChars).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~ignoreExpr +
                                CharsNotIn(opener+closer+whiteChars,exact=1))
                            ).setParseAction(stripToken)
        else:
            # longer delimiters need explicit negative lookaheads before each character
            if ignoreExpr is None:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1))
                            ).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~ignoreExpr +
                                ~Literal(opener) + ~Literal(closer) +
                                CharsNotIn(whiteChars,exact=1))
                            ).setParseAction(stripToken)
    ret = Forward()
    # items are tried in order: ignorable text (if any), a nested list, plain content
    if ignoreExpr is None:
        item = ret | content
    else:
        item = ignoreExpr | ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3551
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining the syntax of the statement
          that is repeated within the indented block
        - indentStack - list created by the caller to track the columns of the
          enclosing indentation levels (multiple statementWithIndentedBlock
          expressions within a single grammar should share a common indentStack)
        - indent - boolean indicating whether the block must be indented beyond
          the current level; set to False for a block of left-most statements
          (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr is modified in place: a backslash followed
       by a line end is added to its ignore expressions.
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is nothing left to compare
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        if column != expected:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: open a new indentation level
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        # back at (or left of) the enclosing level: close the current one
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    # one or more statements at the same column, each optionally followed by newlines
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        blockBody = (Optional(NL) +
                     FollowedBy(blockStatementExpr) +
                     INDENT + statements + UNDENT)
    else:
        blockBody = Optional(NL) + statements
    smExpr = Group( blockBody )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3603
# Character sets expanded by srange from 8-bit ranges
# (presumably the Latin-1 accented letters and punctuation/symbols - confirm)
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Opening/closing tag expressions built by makeHTMLTags for a generic tag name:
# a letter followed by letters, digits, "_" or ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# Matches &gt; &lt; &amp; &nbsp; or &quot; as one token; the entity name is
# stored under the results name "entity"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
# entity name -> replacement character (note: nbsp maps to an ordinary space)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# Parse action: returns the replacement character for t.entity, or None when
# the entity is not in the map.  The "and/or" idiom is safe here only because
# every mapped value is a non-empty (truthy) string.
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
3611
3612
# Pre-built comment expressions, intended for use with ignore() or on their own.

# /* ... */ comment; the lazy quantifier stops at the first closing "*/"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... --> comment; [\s\S] lets the body span multiple lines
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to the end of the current line ("." does not match a newline);
# leaveWhitespace() is applied, presumably so leading whitespace is kept - confirm
restOfLine = Regex(r".*").leaveWhitespace()
# // comment to end of line, continuing across a backslash-newline
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ block comment or a // line comment in one expression
# (the // branch appears intended to follow backslash-continued lines - confirm)
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# alias: the very same expression object as cppStyleComment
javaStyleComment = cppStyleComment
# "#" through end of line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# all printable characters except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one list item: runs of non-comma characters, which may be separated by
# internal spaces/tabs as long as that whitespace is not followed by a comma
# or the end of the line; Combine joins the pieces into a single token
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# list of quoted strings or plain items, built with delimitedList; an empty
# field yields the default ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
3627
3628
3629 if __name__ == "__main__":
3630
3631 - def test( teststring ):
3632 try:
3633 tokens = simpleSQL.parseString( teststring )
3634 tokenlist = tokens.asList()
3635 print (teststring + "->" + str(tokenlist))
3636 print ("tokens = " + str(tokens))
3637 print ("tokens.columns = " + str(tokens.columns))
3638 print ("tokens.tables = " + str(tokens.tables))
3639 print (tokens.asXML("SQL",True))
3640 except ParseBaseException,err:
3641 print (teststring + "->")
3642 print (err.line)
3643 print (" "*(err.column-1) + "^")
3644 print (err)
3645 print()
3646
3647 selectToken = CaselessLiteral( "select" )
3648 fromToken = CaselessLiteral( "from" )
3649
3650 ident = Word( alphas, alphanums + "_$" )
3651 columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3652 columnNameList = Group( delimitedList( columnName ) )
3653 tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3654 tableNameList = Group( delimitedList( tableName ) )
3655 simpleSQL = ( selectToken + \
3656 ( '*' | columnNameList ).setResultsName( "columns" ) + \
3657 fromToken + \
3658 tableNameList.setResultsName( "tables" ) )
3659
3660 test( "SELECT * from XYZZY, ABC" )
3661 test( "select * from SYS.XYZZY" )
3662 test( "Select A from Sys.dual" )
3663 test( "Select AA,BB,CC from Sys.dual" )
3664 test( "Select A, B, C from Sys.dual" )
3665 test( "Select A, B, C from Sys.dual" )
3666 test( "Xelect A, B, C from Sys.dual" )
3667 test( "Select A, B, C frox Sys.dual" )
3668 test( "Select" )
3669 test( "Select ^^^ frox Sys.dual" )
3670 test( "Select A, B, C from Sys.dual, Table2 " )
3671