You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

pyparsing.py 238KB


  1. #-*- coding: utf-8 -*-
  2. # module pyparsing.py
  3. #
  4. # Copyright (c) 2003-2019 Paul T. McGuire
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining
  7. # a copy of this software and associated documentation files (the
  8. # "Software"), to deal in the Software without restriction, including
  9. # without limitation the rights to use, copy, modify, merge, publish,
  10. # distribute, sublicense, and/or sell copies of the Software, and to
  11. # permit persons to whom the Software is furnished to do so, subject to
  12. # the following conditions:
  13. #
  14. # The above copyright notice and this permission notice shall be
  15. # included in all copies or substantial portions of the Software.
  16. #
  17. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  20. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  21. # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22. # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24. #
  25. __doc__ = \
  26. """
  27. pyparsing module - Classes and methods to define and execute parsing grammars
  28. =============================================================================
  29. The pyparsing module is an alternative approach to creating and
  30. executing simple grammars, vs. the traditional lex/yacc approach, or the
  31. use of regular expressions. With pyparsing, you don't need to learn
  32. a new syntax for defining grammars or matching expressions - the parsing
  33. module provides a library of classes that you use to construct the
  34. grammar directly in Python.
  35. Here is a program to parse "Hello, World!" (or any greeting of the form
  36. ``"<salutation>, <addressee>!"``), built up using :class:`Word`,
  37. :class:`Literal`, and :class:`And` elements
  38. (the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
  39. and the strings are auto-converted to :class:`Literal` expressions)::
  40. from pip._vendor.pyparsing import Word, alphas
  41. # define grammar of a greeting
  42. greet = Word(alphas) + "," + Word(alphas) + "!"
  43. hello = "Hello, World!"
  44. print (hello, "->", greet.parseString(hello))
  45. The program outputs the following::
  46. Hello, World! -> ['Hello', ',', 'World', '!']
  47. The Python representation of the grammar is quite readable, owing to the
  48. self-explanatory class names, and the use of '+', '|' and '^' operators.
  49. The :class:`ParseResults` object returned from
  50. :class:`ParserElement.parseString` can be
  51. accessed as a nested list, a dictionary, or an object with named
  52. attributes.
  53. The pyparsing module handles some of the problems that are typically
  54. vexing when writing text parsers:
  55. - extra or missing whitespace (the above program will also handle
  56. "Hello,World!", "Hello , World !", etc.)
  57. - quoted strings
  58. - embedded comments
  59. Getting Started -
  60. -----------------
  61. Visit the classes :class:`ParserElement` and :class:`ParseResults` to
  62. see the base classes that most other pyparsing
  63. classes inherit from. Use the docstrings for examples of how to:
  64. - construct literal match expressions from :class:`Literal` and
  65. :class:`CaselessLiteral` classes
  66. - construct character word-group expressions using the :class:`Word`
  67. class
  68. - see how to create repetitive expressions using :class:`ZeroOrMore`
  69. and :class:`OneOrMore` classes
  70. - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
  71. and :class:`'&'<Each>` operators to combine simple expressions into
  72. more complex ones
  73. - associate names with your parsed results using
  74. :class:`ParserElement.setResultsName`
  75. - find some helpful expression short-cuts like :class:`delimitedList`
  76. and :class:`oneOf`
  77. - find more useful common expressions in the :class:`pyparsing_common`
  78. namespace class
  79. """
  80. __version__ = "2.3.1"
  81. __versionTime__ = "09 Jan 2019 23:26 UTC"
  82. __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  83. import string
  84. from weakref import ref as wkref
  85. import copy
  86. import sys
  87. import warnings
  88. import re
  89. import sre_constants
  90. import collections
  91. import pprint
  92. import traceback
  93. import types
  94. from datetime import datetime
  95. try:
  96. # Python 3
  97. from itertools import filterfalse
  98. except ImportError:
  99. from itertools import ifilterfalse as filterfalse
  100. try:
  101. from _thread import RLock
  102. except ImportError:
  103. from threading import RLock
  104. try:
  105. # Python 3
  106. from collections.abc import Iterable
  107. from collections.abc import MutableMapping
  108. except ImportError:
  109. # Python 2.7
  110. from collections import Iterable
  111. from collections import MutableMapping
  112. try:
  113. from collections import OrderedDict as _OrderedDict
  114. except ImportError:
  115. try:
  116. from ordereddict import OrderedDict as _OrderedDict
  117. except ImportError:
  118. _OrderedDict = None
  119. try:
  120. from types import SimpleNamespace
  121. except ImportError:
  122. class SimpleNamespace: pass
  123. #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
  124. __all__ = [
  125. 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
  126. 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
  127. 'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
  128. 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
  129. 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
  130. 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
  131. 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
  132. 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
  133. 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
  134. 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
  135. 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
  136. 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
  137. 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
  138. 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
  139. 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
  140. 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
  141. 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
  142. 'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
  143. ]
  144. system_version = tuple(sys.version_info)[:3]
  145. PY_3 = system_version[0] == 3
  146. if PY_3:
  147. _MAX_INT = sys.maxsize
  148. basestring = str
  149. unichr = chr
  150. unicode = str
  151. _ustr = str
  152. # build list of single arg builtins, that can be used as parse actions
  153. singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
  154. else:
  155. _MAX_INT = sys.maxint
  156. range = xrange
  157. def _ustr(obj):
  158. """Drop-in replacement for str(obj) that tries to be Unicode
  159. friendly. It first tries str(obj). If that fails with
  160. a UnicodeEncodeError, then it tries unicode(obj). It then
  161. < returns the unicode object | encodes it with the default
  162. encoding | ... >.
  163. """
  164. if isinstance(obj,unicode):
  165. return obj
  166. try:
  167. # If this works, then _ustr(obj) has the same behaviour as str(obj), so
  168. # it won't break any existing code.
  169. return str(obj)
  170. except UnicodeEncodeError:
  171. # Else encode it
  172. ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
  173. xmlcharref = Regex(r'&#\d+;')
  174. xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
  175. return xmlcharref.transformString(ret)
  176. # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
  177. singleArgBuiltins = []
  178. import __builtin__
  179. for fname in "sum len sorted reversed list tuple set any all min max".split():
  180. try:
  181. singleArgBuiltins.append(getattr(__builtin__,fname))
  182. except AttributeError:
  183. continue
  184. _generatorType = type((y for y in range(1)))
  185. def _xml_escape(data):
  186. """Escape &, <, >, ", ', etc. in a string of data."""
  187. # ampersand must be replaced first
  188. from_symbols = '&><"\''
  189. to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
  190. for from_,to_ in zip(from_symbols, to_symbols):
  191. data = data.replace(from_, to_)
  192. return data
  193. alphas = string.ascii_uppercase + string.ascii_lowercase
  194. nums = "0123456789"
  195. hexnums = nums + "ABCDEFabcdef"
  196. alphanums = alphas + nums
  197. _bslash = chr(92)
  198. printables = "".join(c for c in string.printable if c not in string.whitespace)
  199. class ParseBaseException(Exception):
  200. """base exception class for all parsing runtime exceptions"""
  201. # Performance tuning: we construct a *lot* of these, so keep this
  202. # constructor as small and fast as possible
  203. def __init__( self, pstr, loc=0, msg=None, elem=None ):
  204. self.loc = loc
  205. if msg is None:
  206. self.msg = pstr
  207. self.pstr = ""
  208. else:
  209. self.msg = msg
  210. self.pstr = pstr
  211. self.parserElement = elem
  212. self.args = (pstr, loc, msg)
  213. @classmethod
  214. def _from_exception(cls, pe):
  215. """
  216. internal factory method to simplify creating one type of ParseException
  217. from another - avoids having __init__ signature conflicts among subclasses
  218. """
  219. return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
  220. def __getattr__( self, aname ):
  221. """supported attributes by name are:
  222. - lineno - returns the line number of the exception text
  223. - col - returns the column number of the exception text
  224. - line - returns the line containing the exception text
  225. """
  226. if( aname == "lineno" ):
  227. return lineno( self.loc, self.pstr )
  228. elif( aname in ("col", "column") ):
  229. return col( self.loc, self.pstr )
  230. elif( aname == "line" ):
  231. return line( self.loc, self.pstr )
  232. else:
  233. raise AttributeError(aname)
  234. def __str__( self ):
  235. return "%s (at char %d), (line:%d, col:%d)" % \
  236. ( self.msg, self.loc, self.lineno, self.column )
  237. def __repr__( self ):
  238. return _ustr(self)
  239. def markInputline( self, markerString = ">!<" ):
  240. """Extracts the exception line from the input string, and marks
  241. the location of the exception with a special symbol.
  242. """
  243. line_str = self.line
  244. line_column = self.column - 1
  245. if markerString:
  246. line_str = "".join((line_str[:line_column],
  247. markerString, line_str[line_column:]))
  248. return line_str.strip()
  249. def __dir__(self):
  250. return "lineno col line".split() + dir(type(self))
  251. class ParseException(ParseBaseException):
  252. """
  253. Exception thrown when parse expressions don't match class;
  254. supported attributes by name are:
  255. - lineno - returns the line number of the exception text
  256. - col - returns the column number of the exception text
  257. - line - returns the line containing the exception text
  258. Example::
  259. try:
  260. Word(nums).setName("integer").parseString("ABC")
  261. except ParseException as pe:
  262. print(pe)
  263. print("column: {}".format(pe.col))
  264. prints::
  265. Expected integer (at char 0), (line:1, col:1)
  266. column: 1
  267. """
  268. @staticmethod
  269. def explain(exc, depth=16):
  270. """
  271. Method to take an exception and translate the Python internal traceback into a list
  272. of the pyparsing expressions that caused the exception to be raised.
  273. Parameters:
  274. - exc - exception raised during parsing (need not be a ParseException, in support
  275. of Python exceptions that might be raised in a parse action)
  276. - depth (default=16) - number of levels back in the stack trace to list expression
  277. and function names; if None, the full stack trace names will be listed; if 0, only
  278. the failing input line, marker, and exception string will be shown
  279. Returns a multi-line string listing the ParserElements and/or function names in the
  280. exception's stack trace.
  281. Note: the diagnostic output will include string representations of the expressions
  282. that failed to parse. These representations will be more helpful if you use `setName` to
  283. give identifiable names to your expressions. Otherwise they will use the default string
  284. forms, which may be cryptic to read.
  285. explain() is only supported under Python 3.
  286. """
  287. import inspect
  288. if depth is None:
  289. depth = sys.getrecursionlimit()
  290. ret = []
  291. if isinstance(exc, ParseBaseException):
  292. ret.append(exc.line)
  293. ret.append(' ' * (exc.col - 1) + '^')
  294. ret.append("{0}: {1}".format(type(exc).__name__, exc))
  295. if depth > 0:
  296. callers = inspect.getinnerframes(exc.__traceback__, context=depth)
  297. seen = set()
  298. for i, ff in enumerate(callers[-depth:]):
  299. frm = ff.frame
  300. f_self = frm.f_locals.get('self', None)
  301. if isinstance(f_self, ParserElement):
  302. if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
  303. continue
  304. if f_self in seen:
  305. continue
  306. seen.add(f_self)
  307. self_type = type(f_self)
  308. ret.append("{0}.{1} - {2}".format(self_type.__module__,
  309. self_type.__name__,
  310. f_self))
  311. elif f_self is not None:
  312. self_type = type(f_self)
  313. ret.append("{0}.{1}".format(self_type.__module__,
  314. self_type.__name__))
  315. else:
  316. code = frm.f_code
  317. if code.co_name in ('wrapper', '<module>'):
  318. continue
  319. ret.append("{0}".format(code.co_name))
  320. depth -= 1
  321. if not depth:
  322. break
  323. return '\n'.join(ret)
  324. class ParseFatalException(ParseBaseException):
  325. """user-throwable exception thrown when inconsistent parse content
  326. is found; stops all parsing immediately"""
  327. pass
  328. class ParseSyntaxException(ParseFatalException):
  329. """just like :class:`ParseFatalException`, but thrown internally
  330. when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
  331. that parsing is to stop immediately because an unbacktrackable
  332. syntax error has been found.
  333. """
  334. pass
  335. #~ class ReparseException(ParseBaseException):
  336. #~ """Experimental class - parse actions can raise this exception to cause
  337. #~ pyparsing to reparse the input string:
  338. #~ - with a modified input string, and/or
  339. #~ - with a modified start location
  340. #~ Set the values of the ReparseException in the constructor, and raise the
  341. #~ exception in a parse action to cause pyparsing to use the new string/location.
  342. #~ Setting the values as None causes no change to be made.
  343. #~ """
  344. #~ def __init_( self, newstring, restartLoc ):
  345. #~ self.newParseText = newstring
  346. #~ self.reparseLoc = restartLoc
  347. class RecursiveGrammarException(Exception):
  348. """exception thrown by :class:`ParserElement.validate` if the
  349. grammar could be improperly recursive
  350. """
  351. def __init__( self, parseElementList ):
  352. self.parseElementTrace = parseElementList
  353. def __str__( self ):
  354. return "RecursiveGrammarException: %s" % self.parseElementTrace
  355. class _ParseResultsWithOffset(object):
  356. def __init__(self,p1,p2):
  357. self.tup = (p1,p2)
  358. def __getitem__(self,i):
  359. return self.tup[i]
  360. def __repr__(self):
  361. return repr(self.tup[0])
  362. def setOffset(self,i):
  363. self.tup = (self.tup[0],i)
  364. class ParseResults(object):
  365. """Structured parse results, to provide multiple means of access to
  366. the parsed data:
  367. - as a list (``len(results)``)
  368. - by list index (``results[0], results[1]``, etc.)
  369. - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`)
  370. Example::
  371. integer = Word(nums)
  372. date_str = (integer.setResultsName("year") + '/'
  373. + integer.setResultsName("month") + '/'
  374. + integer.setResultsName("day"))
  375. # equivalent form:
  376. # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  377. # parseString returns a ParseResults object
  378. result = date_str.parseString("1999/12/31")
  379. def test(s, fn=repr):
  380. print("%s -> %s" % (s, fn(eval(s))))
  381. test("list(result)")
  382. test("result[0]")
  383. test("result['month']")
  384. test("result.day")
  385. test("'month' in result")
  386. test("'minutes' in result")
  387. test("result.dump()", str)
  388. prints::
  389. list(result) -> ['1999', '/', '12', '/', '31']
  390. result[0] -> '1999'
  391. result['month'] -> '12'
  392. result.day -> '31'
  393. 'month' in result -> True
  394. 'minutes' in result -> False
  395. result.dump() -> ['1999', '/', '12', '/', '31']
  396. - day: 31
  397. - month: 12
  398. - year: 1999
  399. """
  400. def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
  401. if isinstance(toklist, cls):
  402. return toklist
  403. retobj = object.__new__(cls)
  404. retobj.__doinit = True
  405. return retobj
  406. # Performance tuning: we construct a *lot* of these, so keep this
  407. # constructor as small and fast as possible
  408. def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
  409. if self.__doinit:
  410. self.__doinit = False
  411. self.__name = None
  412. self.__parent = None
  413. self.__accumNames = {}
  414. self.__asList = asList
  415. self.__modal = modal
  416. if toklist is None:
  417. toklist = []
  418. if isinstance(toklist, list):
  419. self.__toklist = toklist[:]
  420. elif isinstance(toklist, _generatorType):
  421. self.__toklist = list(toklist)
  422. else:
  423. self.__toklist = [toklist]
  424. self.__tokdict = dict()
  425. if name is not None and name:
  426. if not modal:
  427. self.__accumNames[name] = 0
  428. if isinstance(name,int):
  429. name = _ustr(name) # will always return a str, but use _ustr for consistency
  430. self.__name = name
  431. if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
  432. if isinstance(toklist,basestring):
  433. toklist = [ toklist ]
  434. if asList:
  435. if isinstance(toklist,ParseResults):
  436. self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
  437. else:
  438. self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
  439. self[name].__name = name
  440. else:
  441. try:
  442. self[name] = toklist[0]
  443. except (KeyError,TypeError,IndexError):
  444. self[name] = toklist
  445. def __getitem__( self, i ):
  446. if isinstance( i, (int,slice) ):
  447. return self.__toklist[i]
  448. else:
  449. if i not in self.__accumNames:
  450. return self.__tokdict[i][-1][0]
  451. else:
  452. return ParseResults([ v[0] for v in self.__tokdict[i] ])
  453. def __setitem__( self, k, v, isinstance=isinstance ):
  454. if isinstance(v,_ParseResultsWithOffset):
  455. self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
  456. sub = v[0]
  457. elif isinstance(k,(int,slice)):
  458. self.__toklist[k] = v
  459. sub = v
  460. else:
  461. self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
  462. sub = v
  463. if isinstance(sub,ParseResults):
  464. sub.__parent = wkref(self)
  465. def __delitem__( self, i ):
  466. if isinstance(i,(int,slice)):
  467. mylen = len( self.__toklist )
  468. del self.__toklist[i]
  469. # convert int to slice
  470. if isinstance(i, int):
  471. if i < 0:
  472. i += mylen
  473. i = slice(i, i+1)
  474. # get removed indices
  475. removed = list(range(*i.indices(mylen)))
  476. removed.reverse()
  477. # fixup indices in token dictionary
  478. for name,occurrences in self.__tokdict.items():
  479. for j in removed:
  480. for k, (value, position) in enumerate(occurrences):
  481. occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
  482. else:
  483. del self.__tokdict[i]
  484. def __contains__( self, k ):
  485. return k in self.__tokdict
  486. def __len__( self ): return len( self.__toklist )
  487. def __bool__(self): return ( not not self.__toklist )
  488. __nonzero__ = __bool__
  489. def __iter__( self ): return iter( self.__toklist )
  490. def __reversed__( self ): return iter( self.__toklist[::-1] )
  491. def _iterkeys( self ):
  492. if hasattr(self.__tokdict, "iterkeys"):
  493. return self.__tokdict.iterkeys()
  494. else:
  495. return iter(self.__tokdict)
  496. def _itervalues( self ):
  497. return (self[k] for k in self._iterkeys())
  498. def _iteritems( self ):
  499. return ((k, self[k]) for k in self._iterkeys())
  500. if PY_3:
  501. keys = _iterkeys
  502. """Returns an iterator of all named result keys."""
  503. values = _itervalues
  504. """Returns an iterator of all named result values."""
  505. items = _iteritems
  506. """Returns an iterator of all named result key-value tuples."""
  507. else:
  508. iterkeys = _iterkeys
  509. """Returns an iterator of all named result keys (Python 2.x only)."""
  510. itervalues = _itervalues
  511. """Returns an iterator of all named result values (Python 2.x only)."""
  512. iteritems = _iteritems
  513. """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
  514. def keys( self ):
  515. """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
  516. return list(self.iterkeys())
  517. def values( self ):
  518. """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
  519. return list(self.itervalues())
  520. def items( self ):
  521. """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
  522. return list(self.iteritems())
  523. def haskeys( self ):
  524. """Since keys() returns an iterator, this method is helpful in bypassing
  525. code that looks for the existence of any defined results names."""
  526. return bool(self.__tokdict)
  527. def pop( self, *args, **kwargs):
  528. """
  529. Removes and returns item at specified index (default= ``last``).
  530. Supports both ``list`` and ``dict`` semantics for ``pop()``. If
  531. passed no argument or an integer argument, it will use ``list``
  532. semantics and pop tokens from the list of parsed tokens. If passed
  533. a non-integer argument (most likely a string), it will use ``dict``
  534. semantics and pop the corresponding value from any defined results
  535. names. A second default return value argument is supported, just as in
  536. ``dict.pop()``.
  537. Example::
  538. def remove_first(tokens):
  539. tokens.pop(0)
  540. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  541. print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
  542. label = Word(alphas)
  543. patt = label("LABEL") + OneOrMore(Word(nums))
  544. print(patt.parseString("AAB 123 321").dump())
  545. # Use pop() in a parse action to remove named result (note that corresponding value is not
  546. # removed from list form of results)
  547. def remove_LABEL(tokens):
  548. tokens.pop("LABEL")
  549. return tokens
  550. patt.addParseAction(remove_LABEL)
  551. print(patt.parseString("AAB 123 321").dump())
  552. prints::
  553. ['AAB', '123', '321']
  554. - LABEL: AAB
  555. ['AAB', '123', '321']
  556. """
  557. if not args:
  558. args = [-1]
  559. for k,v in kwargs.items():
  560. if k == 'default':
  561. args = (args[0], v)
  562. else:
  563. raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
  564. if (isinstance(args[0], int) or
  565. len(args) == 1 or
  566. args[0] in self):
  567. index = args[0]
  568. ret = self[index]
  569. del self[index]
  570. return ret
  571. else:
  572. defaultvalue = args[1]
  573. return defaultvalue
  574. def get(self, key, defaultValue=None):
  575. """
  576. Returns named result matching the given key, or if there is no
  577. such name, then returns the given ``defaultValue`` or ``None`` if no
  578. ``defaultValue`` is specified.
  579. Similar to ``dict.get()``.
  580. Example::
  581. integer = Word(nums)
  582. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  583. result = date_str.parseString("1999/12/31")
  584. print(result.get("year")) # -> '1999'
  585. print(result.get("hour", "not specified")) # -> 'not specified'
  586. print(result.get("hour")) # -> None
  587. """
  588. if key in self:
  589. return self[key]
  590. else:
  591. return defaultValue
  592. def insert( self, index, insStr ):
  593. """
  594. Inserts new element at location index in the list of parsed tokens.
  595. Similar to ``list.insert()``.
  596. Example::
  597. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  598. # use a parse action to insert the parse location in the front of the parsed results
  599. def insert_locn(locn, tokens):
  600. tokens.insert(0, locn)
  601. print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
  602. """
  603. self.__toklist.insert(index, insStr)
  604. # fixup indices in token dictionary
  605. for name,occurrences in self.__tokdict.items():
  606. for k, (value, position) in enumerate(occurrences):
  607. occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
  608. def append( self, item ):
  609. """
  610. Add single element to end of ParseResults list of elements.
  611. Example::
  612. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  613. # use a parse action to compute the sum of the parsed integers, and add it to the end
  614. def append_sum(tokens):
  615. tokens.append(sum(map(int, tokens)))
  616. print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
  617. """
  618. self.__toklist.append(item)
  619. def extend( self, itemseq ):
  620. """
  621. Add sequence of elements to end of ParseResults list of elements.
  622. Example::
  623. patt = OneOrMore(Word(alphas))
  624. # use a parse action to append the reverse of the matched strings, to make a palindrome
  625. def make_palindrome(tokens):
  626. tokens.extend(reversed([t[::-1] for t in tokens]))
  627. return ''.join(tokens)
  628. print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
  629. """
  630. if isinstance(itemseq, ParseResults):
  631. self += itemseq
  632. else:
  633. self.__toklist.extend(itemseq)
  634. def clear( self ):
  635. """
  636. Clear all elements and results names.
  637. """
  638. del self.__toklist[:]
  639. self.__tokdict.clear()
  640. def __getattr__( self, name ):
  641. try:
  642. return self[name]
  643. except KeyError:
  644. return ""
  645. if name in self.__tokdict:
  646. if name not in self.__accumNames:
  647. return self.__tokdict[name][-1][0]
  648. else:
  649. return ParseResults([ v[0] for v in self.__tokdict[name] ])
  650. else:
  651. return ""
  652. def __add__( self, other ):
  653. ret = self.copy()
  654. ret += other
  655. return ret
  656. def __iadd__( self, other ):
  657. if other.__tokdict:
  658. offset = len(self.__toklist)
  659. addoffset = lambda a: offset if a<0 else a+offset
  660. otheritems = other.__tokdict.items()
  661. otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
  662. for (k,vlist) in otheritems for v in vlist]
  663. for k,v in otherdictitems:
  664. self[k] = v
  665. if isinstance(v[0],ParseResults):
  666. v[0].__parent = wkref(self)
  667. self.__toklist += other.__toklist
  668. self.__accumNames.update( other.__accumNames )
  669. return self
  670. def __radd__(self, other):
  671. if isinstance(other,int) and other == 0:
  672. # useful for merging many ParseResults using sum() builtin
  673. return self.copy()
  674. else:
  675. # this may raise a TypeError - so be it
  676. return other + self
  677. def __repr__( self ):
  678. return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
  679. def __str__( self ):
  680. return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
  681. def _asStringList( self, sep='' ):
  682. out = []
  683. for item in self.__toklist:
  684. if out and sep:
  685. out.append(sep)
  686. if isinstance( item, ParseResults ):
  687. out += item._asStringList()
  688. else:
  689. out.append( _ustr(item) )
  690. return out
  691. def asList( self ):
  692. """
  693. Returns the parse results as a nested list of matching tokens, all converted to strings.
  694. Example::
  695. patt = OneOrMore(Word(alphas))
  696. result = patt.parseString("sldkj lsdkj sldkj")
  697. # even though the result prints in string-like form, it is actually a pyparsing ParseResults
  698. print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
  699. # Use asList() to create an actual list
  700. result_list = result.asList()
  701. print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
  702. """
  703. return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
  704. def asDict( self ):
  705. """
  706. Returns the named parse results as a nested dictionary.
  707. Example::
  708. integer = Word(nums)
  709. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  710. result = date_str.parseString('12/31/1999')
  711. print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
  712. result_dict = result.asDict()
  713. print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
  714. # even though a ParseResults supports dict-like access, sometime you just need to have a dict
  715. import json
  716. print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
  717. print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
  718. """
  719. if PY_3:
  720. item_fn = self.items
  721. else:
  722. item_fn = self.iteritems
  723. def toItem(obj):
  724. if isinstance(obj, ParseResults):
  725. if obj.haskeys():
  726. return obj.asDict()
  727. else:
  728. return [toItem(v) for v in obj]
  729. else:
  730. return obj
  731. return dict((k,toItem(v)) for k,v in item_fn())
  732. def copy( self ):
  733. """
  734. Returns a new copy of a :class:`ParseResults` object.
  735. """
  736. ret = ParseResults( self.__toklist )
  737. ret.__tokdict = dict(self.__tokdict.items())
  738. ret.__parent = self.__parent
  739. ret.__accumNames.update( self.__accumNames )
  740. ret.__name = self.__name
  741. return ret
  742. def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
  743. """
  744. (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
  745. """
  746. nl = "\n"
  747. out = []
  748. namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
  749. for v in vlist)
  750. nextLevelIndent = indent + " "
  751. # collapse out indents if formatting is not desired
  752. if not formatted:
  753. indent = ""
  754. nextLevelIndent = ""
  755. nl = ""
  756. selfTag = None
  757. if doctag is not None:
  758. selfTag = doctag
  759. else:
  760. if self.__name:
  761. selfTag = self.__name
  762. if not selfTag:
  763. if namedItemsOnly:
  764. return ""
  765. else:
  766. selfTag = "ITEM"
  767. out += [ nl, indent, "<", selfTag, ">" ]
  768. for i,res in enumerate(self.__toklist):
  769. if isinstance(res,ParseResults):
  770. if i in namedItems:
  771. out += [ res.asXML(namedItems[i],
  772. namedItemsOnly and doctag is None,
  773. nextLevelIndent,
  774. formatted)]
  775. else:
  776. out += [ res.asXML(None,
  777. namedItemsOnly and doctag is None,
  778. nextLevelIndent,
  779. formatted)]
  780. else:
  781. # individual token, see if there is a name for it
  782. resTag = None
  783. if i in namedItems:
  784. resTag = namedItems[i]
  785. if not resTag:
  786. if namedItemsOnly:
  787. continue
  788. else:
  789. resTag = "ITEM"
  790. xmlBodyText = _xml_escape(_ustr(res))
  791. out += [ nl, nextLevelIndent, "<", resTag, ">",
  792. xmlBodyText,
  793. "</", resTag, ">" ]
  794. out += [ nl, indent, "</", selfTag, ">" ]
  795. return "".join(out)
  796. def __lookup(self,sub):
  797. for k,vlist in self.__tokdict.items():
  798. for v,loc in vlist:
  799. if sub is v:
  800. return k
  801. return None
  802. def getName(self):
  803. r"""
  804. Returns the results name for this token expression. Useful when several
  805. different expressions might match at a particular location.
  806. Example::
  807. integer = Word(nums)
  808. ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
  809. house_number_expr = Suppress('#') + Word(nums, alphanums)
  810. user_data = (Group(house_number_expr)("house_number")
  811. | Group(ssn_expr)("ssn")
  812. | Group(integer)("age"))
  813. user_info = OneOrMore(user_data)
  814. result = user_info.parseString("22 111-22-3333 #221B")
  815. for item in result:
  816. print(item.getName(), ':', item[0])
  817. prints::
  818. age : 22
  819. ssn : 111-22-3333
  820. house_number : 221B
  821. """
  822. if self.__name:
  823. return self.__name
  824. elif self.__parent:
  825. par = self.__parent()
  826. if par:
  827. return par.__lookup(self)
  828. else:
  829. return None
  830. elif (len(self) == 1 and
  831. len(self.__tokdict) == 1 and
  832. next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
  833. return next(iter(self.__tokdict.keys()))
  834. else:
  835. return None
    def dump(self, indent='', depth=0, full=True):
        """
        Diagnostic method for listing out the contents of
        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
        that this string can be embedded in a nested display of other data.

        - indent - string prepended to every line that is generated
        - depth - nesting level; used to compute the extra indentation of
          the name and index lines (the recursive calls pass ``depth+1``)
        - full - if false, only the token list itself is rendered

        Returns the listing as a single string.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(result.dump())

        prints::

            ['12', '/', '31', '/', '1999']
            - day: 1999
            - month: 31
            - year: 12
        """
        out = []
        NL = '\n'
        # first line: the tokens rendered as a plain list
        out.append( indent+_ustr(self.asList()) )
        if full:
            if self.haskeys():
                # named results, sorted by the string form of their names
                items = sorted((str(k), v) for k,v in self.items())
                for k,v in items:
                    if out:
                        out.append(NL)
                    out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                    if isinstance(v,ParseResults):
                        if v:
                            # nested results with content are dumped recursively
                            out.append( v.dump(indent,depth+1) )
                        else:
                            out.append(_ustr(v))
                    else:
                        out.append(repr(v))
            elif any(isinstance(vv,ParseResults) for vv in self):
                # no names at this level, but nested results exist:
                # list every item under its positional index
                v = self
                for i,vv in enumerate(v):
                    if isinstance(vv,ParseResults):
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                    else:
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)
  877. def pprint(self, *args, **kwargs):
  878. """
  879. Pretty-printer for parsed results as a list, using the
  880. `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
  881. Accepts additional positional or keyword args as defined for
  882. `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
  883. Example::
  884. ident = Word(alphas, alphanums)
  885. num = Word(nums)
  886. func = Forward()
  887. term = ident | num | Group('(' + func + ')')
  888. func <<= ident + Group(Optional(delimitedList(term)))
  889. result = func.parseString("fna a,b,(fnb c,d,200),100")
  890. result.pprint(width=40)
  891. prints::
  892. ['fna',
  893. ['a',
  894. 'b',
  895. ['(', 'fnb', ['c', 'd', '200'], ')'],
  896. '100']]
  897. """
  898. pprint.pprint(self.asList(), *args, **kwargs)
  899. # add support for pickle protocol
  900. def __getstate__(self):
  901. return ( self.__toklist,
  902. ( self.__tokdict.copy(),
  903. self.__parent is not None and self.__parent() or None,
  904. self.__accumNames,
  905. self.__name ) )
  906. def __setstate__(self,state):
  907. self.__toklist = state[0]
  908. (self.__tokdict,
  909. par,
  910. inAccumNames,
  911. self.__name) = state[1]
  912. self.__accumNames = {}
  913. self.__accumNames.update(inAccumNames)
  914. if par is not None:
  915. self.__parent = wkref(par)
  916. else:
  917. self.__parent = None
  918. def __getnewargs__(self):
  919. return self.__toklist, self.__name, self.__asList, self.__modal
  920. def __dir__(self):
  921. return (dir(type(self)) + list(self.keys()))
# Register ParseResults with MutableMapping via its register() hook rather
# than by inheritance (presumably the collections ABC, making ParseResults a
# virtual subclass - confirm against the import at the top of the file).
MutableMapping.register(ParseResults)
  923. def col (loc,strg):
  924. """Returns current column within a string, counting newlines as line separators.
  925. The first column is number 1.
  926. Note: the default parsing behavior is to expand tabs in the input string
  927. before starting the parsing process. See
  928. :class:`ParserElement.parseString` for more
  929. information on parsing strings containing ``<TAB>`` s, and suggested
  930. methods to maintain a consistent view of the parsed string, the parse
  931. location, and line and column positions within the parsed string.
  932. """
  933. s = strg
  934. return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
  935. def lineno(loc,strg):
  936. """Returns current line number within a string, counting newlines as line separators.
  937. The first line is number 1.
  938. Note - the default parsing behavior is to expand tabs in the input string
  939. before starting the parsing process. See :class:`ParserElement.parseString`
  940. for more information on parsing strings containing ``<TAB>`` s, and
  941. suggested methods to maintain a consistent view of the parsed string, the
  942. parse location, and line and column positions within the parsed string.
  943. """
  944. return strg.count("\n",0,loc) + 1
  945. def line( loc, strg ):
  946. """Returns the line of text containing loc within a string, counting newlines as line separators.
  947. """
  948. lastCR = strg.rfind("\n", 0, loc)
  949. nextCR = strg.find("\n", loc)
  950. if nextCR >= 0:
  951. return strg[lastCR+1:nextCR]
  952. else:
  953. return strg[lastCR+1:]
  954. def _defaultStartDebugAction( instring, loc, expr ):
  955. print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
  956. def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
  957. print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
  958. def _defaultExceptionDebugAction( instring, loc, expr, exc ):
  959. print ("Exception raised:" + _ustr(exc))
  960. def nullDebugAction(*args):
  961. """'Do-nothing' debug action, to suppress debugging output during parsing."""
  962. pass
  963. # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
  964. #~ 'decorator to trim function calls to match the arity of the target'
  965. #~ def _trim_arity(func, maxargs=3):
  966. #~ if func in singleArgBuiltins:
  967. #~ return lambda s,l,t: func(t)
  968. #~ limit = 0
  969. #~ foundArity = False
  970. #~ def wrapper(*args):
  971. #~ nonlocal limit,foundArity
  972. #~ while 1:
  973. #~ try:
  974. #~ ret = func(*args[limit:])
  975. #~ foundArity = True
  976. #~ return ret
  977. #~ except TypeError:
  978. #~ if limit == maxargs or foundArity:
  979. #~ raise
  980. #~ limit += 1
  981. #~ continue
  982. #~ return wrapper
  983. # this version is Python 2.x-3.x cross-compatible
  984. 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap ``func`` so that it can always be called with the full argument
    list, whatever number of trailing arguments it really accepts.

    If ``func`` is one of ``singleArgBuiltins`` a lambda taking ``(s,l,t)``
    and passing on only ``t`` is returned.  Otherwise the returned wrapper
    calls ``func(*args[limit:])`` starting with ``limit == 0``; whenever that
    call itself raises ``TypeError`` it drops one more leading argument and
    retries, for as long as ``limit <= maxargs``.  After the first successful
    call the working ``limit`` is kept and any later ``TypeError`` propagates
    unchanged.

    - func - the callable (typically a parse action) to adapt
    - maxargs - upper bound used when increasing the number of leading
      arguments that are dropped

    Returns the wrapping callable; its ``__name__`` is taken from ``func``.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells shared with wrapper()
    # (no 'nonlocal', to stay Python 2 compatible)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # NOTE(review): LINE_DIFF is the number of physical lines from the
    # 'this_line = ...' statement to the 'ret = func(...)' call; with the
    # layout below (one blank line before 'def wrapper') that distance is 6 -
    # confirm against the actual file layout.
    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # the innermost inspected frame must be the call line
                        # synthesized above; any other origin means the error
                        # was raised elsewhere and is passed on
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        del tb

                # drop one more leading argument and try again
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
  1041. class ParserElement(object):
  1042. """Abstract base level parser element class."""
    # characters treated as skippable whitespace by newly created elements;
    # replaced by setDefaultWhitespaceChars()
    DEFAULT_WHITE_CHARS = " \n\t\r"
    # when true, parseString() re-raises parse exceptions with their original
    # traceback instead of re-raising them from parseString() itself
    verbose_stacktrace = False
  1045. @staticmethod
  1046. def setDefaultWhitespaceChars( chars ):
  1047. r"""
  1048. Overrides the default whitespace chars
  1049. Example::
  1050. # default whitespace chars are space, <TAB> and newline
  1051. OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
  1052. # change to just treat newline as significant
  1053. ParserElement.setDefaultWhitespaceChars(" \t")
  1054. OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
  1055. """
  1056. ParserElement.DEFAULT_WHITE_CHARS = chars
  1057. @staticmethod
  1058. def inlineLiteralsUsing(cls):
  1059. """
  1060. Set class to be used for inclusion of string literals into a parser.
  1061. Example::
  1062. # default literal class used is Literal
  1063. integer = Word(nums)
  1064. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  1065. date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
  1066. # change to Suppress
  1067. ParserElement.inlineLiteralsUsing(Suppress)
  1068. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  1069. date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
  1070. """
  1071. ParserElement._literalStringClass = cls
  1072. def __init__( self, savelist=False ):
  1073. self.parseAction = list()
  1074. self.failAction = None
  1075. #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
  1076. self.strRepr = None
  1077. self.resultsName = None
  1078. self.saveAsList = savelist
  1079. self.skipWhitespace = True
  1080. self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
  1081. self.copyDefaultWhiteChars = True
  1082. self.mayReturnEmpty = False # used when checking for left-recursion
  1083. self.keepTabs = False
  1084. self.ignoreExprs = list()
  1085. self.debug = False
  1086. self.streamlined = False
  1087. self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
  1088. self.errmsg = ""
  1089. self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
  1090. self.debugActions = ( None, None, None ) #custom debug actions
  1091. self.re = None
  1092. self.callPreparse = True # used to avoid redundant calls to preParse
  1093. self.callDuringTry = False
  1094. def copy( self ):
  1095. """
  1096. Make a copy of this :class:`ParserElement`. Useful for defining
  1097. different parse actions for the same parsing pattern, using copies of
  1098. the original parse element.
  1099. Example::
  1100. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1101. integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
  1102. integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
  1103. print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
  1104. prints::
  1105. [5120, 100, 655360, 268435456]
  1106. Equivalent form of ``expr.copy()`` is just ``expr()``::
  1107. integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
  1108. """
  1109. cpy = copy.copy( self )
  1110. cpy.parseAction = self.parseAction[:]
  1111. cpy.ignoreExprs = self.ignoreExprs[:]
  1112. if self.copyDefaultWhiteChars:
  1113. cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
  1114. return cpy
  1115. def setName( self, name ):
  1116. """
  1117. Define name for this expression, makes debugging and exception messages clearer.
  1118. Example::
  1119. Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
  1120. Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
  1121. """
  1122. self.name = name
  1123. self.errmsg = "Expected " + self.name
  1124. if hasattr(self,"exception"):
  1125. self.exception.msg = self.errmsg
  1126. return self
  1127. def setResultsName( self, name, listAllMatches=False ):
  1128. """
  1129. Define name for referencing matching tokens as a nested attribute
  1130. of the returned parse results.
  1131. NOTE: this returns a *copy* of the original :class:`ParserElement` object;
  1132. this is so that the client can define a basic element, such as an
  1133. integer, and reference it in multiple places with different names.
  1134. You can also set results names using the abbreviated syntax,
  1135. ``expr("name")`` in place of ``expr.setResultsName("name")``
  1136. - see :class:`__call__`.
  1137. Example::
  1138. date_str = (integer.setResultsName("year") + '/'
  1139. + integer.setResultsName("month") + '/'
  1140. + integer.setResultsName("day"))
  1141. # equivalent form:
  1142. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  1143. """
  1144. newself = self.copy()
  1145. if name.endswith("*"):
  1146. name = name[:-1]
  1147. listAllMatches=True
  1148. newself.resultsName = name
  1149. newself.modalResults = not listAllMatches
  1150. return newself
  1151. def setBreak(self,breakFlag = True):
  1152. """Method to invoke the Python pdb debugger when this element is
  1153. about to be parsed. Set ``breakFlag`` to True to enable, False to
  1154. disable.
  1155. """
  1156. if breakFlag:
  1157. _parseMethod = self._parse
  1158. def breaker(instring, loc, doActions=True, callPreParse=True):
  1159. import pdb
  1160. pdb.set_trace()
  1161. return _parseMethod( instring, loc, doActions, callPreParse )
  1162. breaker._originalParseMethod = _parseMethod
  1163. self._parse = breaker
  1164. else:
  1165. if hasattr(self._parse,"_originalParseMethod"):
  1166. self._parse = self._parse._originalParseMethod
  1167. return self
  1168. def setParseAction( self, *fns, **kwargs ):
  1169. """
  1170. Define one or more actions to perform when successfully matching parse element definition.
  1171. Parse action fn is a callable method with 0-3 arguments, called as ``fn(s,loc,toks)`` ,
  1172. ``fn(loc,toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
  1173. - s = the original string being parsed (see note below)
  1174. - loc = the location of the matching substring
  1175. - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object
  1176. If the functions in fns modify the tokens, they can return them as the return
  1177. value from fn, and the modified list of tokens will replace the original.
  1178. Otherwise, fn does not need to return any value.
  1179. Optional keyword arguments:
  1180. - callDuringTry = (default= ``False`` ) indicate if parse action should be run during lookaheads and alternate testing
  1181. Note: the default parsing behavior is to expand tabs in the input string
  1182. before starting the parsing process. See :class:`parseString for more
  1183. information on parsing strings containing ``<TAB>`` s, and suggested
  1184. methods to maintain a consistent view of the parsed string, the parse
  1185. location, and line and column positions within the parsed string.
  1186. Example::
  1187. integer = Word(nums)
  1188. date_str = integer + '/' + integer + '/' + integer
  1189. date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
  1190. # use parse action to convert to ints at parse time
  1191. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1192. date_str = integer + '/' + integer + '/' + integer
  1193. # note that integer fields are now ints, not strings
  1194. date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
  1195. """
  1196. self.parseAction = list(map(_trim_arity, list(fns)))
  1197. self.callDuringTry = kwargs.get("callDuringTry", False)
  1198. return self
  1199. def addParseAction( self, *fns, **kwargs ):
  1200. """
  1201. Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`.
  1202. See examples in :class:`copy`.
  1203. """
  1204. self.parseAction += list(map(_trim_arity, list(fns)))
  1205. self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
  1206. return self
  1207. def addCondition(self, *fns, **kwargs):
  1208. """Add a boolean predicate function to expression's list of parse actions. See
  1209. :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
  1210. functions passed to ``addCondition`` need to return boolean success/fail of the condition.
  1211. Optional keyword arguments:
  1212. - message = define a custom message to be used in the raised exception
  1213. - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
  1214. Example::
  1215. integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
  1216. year_int = integer.copy()
  1217. year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
  1218. date_str = year_int + '/' + integer + '/' + integer
  1219. result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
  1220. """
  1221. msg = kwargs.get("message", "failed user-defined condition")
  1222. exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
  1223. for fn in fns:
  1224. fn = _trim_arity(fn)
  1225. def pa(s,l,t):
  1226. if not bool(fn(s,l,t)):
  1227. raise exc_type(s,l,msg)
  1228. self.parseAction.append(pa)
  1229. self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
  1230. return self
  1231. def setFailAction( self, fn ):
  1232. """Define action to perform if parsing fails at this expression.
  1233. Fail acton fn is a callable function that takes the arguments
  1234. ``fn(s,loc,expr,err)`` where:
  1235. - s = string being parsed
  1236. - loc = location where expression match was attempted and failed
  1237. - expr = the parse expression that failed
  1238. - err = the exception thrown
  1239. The function returns no value. It may throw :class:`ParseFatalException`
  1240. if it is desired to stop parsing immediately."""
  1241. self.failAction = fn
  1242. return self
  1243. def _skipIgnorables( self, instring, loc ):
  1244. exprsFound = True
  1245. while exprsFound:
  1246. exprsFound = False
  1247. for e in self.ignoreExprs:
  1248. try:
  1249. while 1:
  1250. loc,dummy = e._parse( instring, loc )
  1251. exprsFound = True
  1252. except ParseException:
  1253. pass
  1254. return loc
  1255. def preParse( self, instring, loc ):
  1256. if self.ignoreExprs:
  1257. loc = self._skipIgnorables( instring, loc )
  1258. if self.skipWhitespace:
  1259. wt = self.whiteChars
  1260. instrlen = len(instring)
  1261. while loc < instrlen and instring[loc] in wt:
  1262. loc += 1
  1263. return loc
  1264. def parseImpl( self, instring, loc, doActions=True ):
  1265. return loc, []
  1266. def postParse( self, instring, loc, tokenlist ):
  1267. return tokenlist
    #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Match this element against ``instring`` at ``loc`` (uncached).

        Steps: optional ``preParse``, ``parseImpl``, ``postParse``, wrapping
        of the tokens in a :class:`ParseResults`, then the parse actions.

        - instring - the string being parsed
        - loc - location at which to attempt the match
        - doActions - if false, parse actions only run when
          ``callDuringTry`` is set
        - callPreParse - if false, ``preParse`` is not called

        Returns the tuple ``(loc, retTokens)``: the location after the match
        and the resulting :class:`ParseResults`.

        An ``IndexError`` raised by ``parseImpl`` is converted to a
        :class:`ParseException` located at the end of ``instring``; an
        ``IndexError`` raised by a parse action is converted to a
        :class:`ParseException` carrying it as ``__cause__``.
        """
        debugging = ( self.debug ) #and doActions )

        if debugging or self.failAction:
            # slow path: debug and/or fail actions must be notified
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException as err:
                #~ print ("Exception raised:", err)
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            # the IndexError guard is only set up when the element may raise
            # it, or when the match starts at/after the end of the string
            if self.mayIndexError or preloc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                # same loop as in the else branch below, but exceptions are
                # additionally reported to the exception debug action
                try:
                    for fn in self.parseAction:
                        try:
                            tokens = fn( instring, tokensStart, retTokens )
                        except IndexError as parse_action_exc:
                            exc = ParseException("exception raised in parse action")
                            exc.__cause__ = parse_action_exc
                            raise exc

                        # a returned value (other than the results object
                        # itself) replaces the current results
                        if tokens is not None and tokens is not retTokens:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException as err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                for fn in self.parseAction:
                    try:
                        tokens = fn( instring, tokensStart, retTokens )
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # a returned value (other than the results object
                    # itself) replaces the current results
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
        if debugging:
            #~ print ("Matched",self,"->",retTokens.asList())
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
  1345. def tryParse( self, instring, loc ):
  1346. try:
  1347. return self._parse( instring, loc, doActions=False )[0]
  1348. except ParseFatalException:
  1349. raise ParseException( instring, loc, self.errmsg, self)
  1350. def canParseNext(self, instring, loc):
  1351. try:
  1352. self.tryParse(instring, loc)
  1353. except (ParseException, IndexError):
  1354. return False
  1355. else:
  1356. return True
  1357. class _UnboundedCache(object):
  1358. def __init__(self):
  1359. cache = {}
  1360. self.not_in_cache = not_in_cache = object()
  1361. def get(self, key):
  1362. return cache.get(key, not_in_cache)
  1363. def set(self, key, value):
  1364. cache[key] = value
  1365. def clear(self):
  1366. cache.clear()
  1367. def cache_len(self):
  1368. return len(cache)
  1369. self.get = types.MethodType(get, self)
  1370. self.set = types.MethodType(set, self)
  1371. self.clear = types.MethodType(clear, self)
  1372. self.__len__ = types.MethodType(cache_len, self)
  1373. if _OrderedDict is not None:
  1374. class _FifoCache(object):
  1375. def __init__(self, size):
  1376. self.not_in_cache = not_in_cache = object()
  1377. cache = _OrderedDict()
  1378. def get(self, key):
  1379. return cache.get(key, not_in_cache)
  1380. def set(self, key, value):
  1381. cache[key] = value
  1382. while len(cache) > size:
  1383. try:
  1384. cache.popitem(False)
  1385. except KeyError:
  1386. pass
  1387. def clear(self):
  1388. cache.clear()
  1389. def cache_len(self):
  1390. return len(cache)
  1391. self.get = types.MethodType(get, self)
  1392. self.set = types.MethodType(set, self)
  1393. self.clear = types.MethodType(clear, self)
  1394. self.__len__ = types.MethodType(cache_len, self)
  1395. else:
  1396. class _FifoCache(object):
  1397. def __init__(self, size):
  1398. self.not_in_cache = not_in_cache = object()
  1399. cache = {}
  1400. key_fifo = collections.deque([], size)
  1401. def get(self, key):
  1402. return cache.get(key, not_in_cache)
  1403. def set(self, key, value):
  1404. cache[key] = value
  1405. while len(key_fifo) > size:
  1406. cache.pop(key_fifo.popleft(), None)
  1407. key_fifo.append(key)
  1408. def clear(self):
  1409. cache.clear()
  1410. key_fifo.clear()
  1411. def cache_len(self):
  1412. return len(cache)
  1413. self.get = types.MethodType(get, self)
  1414. self.set = types.MethodType(set, self)
  1415. self.clear = types.MethodType(clear, self)
  1416. self.__len__ = types.MethodType(cache_len, self)
    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
    # lock held by _parseCache() while it reads and updates the cache
    packrat_cache_lock = RLock()
    # [hit count, miss count], indexed by the HIT/MISS constants of _parseCache()
    packrat_cache_stats = [0, 0]
  1421. # this method gets repeatedly called during backtracking with the same arguments -
  1422. # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
  1423. def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
  1424. HIT, MISS = 0, 1
  1425. lookup = (self, instring, loc, callPreParse, doActions)
  1426. with ParserElement.packrat_cache_lock:
  1427. cache = ParserElement.packrat_cache
  1428. value = cache.get(lookup)
  1429. if value is cache.not_in_cache:
  1430. ParserElement.packrat_cache_stats[MISS] += 1
  1431. try:
  1432. value = self._parseNoCache(instring, loc, doActions, callPreParse)
  1433. except ParseBaseException as pe:
  1434. # cache a copy of the exception, without the traceback
  1435. cache.set(lookup, pe.__class__(*pe.args))
  1436. raise
  1437. else:
  1438. cache.set(lookup, (value[0], value[1].copy()))
  1439. return value
  1440. else:
  1441. ParserElement.packrat_cache_stats[HIT] += 1
  1442. if isinstance(value, Exception):
  1443. raise value
  1444. return (value[0], value[1].copy())
    # default parse entry point is the uncached implementation;
    # enablePackrat() rebinds this to _parseCache
    _parse = _parseNoCache
  1446. @staticmethod
  1447. def resetCache():
  1448. ParserElement.packrat_cache.clear()
  1449. ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
    # set to True by the first call of enablePackrat(); later calls do nothing
    _packratEnabled = False
  1451. @staticmethod
  1452. def enablePackrat(cache_size_limit=128):
  1453. """Enables "packrat" parsing, which adds memoizing to the parsing logic.
  1454. Repeated parse attempts at the same string location (which happens
  1455. often in many complex grammars) can immediately return a cached value,
  1456. instead of re-executing parsing/validating code. Memoizing is done of
  1457. both valid results and parsing exceptions.
  1458. Parameters:
  1459. - cache_size_limit - (default= ``128``) - if an integer value is provided
  1460. will limit the size of the packrat cache; if None is passed, then
  1461. the cache size will be unbounded; if 0 is passed, the cache will
  1462. be effectively disabled.
  1463. This speedup may break existing programs that use parse actions that
  1464. have side-effects. For this reason, packrat parsing is disabled when
  1465. you first import pyparsing. To activate the packrat feature, your
  1466. program must call the class method :class:`ParserElement.enablePackrat`.
  1467. For best results, call ``enablePackrat()`` immediately after
  1468. importing pyparsing.
  1469. Example::
  1470. from pip._vendor import pyparsing
  1471. pyparsing.ParserElement.enablePackrat()
  1472. """
  1473. if not ParserElement._packratEnabled:
  1474. ParserElement._packratEnabled = True
  1475. if cache_size_limit is None:
  1476. ParserElement.packrat_cache = ParserElement._UnboundedCache()
  1477. else:
  1478. ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
  1479. ParserElement._parse = ParserElement._parseCache
  1480. def parseString( self, instring, parseAll=False ):
  1481. """
  1482. Execute the parse expression with the given string.
  1483. This is the main interface to the client code, once the complete
  1484. expression has been built.
  1485. If you want the grammar to require that the entire input string be
  1486. successfully parsed, then set ``parseAll`` to True (equivalent to ending
  1487. the grammar with ``StringEnd()``).
  1488. Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string,
  1489. in order to report proper column numbers in parse actions.
  1490. If the input string contains tabs and
  1491. the grammar uses parse actions that use the ``loc`` argument to index into the
  1492. string being parsed, you can ensure you have a consistent view of the input
  1493. string by:
  1494. - calling ``parseWithTabs`` on your grammar before calling ``parseString``
  1495. (see :class:`parseWithTabs`)
  1496. - define your parse action using the full ``(s,loc,toks)`` signature, and
  1497. reference the input string using the parse action's ``s`` argument
  1498. - explictly expand the tabs in your input string before calling
  1499. ``parseString``
  1500. Example::
  1501. Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
  1502. Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
  1503. """
  1504. ParserElement.resetCache()
  1505. if not self.streamlined:
  1506. self.streamline()
  1507. #~ self.saveAsList = True
  1508. for e in self.ignoreExprs:
  1509. e.streamline()
  1510. if not self.keepTabs:
  1511. instring = instring.expandtabs()
  1512. try:
  1513. loc, tokens = self._parse( instring, 0 )
  1514. if parseAll:
  1515. loc = self.preParse( instring, loc )
  1516. se = Empty() + StringEnd()
  1517. se._parse( instring, loc )
  1518. except ParseBaseException as exc:
  1519. if ParserElement.verbose_stacktrace:
  1520. raise
  1521. else:
  1522. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  1523. raise exc
  1524. else:
  1525. return tokens
  1526. def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
  1527. """
  1528. Scan the input string for expression matches. Each match will return the
  1529. matching tokens, start location, and end location. May be called with optional
  1530. ``maxMatches`` argument, to clip scanning after 'n' matches are found. If
  1531. ``overlap`` is specified, then overlapping matches will be reported.
  1532. Note that the start and end locations are reported relative to the string
  1533. being parsed. See :class:`parseString` for more information on parsing
  1534. strings with embedded tabs.
  1535. Example::
  1536. source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
  1537. print(source)
  1538. for tokens,start,end in Word(alphas).scanString(source):
  1539. print(' '*start + '^'*(end-start))
  1540. print(' '*start + tokens[0])
  1541. prints::
  1542. sldjf123lsdjjkf345sldkjf879lkjsfd987
  1543. ^^^^^
  1544. sldjf
  1545. ^^^^^^^
  1546. lsdjjkf
  1547. ^^^^^^
  1548. sldkjf
  1549. ^^^^^^
  1550. lkjsfd
  1551. """
  1552. if not self.streamlined:
  1553. self.streamline()
  1554. for e in self.ignoreExprs:
  1555. e.streamline()
  1556. if not self.keepTabs:
  1557. instring = _ustr(instring).expandtabs()
  1558. instrlen = len(instring)
  1559. loc = 0
  1560. preparseFn = self.preParse
  1561. parseFn = self._parse
  1562. ParserElement.resetCache()
  1563. matches = 0
  1564. try:
  1565. while loc <= instrlen and matches < maxMatches:
  1566. try:
  1567. preloc = preparseFn( instring, loc )
  1568. nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
  1569. except ParseException:
  1570. loc = preloc+1
  1571. else:
  1572. if nextLoc > loc:
  1573. matches += 1
  1574. yield tokens, preloc, nextLoc
  1575. if overlap:
  1576. nextloc = preparseFn( instring, loc )
  1577. if nextloc > loc:
  1578. loc = nextLoc
  1579. else:
  1580. loc += 1
  1581. else:
  1582. loc = nextLoc
  1583. else:
  1584. loc = preloc+1
  1585. except ParseBaseException as exc:
  1586. if ParserElement.verbose_stacktrace:
  1587. raise
  1588. else:
  1589. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  1590. raise exc
  1591. def transformString( self, instring ):
  1592. """
  1593. Extension to :class:`scanString`, to modify matching text with modified tokens that may
  1594. be returned from a parse action. To use ``transformString``, define a grammar and
  1595. attach a parse action to it that modifies the returned token list.
  1596. Invoking ``transformString()`` on a target string will then scan for matches,
  1597. and replace the matched text patterns according to the logic in the parse
  1598. action. ``transformString()`` returns the resulting transformed string.
  1599. Example::
  1600. wd = Word(alphas)
  1601. wd.setParseAction(lambda toks: toks[0].title())
  1602. print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
  1603. prints::
  1604. Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
  1605. """
  1606. out = []
  1607. lastE = 0
  1608. # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
  1609. # keep string locs straight between transformString and scanString
  1610. self.keepTabs = True
  1611. try:
  1612. for t,s,e in self.scanString( instring ):
  1613. out.append( instring[lastE:s] )
  1614. if t:
  1615. if isinstance(t,ParseResults):
  1616. out += t.asList()
  1617. elif isinstance(t,list):
  1618. out += t
  1619. else:
  1620. out.append(t)
  1621. lastE = e
  1622. out.append(instring[lastE:])
  1623. out = [o for o in out if o]
  1624. return "".join(map(_ustr,_flatten(out)))
  1625. except ParseBaseException as exc:
  1626. if ParserElement.verbose_stacktrace:
  1627. raise
  1628. else:
  1629. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  1630. raise exc
  1631. def searchString( self, instring, maxMatches=_MAX_INT ):
  1632. """
  1633. Another extension to :class:`scanString`, simplifying the access to the tokens found
  1634. to match the given parse expression. May be called with optional
  1635. ``maxMatches`` argument, to clip searching after 'n' matches are found.
  1636. Example::
  1637. # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
  1638. cap_word = Word(alphas.upper(), alphas.lower())
  1639. print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
  1640. # the sum() builtin can be used to merge results into a single ParseResults object
  1641. print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
  1642. prints::
  1643. [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
  1644. ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
  1645. """
  1646. try:
  1647. return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
  1648. except ParseBaseException as exc:
  1649. if ParserElement.verbose_stacktrace:
  1650. raise
  1651. else:
  1652. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  1653. raise exc
  1654. def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
  1655. """
  1656. Generator method to split a string using the given expression as a separator.
  1657. May be called with optional ``maxsplit`` argument, to limit the number of splits;
  1658. and the optional ``includeSeparators`` argument (default= ``False``), if the separating
  1659. matching text should be included in the split results.
  1660. Example::
  1661. punc = oneOf(list(".,;:/-!?"))
  1662. print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
  1663. prints::
  1664. ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
  1665. """
  1666. splits = 0
  1667. last = 0
  1668. for t,s,e in self.scanString(instring, maxMatches=maxsplit):
  1669. yield instring[last:s]
  1670. if includeSeparators:
  1671. yield t[0]
  1672. last = e
  1673. yield instring[last:]
  1674. def __add__(self, other ):
  1675. """
  1676. Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement
  1677. converts them to :class:`Literal`s by default.
  1678. Example::
  1679. greet = Word(alphas) + "," + Word(alphas) + "!"
  1680. hello = "Hello, World!"
  1681. print (hello, "->", greet.parseString(hello))
  1682. prints::
  1683. Hello, World! -> ['Hello', ',', 'World', '!']
  1684. """
  1685. if isinstance( other, basestring ):
  1686. other = ParserElement._literalStringClass( other )
  1687. if not isinstance( other, ParserElement ):
  1688. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1689. SyntaxWarning, stacklevel=2)
  1690. return None
  1691. return And( [ self, other ] )
  1692. def __radd__(self, other ):
  1693. """
  1694. Implementation of + operator when left operand is not a :class:`ParserElement`
  1695. """
  1696. if isinstance( other, basestring ):
  1697. other = ParserElement._literalStringClass( other )
  1698. if not isinstance( other, ParserElement ):
  1699. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1700. SyntaxWarning, stacklevel=2)
  1701. return None
  1702. return other + self
  1703. def __sub__(self, other):
  1704. """
  1705. Implementation of - operator, returns :class:`And` with error stop
  1706. """
  1707. if isinstance( other, basestring ):
  1708. other = ParserElement._literalStringClass( other )
  1709. if not isinstance( other, ParserElement ):
  1710. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1711. SyntaxWarning, stacklevel=2)
  1712. return None
  1713. return self + And._ErrorStop() + other
  1714. def __rsub__(self, other ):
  1715. """
  1716. Implementation of - operator when left operand is not a :class:`ParserElement`
  1717. """
  1718. if isinstance( other, basestring ):
  1719. other = ParserElement._literalStringClass( other )
  1720. if not isinstance( other, ParserElement ):
  1721. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1722. SyntaxWarning, stacklevel=2)
  1723. return None
  1724. return other - self
  1725. def __mul__(self,other):
  1726. """
  1727. Implementation of * operator, allows use of ``expr * 3`` in place of
  1728. ``expr + expr + expr``. Expressions may also me multiplied by a 2-integer
  1729. tuple, similar to ``{min,max}`` multipliers in regular expressions. Tuples
  1730. may also include ``None`` as in:
  1731. - ``expr*(n,None)`` or ``expr*(n,)`` is equivalent
  1732. to ``expr*n + ZeroOrMore(expr)``
  1733. (read as "at least n instances of ``expr``")
  1734. - ``expr*(None,n)`` is equivalent to ``expr*(0,n)``
  1735. (read as "0 to n instances of ``expr``")
  1736. - ``expr*(None,None)`` is equivalent to ``ZeroOrMore(expr)``
  1737. - ``expr*(1,None)`` is equivalent to ``OneOrMore(expr)``
  1738. Note that ``expr*(None,n)`` does not raise an exception if
  1739. more than n exprs exist in the input stream; that is,
  1740. ``expr*(None,n)`` does not enforce a maximum number of expr
  1741. occurrences. If this behavior is desired, then write
  1742. ``expr*(None,n) + ~expr``
  1743. """
  1744. if isinstance(other,int):
  1745. minElements, optElements = other,0
  1746. elif isinstance(other,tuple):
  1747. other = (other + (None, None))[:2]
  1748. if other[0] is None:
  1749. other = (0, other[1])
  1750. if isinstance(other[0],int) and other[1] is None:
  1751. if other[0] == 0:
  1752. return ZeroOrMore(self)
  1753. if other[0] == 1:
  1754. return OneOrMore(self)
  1755. else:
  1756. return self*other[0] + ZeroOrMore(self)
  1757. elif isinstance(other[0],int) and isinstance(other[1],int):
  1758. minElements, optElements = other
  1759. optElements -= minElements
  1760. else:
  1761. raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
  1762. else:
  1763. raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
  1764. if minElements < 0:
  1765. raise ValueError("cannot multiply ParserElement by negative value")
  1766. if optElements < 0:
  1767. raise ValueError("second tuple value must be greater or equal to first tuple value")
  1768. if minElements == optElements == 0:
  1769. raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
  1770. if (optElements):
  1771. def makeOptionalList(n):
  1772. if n>1:
  1773. return Optional(self + makeOptionalList(n-1))
  1774. else:
  1775. return Optional(self)
  1776. if minElements:
  1777. if minElements == 1:
  1778. ret = self + makeOptionalList(optElements)
  1779. else:
  1780. ret = And([self]*minElements) + makeOptionalList(optElements)
  1781. else:
  1782. ret = makeOptionalList(optElements)
  1783. else:
  1784. if minElements == 1:
  1785. ret = self
  1786. else:
  1787. ret = And([self]*minElements)
  1788. return ret
  1789. def __rmul__(self, other):
  1790. return self.__mul__(other)
  1791. def __or__(self, other ):
  1792. """
  1793. Implementation of | operator - returns :class:`MatchFirst`
  1794. """
  1795. if isinstance( other, basestring ):
  1796. other = ParserElement._literalStringClass( other )
  1797. if not isinstance( other, ParserElement ):
  1798. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1799. SyntaxWarning, stacklevel=2)
  1800. return None
  1801. return MatchFirst( [ self, other ] )
  1802. def __ror__(self, other ):
  1803. """
  1804. Implementation of | operator when left operand is not a :class:`ParserElement`
  1805. """
  1806. if isinstance( other, basestring ):
  1807. other = ParserElement._literalStringClass( other )
  1808. if not isinstance( other, ParserElement ):
  1809. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1810. SyntaxWarning, stacklevel=2)
  1811. return None
  1812. return other | self
  1813. def __xor__(self, other ):
  1814. """
  1815. Implementation of ^ operator - returns :class:`Or`
  1816. """
  1817. if isinstance( other, basestring ):
  1818. other = ParserElement._literalStringClass( other )
  1819. if not isinstance( other, ParserElement ):
  1820. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1821. SyntaxWarning, stacklevel=2)
  1822. return None
  1823. return Or( [ self, other ] )
  1824. def __rxor__(self, other ):
  1825. """
  1826. Implementation of ^ operator when left operand is not a :class:`ParserElement`
  1827. """
  1828. if isinstance( other, basestring ):
  1829. other = ParserElement._literalStringClass( other )
  1830. if not isinstance( other, ParserElement ):
  1831. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1832. SyntaxWarning, stacklevel=2)
  1833. return None
  1834. return other ^ self
  1835. def __and__(self, other ):
  1836. """
  1837. Implementation of & operator - returns :class:`Each`
  1838. """
  1839. if isinstance( other, basestring ):
  1840. other = ParserElement._literalStringClass( other )
  1841. if not isinstance( other, ParserElement ):
  1842. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1843. SyntaxWarning, stacklevel=2)
  1844. return None
  1845. return Each( [ self, other ] )
  1846. def __rand__(self, other ):
  1847. """
  1848. Implementation of & operator when left operand is not a :class:`ParserElement`
  1849. """
  1850. if isinstance( other, basestring ):
  1851. other = ParserElement._literalStringClass( other )
  1852. if not isinstance( other, ParserElement ):
  1853. warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
  1854. SyntaxWarning, stacklevel=2)
  1855. return None
  1856. return other & self
  1857. def __invert__( self ):
  1858. """
  1859. Implementation of ~ operator - returns :class:`NotAny`
  1860. """
  1861. return NotAny( self )
  1862. def __call__(self, name=None):
  1863. """
  1864. Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.
  1865. If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
  1866. passed as ``True``.
  1867. If ``name` is omitted, same as calling :class:`copy`.
  1868. Example::
  1869. # these are equivalent
  1870. userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
  1871. userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
  1872. """
  1873. if name is not None:
  1874. return self.setResultsName(name)
  1875. else:
  1876. return self.copy()
  1877. def suppress( self ):
  1878. """
  1879. Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
  1880. cluttering up returned output.
  1881. """
  1882. return Suppress( self )
  1883. def leaveWhitespace( self ):
  1884. """
  1885. Disables the skipping of whitespace before matching the characters in the
  1886. :class:`ParserElement`'s defined pattern. This is normally only used internally by
  1887. the pyparsing module, but may be needed in some whitespace-sensitive grammars.
  1888. """
  1889. self.skipWhitespace = False
  1890. return self
  1891. def setWhitespaceChars( self, chars ):
  1892. """
  1893. Overrides the default whitespace chars
  1894. """
  1895. self.skipWhitespace = True
  1896. self.whiteChars = chars
  1897. self.copyDefaultWhiteChars = False
  1898. return self
  1899. def parseWithTabs( self ):
  1900. """
  1901. Overrides default behavior to expand ``<TAB>``s to spaces before parsing the input string.
  1902. Must be called before ``parseString`` when the input grammar contains elements that
  1903. match ``<TAB>`` characters.
  1904. """
  1905. self.keepTabs = True
  1906. return self
  1907. def ignore( self, other ):
  1908. """
  1909. Define expression to be ignored (e.g., comments) while doing pattern
  1910. matching; may be called repeatedly, to define multiple comment or other
  1911. ignorable patterns.
  1912. Example::
  1913. patt = OneOrMore(Word(alphas))
  1914. patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
  1915. patt.ignore(cStyleComment)
  1916. patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
  1917. """
  1918. if isinstance(other, basestring):
  1919. other = Suppress(other)
  1920. if isinstance( other, Suppress ):
  1921. if other not in self.ignoreExprs:
  1922. self.ignoreExprs.append(other)
  1923. else:
  1924. self.ignoreExprs.append( Suppress( other.copy() ) )
  1925. return self
  1926. def setDebugActions( self, startAction, successAction, exceptionAction ):
  1927. """
  1928. Enable display of debugging messages while doing pattern matching.
  1929. """
  1930. self.debugActions = (startAction or _defaultStartDebugAction,
  1931. successAction or _defaultSuccessDebugAction,
  1932. exceptionAction or _defaultExceptionDebugAction)
  1933. self.debug = True
  1934. return self
  1935. def setDebug( self, flag=True ):
  1936. """
  1937. Enable display of debugging messages while doing pattern matching.
  1938. Set ``flag`` to True to enable, False to disable.
  1939. Example::
  1940. wd = Word(alphas).setName("alphaword")
  1941. integer = Word(nums).setName("numword")
  1942. term = wd | integer
  1943. # turn on debugging for wd
  1944. wd.setDebug()
  1945. OneOrMore(term).parseString("abc 123 xyz 890")
  1946. prints::
  1947. Match alphaword at loc 0(1,1)
  1948. Matched alphaword -> ['abc']
  1949. Match alphaword at loc 3(1,4)
  1950. Exception raised:Expected alphaword (at char 4), (line:1, col:5)
  1951. Match alphaword at loc 7(1,8)
  1952. Matched alphaword -> ['xyz']
  1953. Match alphaword at loc 11(1,12)
  1954. Exception raised:Expected alphaword (at char 12), (line:1, col:13)
  1955. Match alphaword at loc 15(1,16)
  1956. Exception raised:Expected alphaword (at char 15), (line:1, col:16)
  1957. The output shown is that produced by the default debug actions - custom debug actions can be
  1958. specified using :class:`setDebugActions`. Prior to attempting
  1959. to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
  1960. is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
  1961. message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
  1962. which makes debugging and exception messages easier to understand - for instance, the default
  1963. name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
  1964. """
  1965. if flag:
  1966. self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
  1967. else:
  1968. self.debug = False
  1969. return self
  1970. def __str__( self ):
  1971. return self.name
  1972. def __repr__( self ):
  1973. return _ustr(self)
  1974. def streamline( self ):
  1975. self.streamlined = True
  1976. self.strRepr = None
  1977. return self
  1978. def checkRecursion( self, parseElementList ):
  1979. pass
  1980. def validate( self, validateTrace=[] ):
  1981. """
  1982. Check defined expressions for valid structure, check for infinite recursive definitions.
  1983. """
  1984. self.checkRecursion( [] )
  1985. def parseFile( self, file_or_filename, parseAll=False ):
  1986. """
  1987. Execute the parse expression on the given file or filename.
  1988. If a filename is specified (instead of a file object),
  1989. the entire file is opened, read, and closed before parsing.
  1990. """
  1991. try:
  1992. file_contents = file_or_filename.read()
  1993. except AttributeError:
  1994. with open(file_or_filename, "r") as f:
  1995. file_contents = f.read()
  1996. try:
  1997. return self.parseString(file_contents, parseAll)
  1998. except ParseBaseException as exc:
  1999. if ParserElement.verbose_stacktrace:
  2000. raise
  2001. else:
  2002. # catch and re-raise exception from here, clears out pyparsing internal stack trace
  2003. raise exc
  2004. def __eq__(self,other):
  2005. if isinstance(other, ParserElement):
  2006. return self is other or vars(self) == vars(other)
  2007. elif isinstance(other, basestring):
  2008. return self.matches(other)
  2009. else:
  2010. return super(ParserElement,self)==other
  2011. def __ne__(self,other):
  2012. return not (self == other)
  2013. def __hash__(self):
  2014. return hash(id(self))
  2015. def __req__(self,other):
  2016. return self == other
  2017. def __rne__(self,other):
  2018. return not (self == other)
  2019. def matches(self, testString, parseAll=True):
  2020. """
  2021. Method for quick testing of a parser against a test string. Good for simple
  2022. inline microtests of sub expressions while building up larger parser.
  2023. Parameters:
  2024. - testString - to test against this expression for a match
  2025. - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
  2026. Example::
  2027. expr = Word(nums)
  2028. assert expr.matches("100")
  2029. """
  2030. try:
  2031. self.parseString(_ustr(testString), parseAll=parseAll)
  2032. return True
  2033. except ParseBaseException:
  2034. return False
  2035. def runTests(self, tests, parseAll=True, comment='#',
  2036. fullDump=True, printResults=True, failureTests=False, postParse=None):
  2037. """
  2038. Execute the parse expression on a series of test strings, showing each
  2039. test, the parsed results or where the parse failed. Quick and easy way to
  2040. run a parse expression against a list of sample strings.
  2041. Parameters:
  2042. - tests - a list of separate test strings, or a multiline string of test strings
  2043. - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
  2044. - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
  2045. string; pass None to disable comment filtering
  2046. - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
  2047. if False, only dump nested list
  2048. - printResults - (default= ``True``) prints test output to stdout
  2049. - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
  2050. - postParse - (default= ``None``) optional callback for successful parse results; called as
  2051. `fn(test_string, parse_results)` and returns a string to be added to the test output
  2052. Returns: a (success, results) tuple, where success indicates that all tests succeeded
  2053. (or failed if ``failureTests`` is True), and the results contain a list of lines of each
  2054. test's output
  2055. Example::
  2056. number_expr = pyparsing_common.number.copy()
  2057. result = number_expr.runTests('''
  2058. # unsigned integer
  2059. 100
  2060. # negative integer
  2061. -100
  2062. # float with scientific notation
  2063. 6.02e23
  2064. # integer with scientific notation
  2065. 1e-12
  2066. ''')
  2067. print("Success" if result[0] else "Failed!")
  2068. result = number_expr.runTests('''
  2069. # stray character
  2070. 100Z
  2071. # missing leading digit before '.'
  2072. -.100
  2073. # too many '.'
  2074. 3.14.159
  2075. ''', failureTests=True)
  2076. print("Success" if result[0] else "Failed!")
  2077. prints::
  2078. # unsigned integer
  2079. 100
  2080. [100]
  2081. # negative integer
  2082. -100
  2083. [-100]
  2084. # float with scientific notation
  2085. 6.02e23
  2086. [6.02e+23]
  2087. # integer with scientific notation
  2088. 1e-12
  2089. [1e-12]
  2090. Success
  2091. # stray character
  2092. 100Z
  2093. ^
  2094. FAIL: Expected end of text (at char 3), (line:1, col:4)
  2095. # missing leading digit before '.'
  2096. -.100
  2097. ^
  2098. FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
  2099. # too many '.'
  2100. 3.14.159
  2101. ^
  2102. FAIL: Expected end of text (at char 4), (line:1, col:5)
  2103. Success
  2104. Each test string must be on a single line. If you want to test a string that spans multiple
  2105. lines, create a test like this::
  2106. expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
  2107. (Note that this is a raw string literal, you must include the leading 'r'.)
  2108. """
  2109. if isinstance(tests, basestring):
  2110. tests = list(map(str.strip, tests.rstrip().splitlines()))
  2111. if isinstance(comment, basestring):
  2112. comment = Literal(comment)
  2113. allResults = []
  2114. comments = []
  2115. success = True
  2116. for t in tests:
  2117. if comment is not None and comment.matches(t, False) or comments and not t:
  2118. comments.append(t)
  2119. continue
  2120. if not t:
  2121. continue
  2122. out = ['\n'.join(comments), t]
  2123. comments = []
  2124. try:
  2125. # convert newline marks to actual newlines, and strip leading BOM if present
  2126. t = t.replace(r'\n','\n').lstrip('\ufeff')
  2127. result = self.parseString(t, parseAll=parseAll)
  2128. out.append(result.dump(full=fullDump))
  2129. success = success and not failureTests
  2130. if postParse is not None:
  2131. try:
  2132. pp_value = postParse(t, result)
  2133. if pp_value is not None:
  2134. out.append(str(pp_value))
  2135. except Exception as e:
  2136. out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))
  2137. except ParseBaseException as pe:
  2138. fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
  2139. if '\n' in t:
  2140. out.append(line(pe.loc, t))
  2141. out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
  2142. else:
  2143. out.append(' '*pe.loc + '^' + fatal)
  2144. out.append("FAIL: " + str(pe))
  2145. success = success and failureTests
  2146. result = pe
  2147. except Exception as exc:
  2148. out.append("FAIL-EXCEPTION: " + str(exc))
  2149. success = success and failureTests
  2150. result = exc
  2151. if printResults:
  2152. if fullDump:
  2153. out.append('')
  2154. print('\n'.join(out))
  2155. allResults.append((t, result))
  2156. return success, allResults
  2157. class Token(ParserElement):
  2158. """Abstract :class:`ParserElement` subclass, for defining atomic
  2159. matching patterns.
  2160. """
  2161. def __init__( self ):
  2162. super(Token,self).__init__( savelist=False )
  2163. class Empty(Token):
  2164. """An empty token, will always match.
  2165. """
  2166. def __init__( self ):
  2167. super(Empty,self).__init__()
  2168. self.name = "Empty"
  2169. self.mayReturnEmpty = True
  2170. self.mayIndexError = False
  2171. class NoMatch(Token):
  2172. """A token that will never match.
  2173. """
  2174. def __init__( self ):
  2175. super(NoMatch,self).__init__()
  2176. self.name = "NoMatch"
  2177. self.mayReturnEmpty = True
  2178. self.mayIndexError = False
  2179. self.errmsg = "Unmatchable token"
  2180. def parseImpl( self, instring, loc, doActions=True ):
  2181. raise ParseException(instring, loc, self.errmsg, self)
  2182. class Literal(Token):
  2183. """Token to exactly match a specified string.
  2184. Example::
  2185. Literal('blah').parseString('blah') # -> ['blah']
  2186. Literal('blah').parseString('blahfooblah') # -> ['blah']
  2187. Literal('blah').parseString('bla') # -> Exception: Expected "blah"
  2188. For case-insensitive matching, use :class:`CaselessLiteral`.
  2189. For keyword matching (force word break before and after the matched string),
  2190. use :class:`Keyword` or :class:`CaselessKeyword`.
  2191. """
  2192. def __init__( self, matchString ):
  2193. super(Literal,self).__init__()
  2194. self.match = matchString
  2195. self.matchLen = len(matchString)
  2196. try:
  2197. self.firstMatchChar = matchString[0]
  2198. except IndexError:
  2199. warnings.warn("null string passed to Literal; use Empty() instead",
  2200. SyntaxWarning, stacklevel=2)
  2201. self.__class__ = Empty
  2202. self.name = '"%s"' % _ustr(self.match)
  2203. self.errmsg = "Expected " + self.name
  2204. self.mayReturnEmpty = False
  2205. self.mayIndexError = False
  2206. # Performance tuning: this routine gets called a *lot*
  2207. # if this is a single character match string and the first character matches,
  2208. # short-circuit as quickly as possible, and avoid calling startswith
  2209. #~ @profile
  2210. def parseImpl( self, instring, loc, doActions=True ):
  2211. if (instring[loc] == self.firstMatchChar and
  2212. (self.matchLen==1 or instring.startswith(self.match,loc)) ):
  2213. return loc+self.matchLen, self.match
  2214. raise ParseException(instring, loc, self.errmsg, self)
  2215. _L = Literal
  2216. ParserElement._literalStringClass = Literal
  class Keyword(Token):
      """Token to exactly match a specified string as a keyword, that is,
      it must be immediately followed by a non-keyword character.  Compare
      with :class:`Literal`:

       - ``Literal("if")`` will match the leading ``'if'`` in
         ``'ifAndOnlyIf'``.
       - ``Keyword("if")`` will not; it will only match the leading
         ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

      Accepts two optional constructor arguments in addition to the
      keyword string:

       - ``identChars`` is a string of characters that would be valid
         identifier characters, defaulting to all alphanumerics + "_" and
         "$"
       - ``caseless`` allows case-insensitive matching, default is ``False``.

      Example::

          Keyword("start").parseString("start")  # -> ['start']
          Keyword("start").parseString("starting")  # -> Exception

      For case-insensitive matching, use :class:`CaselessKeyword`.
      """
      DEFAULT_KEYWORD_CHARS = alphanums + "_$"

      def __init__(self, matchString, identChars=None, caseless=False):
          super(Keyword, self).__init__()
          if identChars is None:
              identChars = Keyword.DEFAULT_KEYWORD_CHARS
          self.match = matchString
          self.matchLen = len(matchString)
          try:
              self.firstMatchChar = matchString[0]
          except IndexError:
              warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
              # Keep the attribute defined so that a later parseImpl() call
              # fails with ParseException instead of AttributeError.
              self.firstMatchChar = ""
          self.name = '"%s"' % self.match
          self.errmsg = "Expected " + self.name
          self.mayReturnEmpty = False
          self.mayIndexError = False
          self.caseless = caseless
          if caseless:
              # Caseless comparison is done on upper-cased text, so both the
              # keyword and the identifier characters are upper-cased once here.
              self.caselessmatch = matchString.upper()
              identChars = identChars.upper()
          self.identChars = set(identChars)

      def parseImpl(self, instring, loc, doActions=True):
          """Match the keyword at ``loc`` if it is not embedded in an identifier.

          The text at ``loc`` must equal the keyword, and neither the
          character just before ``loc`` nor the one just after the keyword
          may be in ``self.identChars``.  Returns ``(new_loc, self.match)``
          or raises ParseException.
          """
          if self.caseless:
              if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
                      and (loc >= len(instring) - self.matchLen
                           or instring[loc + self.matchLen].upper() not in self.identChars)
                      and (loc == 0
                           or instring[loc - 1].upper() not in self.identChars)):
                  return loc + self.matchLen, self.match
          else:
              if (instring[loc] == self.firstMatchChar
                      and (self.matchLen == 1 or instring.startswith(self.match, loc))
                      and (loc >= len(instring) - self.matchLen
                           or instring[loc + self.matchLen] not in self.identChars)
                      and (loc == 0 or instring[loc - 1] not in self.identChars)):
                  return loc + self.matchLen, self.match
          raise ParseException(instring, loc, self.errmsg, self)

      def copy(self):
          """Return a copy of this keyword with the same ``identChars``.

          The identifier characters are carried over (as an independent set)
          rather than reset to ``DEFAULT_KEYWORD_CHARS``, so custom or
          upper-cased identifier characters given to the constructor survive
          copying.
          """
          c = super(Keyword, self).copy()
          c.identChars = set(self.identChars)
          return c

      @staticmethod
      def setDefaultKeywordChars(chars):
          """Overrides the default Keyword chars
          """
          Keyword.DEFAULT_KEYWORD_CHARS = chars
  class CaselessLiteral(Literal):
      """Token to match a specified string, ignoring case of letters.
      Note: the matched results will always be in the case of the given
      match string, NOT the case of the input text.

      Example::

          OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

      (Contrast with example for :class:`CaselessKeyword`.)
      """

      def __init__(self, matchString):
          # The parent is initialised with the upper-cased text; parseImpl
          # below compares upper-cased input against self.match.
          super(CaselessLiteral, self).__init__(matchString.upper())
          # Remember the literal exactly as written; it is what gets returned.
          self.returnString = matchString
          self.name = "'%s'" % self.returnString
          self.errmsg = "Expected " + self.name

      def parseImpl(self, instring, loc, doActions=True):
          """Return ``(end, self.returnString)`` on a case-insensitive match,
          otherwise raise ParseException."""
          end = loc + self.matchLen
          if instring[loc:end].upper() != self.match:
              raise ParseException(instring, loc, self.errmsg, self)
          return end, self.returnString
  class CaselessKeyword(Keyword):
      """
      Caseless version of :class:`Keyword`.

      Example::

          OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

      (Contrast with example for :class:`CaselessLiteral`.)
      """

      def __init__(self, matchString, identChars=None):
          # Same as Keyword, with case-insensitive matching always enabled.
          parent = super(CaselessKeyword, self)
          parent.__init__(matchString, identChars, caseless=True)
  class CloseMatch(Token):
      """A variation on :class:`Literal` which matches "close" matches,
      that is, strings with at most 'n' mismatching characters.
      :class:`CloseMatch` takes parameters:

       - ``match_string`` - string to be matched
       - ``maxMismatches`` - (``default=1``) maximum number of
         mismatches allowed to count as a match

      The results from a successful parse will contain the matched text
      from the input string and the following named results:

       - ``mismatches`` - a list of the positions within the
         match_string where mismatches were found
       - ``original`` - the original match_string used to compare
         against the input string

      If ``mismatches`` is an empty list, then the match was an exact
      match.

      Example::

          patt = CloseMatch("ATCATCGAATGGA")
          patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
          patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

          # exact match
          patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

          # close match allowing up to 2 mismatches
          patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
          patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
      """

      def __init__(self, match_string, maxMismatches=1):
          super(CloseMatch, self).__init__()
          self.name = match_string
          self.match_string = match_string
          self.maxMismatches = maxMismatches
          self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
          self.mayIndexError = False
          self.mayReturnEmpty = False

      def parseImpl(self, instring, loc, doActions=True):
          """Compare ``match_string`` with the input at ``loc``, position by position.

          Returns ``(end_loc, results)`` where ``results`` holds the matched
          input text plus the named results ``original`` and ``mismatches``.
          Raises ParseException when the input is too short or more than
          ``maxMismatches`` positions differ.
          """
          start = loc
          match_string = self.match_string
          maxloc = start + len(match_string)

          if maxloc <= len(instring):
              maxMismatches = self.maxMismatches
              mismatches = []

              for pos, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                  if src != mat:
                      mismatches.append(pos)
                      if len(mismatches) > maxMismatches:
                          break
              else:
                  # Every position was compared, so the match ends exactly
                  # len(match_string) characters after ``start``.  The end must
                  # be measured from ``start``, not from the beginning of the
                  # input, or matches at a non-zero offset report a wrong
                  # location and wrong matched text.
                  loc = maxloc
                  results = ParseResults([instring[start:loc]])
                  results['original'] = match_string
                  results['mismatches'] = mismatches
                  return loc, results

          raise ParseException(instring, loc, self.errmsg, self)
  class Word(Token):
      """Token for matching words composed of allowed character sets.
      Defined with string containing all allowed initial characters, an
      optional string containing allowed body characters (if omitted,
      defaults to the initial character set), and an optional minimum,
      maximum, and/or exact length.  The default value for ``min`` is
      1 (a minimum value < 1 is not valid); the default values for
      ``max`` and ``exact`` are 0, meaning no maximum or exact
      length restriction. An optional ``excludeChars`` parameter can
      list characters that might be found in the input ``bodyChars``
      string; useful to define a word of all printables except for one or
      two characters, for instance.

      :class:`srange` is useful for defining custom character set strings
      for defining ``Word`` expressions, using range notation from
      regular expression character sets.

      A common mistake is to use :class:`Word` to match a specific literal
      string, as in ``Word("Address")``. Remember that :class:`Word`
      uses the string argument to define *sets* of matchable characters.
      This expression would match "Add", "AAA", "dAred", or any other word
      made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
      exact literal string, use :class:`Literal` or :class:`Keyword`.

      pyparsing includes helper strings for building Words:

       - :class:`alphas`
       - :class:`nums`
       - :class:`alphanums`
       - :class:`hexnums`
       - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
         - accented, tilded, umlauted, etc.)
       - :class:`punc8bit` (non-alphabetic characters in ASCII range
         128-255 - currency, symbols, superscripts, diacriticals, etc.)
       - :class:`printables` (any non-whitespace character)

      Example::

          # a word composed of digits
          integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

          # a word with a leading capital, and zero or more lowercase
          capital_word = Word(alphas.upper(), alphas.lower())

          # hostnames are alphanumeric, with leading alpha, and '-'
          hostname = Word(alphas, alphanums+'-')

          # roman numeral (not a strict parser, accepts invalid mix of characters)
          roman = Word("IVXLCDM")

          # any string of non-whitespace characters, except for ','
          csv_value = Word(printables, excludeChars=",")
      """

      def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
          super(Word, self).__init__()

          if excludeChars:
              def without_excluded(chars):
                  return ''.join(ch for ch in chars if ch not in excludeChars)

              initChars = without_excluded(initChars)
              if bodyChars:
                  bodyChars = without_excluded(bodyChars)

          self.initCharsOrig = initChars
          self.initChars = set(initChars)
          # Body characters fall back to the initial characters when omitted.
          if not bodyChars:
              bodyChars = initChars
          self.bodyCharsOrig = bodyChars
          self.bodyChars = set(bodyChars)

          self.maxSpecified = max > 0

          if min < 1:
              raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

          self.minLen = min
          self.maxLen = max if max > 0 else _MAX_INT

          # An exact length overrides both bounds.
          if exact > 0:
              self.maxLen = exact
              self.minLen = exact

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.asKeyword = asKeyword

          # With default length limits and no space in either character set,
          # the word can be matched by a compiled regular expression.
          default_lengths = (min == 1 and max == 0 and exact == 0)
          if ' ' not in self.initCharsOrig + self.bodyCharsOrig and default_lengths:
              if self.bodyCharsOrig == self.initCharsOrig:
                  self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
              elif len(self.initCharsOrig) == 1:
                  self.reString = "%s[%s]*" % (
                      re.escape(self.initCharsOrig),
                      _escapeRegexRangeChars(self.bodyCharsOrig),
                  )
              else:
                  self.reString = "[%s][%s]*" % (
                      _escapeRegexRangeChars(self.initCharsOrig),
                      _escapeRegexRangeChars(self.bodyCharsOrig),
                  )
              if self.asKeyword:
                  self.reString = r"\b" + self.reString + r"\b"
              try:
                  self.re = re.compile(self.reString)
              except Exception:
                  # Fall back to the character-by-character matcher.
                  self.re = None

      def parseImpl(self, instring, loc, doActions=True):
          """Match a word at ``loc`` and return ``(end_loc, matched_text)``.

          Uses the compiled regular expression when one is set; otherwise
          scans characters against ``initChars``/``bodyChars`` and applies the
          length and keyword restrictions.  Raises ParseException on failure.
          """
          if self.re:
              found = self.re.match(instring, loc)
              if not found:
                  raise ParseException(instring, loc, self.errmsg, self)
              return found.end(), found.group()

          if instring[loc] not in self.initChars:
              raise ParseException(instring, loc, self.errmsg, self)

          start = loc
          loc += 1
          instrlen = len(instring)
          bodychars = self.bodyChars
          maxloc = min(start + self.maxLen, instrlen)
          while loc < maxloc and instring[loc] in bodychars:
              loc += 1

          more_body_follows = loc < instrlen and instring[loc] in bodychars
          too_short = loc - start < self.minLen
          # An explicit maximum means the word must actually stop at that length.
          exceeds_max = self.maxSpecified and more_body_follows
          # As a keyword, the word may not touch body characters on either side.
          embedded = self.asKeyword and (
              (start > 0 and instring[start - 1] in bodychars) or more_body_follows
          )

          if too_short or exceeds_max or embedded:
              raise ParseException(instring, loc, self.errmsg, self)

          return loc, instring[start:loc]

      def __str__(self):
          try:
              return super(Word, self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              def abbreviate(chars):
                  # Show at most the first four characters of a character set.
                  return (chars[:4] + "...") if len(chars) > 4 else chars

              first, rest = self.initCharsOrig, self.bodyCharsOrig
              if first == rest:
                  self.strRepr = "W:(%s)" % abbreviate(first)
              else:
                  self.strRepr = "W:(%s,%s)" % (abbreviate(first), abbreviate(rest))

          return self.strRepr
  class Char(Word):
      """A short-cut class for defining ``Word(characters, exact=1)``,
      when defining a match of any single character in a string of
      characters.
      """

      def __init__(self, charset):
          super(Char, self).__init__(charset, exact=1)
          # Install a one-character regular expression for this character set.
          char_class = _escapeRegexRangeChars(self.initCharsOrig)
          self.reString = "[%s]" % char_class
          self.re = re.compile(self.reString)
  class Regex(Token):
      r"""Token for matching strings that match a given regular
      expression. Defined with string specifying the regular expression in
      a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_.
      If the given regex contains named groups (defined using ``(?P<name>...)``),
      these will be preserved as named parse results.

      Example::

          realnum = Regex(r"[+-]?\d+\.\d*")
          date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
          # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
          roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
      """
      compiledREtype = type(re.compile("[A-Z]"))

      def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
          """The parameters ``pattern`` and ``flags`` are passed
          to the ``re.compile()`` function as-is. See the Python
          `re module <https://docs.python.org/3/library/re.html>`_ module for an
          explanation of the acceptable patterns and flags.

          ``pattern`` may also be an already compiled regular expression
          object, in which case it is used directly.  Any other type raises
          ValueError.
          """
          super(Regex, self).__init__()

          if isinstance(pattern, basestring):
              if not pattern:
                  warnings.warn("null string passed to Regex; use Empty() instead",
                                SyntaxWarning, stacklevel=2)

              self.pattern = pattern
              self.flags = flags

              try:
                  self.re = re.compile(self.pattern, self.flags)
                  self.reString = self.pattern
              except re.error:
                  # re.error is the same class as sre_constants.error.
                  warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                                SyntaxWarning, stacklevel=2)
                  raise

          elif isinstance(pattern, Regex.compiledREtype):
              self.re = pattern
              # Use the source text of the compiled expression; str(pattern)
              # would yield "re.compile('...')" rather than the pattern.
              self.pattern = self.reString = pattern.pattern
              self.flags = flags

          else:
              raise ValueError("Regex may only be constructed with a string or a compiled RE object")

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.asGroupList = asGroupList
          self.asMatch = asMatch

      def parseImpl(self, instring, loc, doActions=True):
          """Match the regular expression at ``loc``.

          Returns ``(end_loc, ret)`` where ``ret`` is the match object
          (``asMatch``), the tuple of groups (``asGroupList``), or a
          ParseResults holding the matched text with any named groups added
          as named results.  Raises ParseException when there is no match.
          """
          result = self.re.match(instring, loc)
          if not result:
              raise ParseException(instring, loc, self.errmsg, self)

          loc = result.end()
          if self.asMatch:
              ret = result
          elif self.asGroupList:
              ret = result.groups()
          else:
              ret = ParseResults(result.group())
              d = result.groupdict()
              if d:
                  for k, v in d.items():
                      ret[k] = v
          return loc, ret

      def __str__(self):
          try:
              return super(Regex, self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              self.strRepr = "Re:(%s)" % repr(self.pattern)

          return self.strRepr

      def sub(self, repl):
          r"""
          Return Regex with an attached parse action to transform the parsed
          result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

          Example::

              make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
              print(make_html.transformString("h1:main title:"))
              # prints "<h1>main title</h1>"
          """
          if self.asGroupList:
              warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                            SyntaxWarning, stacklevel=2)
              raise SyntaxError()

          if self.asMatch and callable(repl):
              warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                            SyntaxWarning, stacklevel=2)
              raise SyntaxError()

          if self.asMatch:
              # The token is a match object, so expand the template on it.
              def pa(tokens):
                  return tokens[0].expand(repl)
          else:
              # The token is the matched text, so re-apply the expression to it.
              def pa(tokens):
                  return self.re.sub(repl, tokens[0])
          return self.addParseAction(pa)
  class QuotedString(Token):
      r"""
      Token for matching strings that are delimited by quoting characters.

      Defined with the following parameters:

          - quoteChar - string of one or more characters defining the
            quote delimiting string
          - escChar - character to escape quotes, typically backslash
            (default= ``None`` )
          - escQuote - special quote sequence to escape an embedded quote
            string (such as SQL's ``""`` to escape an embedded ``"``)
            (default= ``None`` )
          - multiline - boolean indicating whether quotes can span
            multiple lines (default= ``False`` )
          - unquoteResults - boolean indicating whether the matched text
            should be unquoted (default= ``True`` )
          - endQuoteChar - string of one or more characters defining the
            end of the quote delimited string (default= ``None``  => same as
            quoteChar)
          - convertWhitespaceEscapes - convert escaped whitespace
            (``'\t'``, ``'\n'``, etc.) to actual whitespace
            (default= ``True`` )

      Example::

          qs = QuotedString('"')
          print(qs.searchString('lsjdf "This is the quote" sldjf'))
          complex_qs = QuotedString('{{', endQuoteChar='}}')
          print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
          sql_qs = QuotedString('"', escQuote='""')
          print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

      prints::

          [['This is the quote']]
          [['This is the "quote"']]
          [['This is the quote with "embedded" quotes']]
      """

      def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                   unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
          super(QuotedString, self).__init__()

          # Surrounding whitespace in the quote delimiters is dropped.
          quoteChar = quoteChar.strip()
          if not quoteChar:
              warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
              raise SyntaxError()

          if endQuoteChar is None:
              endQuoteChar = quoteChar
          else:
              endQuoteChar = endQuoteChar.strip()
              if not endQuoteChar:
                  warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                  raise SyntaxError()

          self.quoteChar = quoteChar
          self.quoteCharLen = len(quoteChar)
          self.firstQuoteChar = quoteChar[0]
          self.endQuoteChar = endQuoteChar
          self.endQuoteCharLen = len(endQuoteChar)
          self.escChar = escChar
          self.escQuote = escQuote
          self.unquoteResults = unquoteResults
          self.convertWhitespaceEscapes = convertWhitespaceEscapes

          # Ordinary body characters: anything except the first end-quote
          # character, the escape character and, unless multiline, line breaks.
          if multiline:
              self.flags = re.MULTILINE | re.DOTALL
              line_breaks = ''
          else:
              self.flags = 0
              line_breaks = r'\n\r'
          if escChar is not None:
              esc_in_class = _escapeRegexRangeChars(escChar) or ''
          else:
              esc_in_class = ''
          self.pattern = r'%s(?:[^%s%s%s]' % (
              re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              line_breaks,
              esc_in_class,
          )

          # For a multi-character end quote, also accept any proper prefix of
          # it that is followed by a character which breaks the end quote.
          if len(self.endQuoteChar) > 1:
              partial_ends = [
                  "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                  for i in range(len(self.endQuoteChar) - 1, 0, -1)
              ]
              self.pattern += '|(?:' + ')|(?:'.join(partial_ends) + ')'

          if escQuote:
              self.pattern += (r'|(?:%s)' % re.escape(escQuote))
          if escChar:
              self.pattern += (r'|(?:%s.)' % re.escape(escChar))
              self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
          self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

          try:
              self.re = re.compile(self.pattern, self.flags)
              self.reString = self.pattern
          except sre_constants.error:
              warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                            SyntaxWarning, stacklevel=2)
              raise

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.mayReturnEmpty = True

      def parseImpl(self, instring, loc, doActions=True):
          """Match a quoted string at ``loc`` and return ``(end_loc, text)``.

          When ``unquoteResults`` is set, the delimiters are removed and
          escaped whitespace, escaped characters and escaped quotes are
          converted.  Raises ParseException when nothing matches.
          """
          found = None
          # Only run the regular expression when the first character fits.
          if instring[loc] == self.firstQuoteChar:
              found = self.re.match(instring, loc)
          if not found:
              raise ParseException(instring, loc, self.errmsg, self)

          loc = found.end()
          ret = found.group()

          if not self.unquoteResults:
              return loc, ret

          # strip off quotes
          ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

          if isinstance(ret, basestring):
              # replace escaped whitespace
              if '\\' in ret and self.convertWhitespaceEscapes:
                  whitespace_escapes = (
                      (r'\t', '\t'),
                      (r'\n', '\n'),
                      (r'\f', '\f'),
                      (r'\r', '\r'),
                  )
                  for literal, actual in whitespace_escapes:
                      ret = ret.replace(literal, actual)

              # replace escaped characters
              if self.escChar:
                  ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

              # replace escaped quotes
              if self.escQuote:
                  ret = ret.replace(self.escQuote, self.endQuoteChar)

          return loc, ret

      def __str__(self):
          try:
              return super(QuotedString, self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              delimiters = (self.quoteChar, self.endQuoteChar)
              self.strRepr = "quoted string, starting with %s ending with %s" % delimiters

          return self.strRepr
  class CharsNotIn(Token):
      """Token for matching words composed of characters *not* in a given
      set (will include whitespace in matched characters if not listed in
      the provided exclusion set - see example). Defined with string
      containing all disallowed characters, and an optional minimum,
      maximum, and/or exact length.  The default value for ``min`` is
      1 (a minimum value < 1 is not valid); the default values for
      ``max`` and ``exact`` are 0, meaning no maximum or exact
      length restriction.

      Example::

          # define a comma-separated-value as anything that is not a ','
          csv_value = CharsNotIn(',')
          print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))

      prints::

          ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
      """

      def __init__(self, notChars, min=1, max=0, exact=0):
          super(CharsNotIn, self).__init__()
          # Whitespace is matched like any other character unless excluded.
          self.skipWhitespace = False
          self.notChars = notChars

          if min < 1:
              raise ValueError(
                  "cannot specify a minimum length < 1; use " +
                  "Optional(CharsNotIn()) if zero-length char group is permitted")

          self.minLen = min
          self.maxLen = max if max > 0 else _MAX_INT

          # An exact length overrides both bounds.
          if exact > 0:
              self.maxLen = exact
              self.minLen = exact

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayReturnEmpty = (self.minLen == 0)
          self.mayIndexError = False

      def parseImpl(self, instring, loc, doActions=True):
          """Consume characters not in ``notChars`` starting at ``loc``.

          Returns ``(end_loc, matched_text)``; raises ParseException when the
          first character is excluded or fewer than ``minLen`` were consumed.
          """
          excluded = self.notChars
          if instring[loc] in excluded:
              raise ParseException(instring, loc, self.errmsg, self)

          start = loc
          loc += 1
          stop = min(start + self.maxLen, len(instring))
          while loc < stop and instring[loc] not in excluded:
              loc += 1

          if loc - start < self.minLen:
              raise ParseException(instring, loc, self.errmsg, self)

          return loc, instring[start:loc]

      def __str__(self):
          try:
              return super(CharsNotIn, self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              # Show at most the first four excluded characters.
              if len(self.notChars) <= 4:
                  self.strRepr = "!W:(%s)" % self.notChars
              else:
                  self.strRepr = "!W:(%s...)" % self.notChars[:4]

          return self.strRepr
  class White(Token):
      """Special matching class for matching whitespace.  Normally,
      whitespace is ignored by pyparsing grammars.  This class is included
      when some whitespace structures are significant.  Define with
      a string containing the whitespace characters to be matched; default
      is ``" \\t\\r\\n"``.  Also takes optional ``min``,
      ``max``, and ``exact`` arguments, as defined for the
      :class:`Word` class.
      """
      # Display names used to build the expression name.  The Unicode entries
      # are written with \uXXXX escapes in unicode literals, so each key is
      # the single whitespace character it names.
      whiteStrs = {
          ' ' : '<SP>',
          '\t': '<TAB>',
          '\n': '<LF>',
          '\r': '<CR>',
          '\f': '<FF>',
          u'\u00A0': '<NBSP>',
          u'\u1680': '<OGHAM_SPACE_MARK>',
          u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
          u'\u2000': '<EN_QUAD>',
          u'\u2001': '<EM_QUAD>',
          u'\u2002': '<EN_SPACE>',
          u'\u2003': '<EM_SPACE>',
          u'\u2004': '<THREE-PER-EM_SPACE>',
          u'\u2005': '<FOUR-PER-EM_SPACE>',
          u'\u2006': '<SIX-PER-EM_SPACE>',
          u'\u2007': '<FIGURE_SPACE>',
          u'\u2008': '<PUNCTUATION_SPACE>',
          u'\u2009': '<THIN_SPACE>',
          u'\u200A': '<HAIR_SPACE>',
          u'\u200B': '<ZERO_WIDTH_SPACE>',
          u'\u202F': '<NNBSP>',
          u'\u205F': '<MMSP>',
          u'\u3000': '<IDEOGRAPHIC_SPACE>',
          }

      def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
          super(White, self).__init__()
          self.matchWhite = ws
          # The characters being matched must not be skipped as leading
          # whitespace, so they are removed from this element's skip set.
          self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite))
          #~ self.leaveWhitespace()
          self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
          self.mayReturnEmpty = True
          self.errmsg = "Expected " + self.name

          self.minLen = min

          if max > 0:
              self.maxLen = max
          else:
              self.maxLen = _MAX_INT

          # An exact length overrides both bounds.
          if exact > 0:
              self.maxLen = exact
              self.minLen = exact

      def parseImpl(self, instring, loc, doActions=True):
          """Consume a run of ``matchWhite`` characters starting at ``loc``.

          Returns ``(end_loc, matched_text)``; raises ParseException when the
          first character is not in ``matchWhite`` or the run is shorter than
          ``minLen``.
          """
          if not(instring[loc] in self.matchWhite):
              raise ParseException(instring, loc, self.errmsg, self)
          start = loc
          loc += 1
          maxloc = start + self.maxLen
          maxloc = min(maxloc, len(instring))
          while loc < maxloc and instring[loc] in self.matchWhite:
              loc += 1

          if loc - start < self.minLen:
              raise ParseException(instring, loc, self.errmsg, self)

          return loc, instring[start:loc]
  class _PositionToken(Token):
      """Base class for the position tokens defined below it
      (``GoToColumn``, ``LineStart``, ``StringEnd``, ...).
      """

      def __init__(self):
          super(_PositionToken, self).__init__()
          # Instances are named after their concrete class.
          self.name = self.__class__.__name__
          self.mayIndexError = False
          self.mayReturnEmpty = True
  class GoToColumn(_PositionToken):
      """Token to advance to a specific column of input text; useful for
      tabular report scraping.
      """

      def __init__(self, colno):
          super(GoToColumn, self).__init__()
          self.col = colno

      def preParse(self, instring, loc):
          """Skip ignorable expressions and whitespace until the target column
          is reached, the input ends, or a non-space character is hit."""
          if col(loc, instring) == self.col:
              return loc

          instrlen = len(instring)
          if self.ignoreExprs:
              loc = self._skipIgnorables(instring, loc)
          while (loc < instrlen
                 and instring[loc].isspace()
                 and col(loc, instring) != self.col):
              loc += 1
          return loc

      def parseImpl(self, instring, loc, doActions=True):
          """Return the text between ``loc`` and the target column.

          Raises ParseException when ``loc`` is already past that column.
          """
          current = col(loc, instring)
          if current > self.col:
              raise ParseException(instring, loc, "Text not in expected column", self)
          target = loc + self.col - current
          return target, instring[loc:target]
  class LineStart(_PositionToken):
      r"""Matches if current position is at the beginning of a line within
      the parse string

      Example::

          test = '''\
          AAA this line
          AAA and this line
            AAA but not this one
          B AAA and definitely not this one
          '''

          for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
              print(t)

      prints::

          ['AAA', ' this line']
          ['AAA', ' and this line']
      """

      def __init__(self):
          super(LineStart, self).__init__()
          self.errmsg = "Expected start of line"

      def parseImpl(self, instring, loc, doActions=True):
          """Succeed with no tokens when ``loc`` is in column 1."""
          if col(loc, instring) != 1:
              raise ParseException(instring, loc, self.errmsg, self)
          return loc, []
  class LineEnd(_PositionToken):
      """Matches if current position is at the end of a line within the
      parse string
      """

      def __init__(self):
          super(LineEnd, self).__init__()
          # A newline is what this token matches, so it must not be skipped
          # as leading whitespace.
          self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
          self.errmsg = "Expected end of line"

      def parseImpl(self, instring, loc, doActions=True):
          """Match a newline at ``loc`` or the end of the input.

          Returns ``(loc + 1, "\\n")`` for a newline and ``(loc + 1, [])`` at
          the end of the input; raises ParseException anywhere else.
          """
          instrlen = len(instring)
          if loc < instrlen and instring[loc] == "\n":
              return loc + 1, "\n"
          if loc == instrlen:
              return loc + 1, []
          raise ParseException(instring, loc, self.errmsg, self)
  class StringStart(_PositionToken):
      """Matches if current position is at the beginning of the parse
      string
      """

      def __init__(self):
          super(StringStart, self).__init__()
          self.errmsg = "Expected start of text"

      def parseImpl(self, instring, loc, doActions=True):
          """Succeed at position 0, or at the position that ``preParse``
          reaches when started from 0 (only whitespace and ignorables
          precede ``loc``)."""
          if loc != 0 and loc != self.preParse(instring, 0):
              raise ParseException(instring, loc, self.errmsg, self)
          return loc, []
  class StringEnd(_PositionToken):
      """Matches if current position is at the end of the parse string
      """

      def __init__(self):
          super(StringEnd, self).__init__()
          self.errmsg = "Expected end of text"

      def parseImpl(self, instring, loc, doActions=True):
          """Succeed with no tokens when ``loc`` is at or beyond the end of
          ``instring``; raise ParseException when input remains.

          Exactly at the end the returned location is ``loc + 1``; beyond the
          end ``loc`` is returned unchanged.
          """
          instrlen = len(instring)
          if loc < instrlen:
              raise ParseException(instring, loc, self.errmsg, self)
          if loc == instrlen:
              return loc + 1, []
          # Only loc > instrlen remains, so no further failure branch is needed.
          return loc, []
  class WordStart(_PositionToken):
      r"""Matches if the current position is at the beginning of a Word,
      and is not preceded by any character in a given set of
      ``wordChars`` (default= ``printables``). To emulate the
      ``\b`` behavior of regular expressions, use
      ``WordStart(alphanums)``. ``WordStart`` will also match at
      the beginning of the string being parsed, or at the beginning of
      a line.
      """

      def __init__(self, wordChars=printables):
          super(WordStart, self).__init__()
          self.wordChars = set(wordChars)
          self.errmsg = "Not at the start of a word"

      def parseImpl(self, instring, loc, doActions=True):
          """Succeed with no tokens at position 0, or when the character at
          ``loc`` is a word character and the one before it is not."""
          if loc != 0:
              if (instring[loc - 1] in self.wordChars
                      or instring[loc] not in self.wordChars):
                  raise ParseException(instring, loc, self.errmsg, self)
          return loc, []
  class WordEnd(_PositionToken):
      r"""Matches if the current position is at the end of a Word, and is
      not followed by any character in a given set of ``wordChars``
      (default= ``printables``). To emulate the ``\b`` behavior of
      regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
      will also match at the end of the string being parsed, or at the end
      of a line.
      """

      def __init__(self, wordChars=printables):
          super(WordEnd, self).__init__()
          self.wordChars = set(wordChars)
          self.skipWhitespace = False
          self.errmsg = "Not at the end of a word"

      def parseImpl(self, instring, loc, doActions=True):
          """Succeed with no tokens at (or past) the end of the input, or when
          the character before ``loc`` is a word character and the one at
          ``loc`` is not.  Raises ParseException otherwise.
          """
          instrlen = len(instring)
          if instrlen > 0 and loc < instrlen:
              # At loc == 0 there is no preceding character, so no word can
              # end here; without this check instring[loc - 1] would wrap
              # around to the last character of the input.
              if (instring[loc] in self.wordChars
                      or loc == 0
                      or instring[loc - 1] not in self.wordChars):
                  raise ParseException(instring, loc, self.errmsg, self)
          return loc, []
  2981. class ParseExpression(ParserElement):
  2982. """Abstract subclass of ParserElement, for combining and
  2983. post-processing parsed tokens.
  2984. """
  2985. def __init__( self, exprs, savelist = False ):
  2986. super(ParseExpression,self).__init__(savelist)
  2987. if isinstance( exprs, _generatorType ):
  2988. exprs = list(exprs)
  2989. if isinstance( exprs, basestring ):
  2990. self.exprs = [ ParserElement._literalStringClass( exprs ) ]
  2991. elif isinstance( exprs, Iterable ):
  2992. exprs = list(exprs)
  2993. # if sequence of strings provided, wrap with Literal
  2994. if all(isinstance(expr, basestring) for expr in exprs):
  2995. exprs = map(ParserElement._literalStringClass, exprs)
  2996. self.exprs = list(exprs)
  2997. else:
  2998. try:
  2999. self.exprs = list( exprs )
  3000. except TypeError:
  3001. self.exprs = [ exprs ]
  3002. self.callPreparse = False
  3003. def __getitem__( self, i ):
  3004. return self.exprs[i]
  3005. def append( self, other ):
  3006. self.exprs.append( other )
  3007. self.strRepr = None
  3008. return self
  3009. def leaveWhitespace( self ):
  3010. """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
  3011. all contained expressions."""
  3012. self.skipWhitespace = False
  3013. self.exprs = [ e.copy() for e in self.exprs ]
  3014. for e in self.exprs:
  3015. e.leaveWhitespace()
  3016. return self
  3017. def ignore( self, other ):
  3018. if isinstance( other, Suppress ):
  3019. if other not in self.ignoreExprs:
  3020. super( ParseExpression, self).ignore( other )
  3021. for e in self.exprs:
  3022. e.ignore( self.ignoreExprs[-1] )
  3023. else:
  3024. super( ParseExpression, self).ignore( other )
  3025. for e in self.exprs:
  3026. e.ignore( self.ignoreExprs[-1] )
  3027. return self
  3028. def __str__( self ):
  3029. try:
  3030. return super(ParseExpression,self).__str__()
  3031. except Exception:
  3032. pass
  3033. if self.strRepr is None:
  3034. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
  3035. return self.strRepr
  3036. def streamline( self ):
  3037. super(ParseExpression,self).streamline()
  3038. for e in self.exprs:
  3039. e.streamline()
  3040. # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
  3041. # but only if there are no parse actions or resultsNames on the nested And's
  3042. # (likewise for Or's and MatchFirst's)
  3043. if ( len(self.exprs) == 2 ):
  3044. other = self.exprs[0]
  3045. if ( isinstance( other, self.__class__ ) and
  3046. not(other.parseAction) and
  3047. other.resultsName is None and
  3048. not other.debug ):
  3049. self.exprs = other.exprs[:] + [ self.exprs[1] ]
  3050. self.strRepr = None
  3051. self.mayReturnEmpty |= other.mayReturnEmpty
  3052. self.mayIndexError |= other.mayIndexError
  3053. other = self.exprs[-1]
  3054. if ( isinstance( other, self.__class__ ) and
  3055. not(other.parseAction) and
  3056. other.resultsName is None and
  3057. not other.debug ):
  3058. self.exprs = self.exprs[:-1] + other.exprs[:]
  3059. self.strRepr = None
  3060. self.mayReturnEmpty |= other.mayReturnEmpty
  3061. self.mayIndexError |= other.mayIndexError
  3062. self.errmsg = "Expected " + _ustr(self)
  3063. return self
  3064. def setResultsName( self, name, listAllMatches=False ):
  3065. ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
  3066. return ret
  3067. def validate( self, validateTrace=[] ):
  3068. tmp = validateTrace[:]+[self]
  3069. for e in self.exprs:
  3070. e.validate(tmp)
  3071. self.checkRecursion( [] )
  3072. def copy(self):
  3073. ret = super(ParseExpression,self).copy()
  3074. ret.exprs = [e.copy() for e in self.exprs]
  3075. return ret
  3076. class And(ParseExpression):
  3077. """
  3078. Requires all given :class:`ParseExpression` s to be found in the given order.
  3079. Expressions may be separated by whitespace.
  3080. May be constructed using the ``'+'`` operator.
  3081. May also be constructed using the ``'-'`` operator, which will
  3082. suppress backtracking.
  3083. Example::
  3084. integer = Word(nums)
  3085. name_expr = OneOrMore(Word(alphas))
  3086. expr = And([integer("id"),name_expr("name"),integer("age")])
  3087. # more easily written as:
  3088. expr = integer("id") + name_expr("name") + integer("age")
  3089. """
  3090. class _ErrorStop(Empty):
  3091. def __init__(self, *args, **kwargs):
  3092. super(And._ErrorStop,self).__init__(*args, **kwargs)
  3093. self.name = '-'
  3094. self.leaveWhitespace()
  3095. def __init__( self, exprs, savelist = True ):
  3096. super(And,self).__init__(exprs, savelist)
  3097. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3098. self.setWhitespaceChars( self.exprs[0].whiteChars )
  3099. self.skipWhitespace = self.exprs[0].skipWhitespace
  3100. self.callPreparse = True
  3101. def streamline(self):
  3102. super(And, self).streamline()
  3103. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3104. return self
  3105. def parseImpl( self, instring, loc, doActions=True ):
  3106. # pass False as last arg to _parse for first element, since we already
  3107. # pre-parsed the string as part of our And pre-parsing
  3108. loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
  3109. errorStop = False
  3110. for e in self.exprs[1:]:
  3111. if isinstance(e, And._ErrorStop):
  3112. errorStop = True
  3113. continue
  3114. if errorStop:
  3115. try:
  3116. loc, exprtokens = e._parse( instring, loc, doActions )
  3117. except ParseSyntaxException:
  3118. raise
  3119. except ParseBaseException as pe:
  3120. pe.__traceback__ = None
  3121. raise ParseSyntaxException._from_exception(pe)
  3122. except IndexError:
  3123. raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
  3124. else:
  3125. loc, exprtokens = e._parse( instring, loc, doActions )
  3126. if exprtokens or exprtokens.haskeys():
  3127. resultlist += exprtokens
  3128. return loc, resultlist
  3129. def __iadd__(self, other ):
  3130. if isinstance( other, basestring ):
  3131. other = ParserElement._literalStringClass( other )
  3132. return self.append( other ) #And( [ self, other ] )
  3133. def checkRecursion( self, parseElementList ):
  3134. subRecCheckList = parseElementList[:] + [ self ]
  3135. for e in self.exprs:
  3136. e.checkRecursion( subRecCheckList )
  3137. if not e.mayReturnEmpty:
  3138. break
  3139. def __str__( self ):
  3140. if hasattr(self,"name"):
  3141. return self.name
  3142. if self.strRepr is None:
  3143. self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
  3144. return self.strRepr
  3145. class Or(ParseExpression):
  3146. """Requires that at least one :class:`ParseExpression` is found. If
  3147. two expressions match, the expression that matches the longest
  3148. string will be used. May be constructed using the ``'^'``
  3149. operator.
  3150. Example::
  3151. # construct Or using '^' operator
  3152. number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
  3153. print(number.searchString("123 3.1416 789"))
  3154. prints::
  3155. [['123'], ['3.1416'], ['789']]
  3156. """
  3157. def __init__( self, exprs, savelist = False ):
  3158. super(Or,self).__init__(exprs, savelist)
  3159. if self.exprs:
  3160. self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
  3161. else:
  3162. self.mayReturnEmpty = True
  3163. def streamline(self):
  3164. super(Or, self).streamline()
  3165. self.saveAsList = any(e.saveAsList for e in self.exprs)
  3166. return self
  3167. def parseImpl( self, instring, loc, doActions=True ):
  3168. maxExcLoc = -1
  3169. maxException = None
  3170. matches = []
  3171. for e in self.exprs:
  3172. try:
  3173. loc2 = e.tryParse( instring, loc )
  3174. except ParseException as err:
  3175. err.__traceback__ = None
  3176. if err.loc > maxExcLoc:
  3177. maxException = err
  3178. maxExcLoc = err.loc
  3179. except IndexError:
  3180. if len(instring) > maxExcLoc:
  3181. maxException = ParseException(instring,len(instring),e.errmsg,self)
  3182. maxExcLoc = len(instring)
  3183. else:
  3184. # save match among all matches, to retry longest to shortest
  3185. matches.append((loc2, e))
  3186. if matches:
  3187. matches.sort(key=lambda x: -x[0])
  3188. for _,e in matches:
  3189. try:
  3190. return e._parse( instring, loc, doActions )
  3191. except ParseException as err:
  3192. err.__traceback__ = None
  3193. if err.loc > maxExcLoc:
  3194. maxException = err
  3195. maxExcLoc = err.loc
  3196. if maxException is not None:
  3197. maxException.msg = self.errmsg
  3198. raise maxException
  3199. else:
  3200. raise ParseException(instring, loc, "no defined alternatives to match", self)
  3201. def __ixor__(self, other ):
  3202. if isinstance( other, basestring ):
  3203. other = ParserElement._literalStringClass( other )
  3204. return self.append( other ) #Or( [ self, other ] )
  3205. def __str__( self ):
  3206. if hasattr(self,"name"):
  3207. return self.name
  3208. if self.strRepr is None:
  3209. self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
  3210. return self.strRepr
  3211. def checkRecursion( self, parseElementList ):
  3212. subRecCheckList = parseElementList[:] + [ self ]
  3213. for e in self.exprs:
  3214. e.checkRecursion( subRecCheckList )
  3215. class MatchFirst(ParseExpression):
  3216. """Requires that at least one :class:`ParseExpression` is found. If
  3217. two expressions match, the first one listed is the one that will
  3218. match. May be constructed using the ``'|'`` operator.
  3219. Example::
  3220. # construct MatchFirst using '|' operator
  3221. # watch the order of expressions to match
  3222. number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
  3223. print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
  3224. # put more selective expression first
  3225. number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
  3226. print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
  3227. """
  3228. def __init__( self, exprs, savelist = False ):
  3229. super(MatchFirst,self).__init__(exprs, savelist)
  3230. if self.exprs:
  3231. self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
  3232. # self.saveAsList = any(e.saveAsList for e in self.exprs)
  3233. else:
  3234. self.mayReturnEmpty = True
  3235. def streamline(self):
  3236. super(MatchFirst, self).streamline()
  3237. self.saveAsList = any(e.saveAsList for e in self.exprs)
  3238. return self
  3239. def parseImpl( self, instring, loc, doActions=True ):
  3240. maxExcLoc = -1
  3241. maxException = None
  3242. for e in self.exprs:
  3243. try:
  3244. ret = e._parse( instring, loc, doActions )
  3245. return ret
  3246. except ParseException as err:
  3247. if err.loc > maxExcLoc:
  3248. maxException = err
  3249. maxExcLoc = err.loc
  3250. except IndexError:
  3251. if len(instring) > maxExcLoc:
  3252. maxException = ParseException(instring,len(instring),e.errmsg,self)
  3253. maxExcLoc = len(instring)
  3254. # only got here if no expression matched, raise exception for match that made it the furthest
  3255. else:
  3256. if maxException is not None:
  3257. maxException.msg = self.errmsg
  3258. raise maxException
  3259. else:
  3260. raise ParseException(instring, loc, "no defined alternatives to match", self)
  3261. def __ior__(self, other ):
  3262. if isinstance( other, basestring ):
  3263. other = ParserElement._literalStringClass( other )
  3264. return self.append( other ) #MatchFirst( [ self, other ] )
  3265. def __str__( self ):
  3266. if hasattr(self,"name"):
  3267. return self.name
  3268. if self.strRepr is None:
  3269. self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
  3270. return self.strRepr
  3271. def checkRecursion( self, parseElementList ):
  3272. subRecCheckList = parseElementList[:] + [ self ]
  3273. for e in self.exprs:
  3274. e.checkRecursion( subRecCheckList )
  3275. class Each(ParseExpression):
  3276. """Requires all given :class:`ParseExpression` s to be found, but in
  3277. any order. Expressions may be separated by whitespace.
  3278. May be constructed using the ``'&'`` operator.
  3279. Example::
  3280. color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
  3281. shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
  3282. integer = Word(nums)
  3283. shape_attr = "shape:" + shape_type("shape")
  3284. posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
  3285. color_attr = "color:" + color("color")
  3286. size_attr = "size:" + integer("size")
  3287. # use Each (using operator '&') to accept attributes in any order
  3288. # (shape and posn are required, color and size are optional)
  3289. shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
  3290. shape_spec.runTests('''
  3291. shape: SQUARE color: BLACK posn: 100, 120
  3292. shape: CIRCLE size: 50 color: BLUE posn: 50,80
  3293. color:GREEN size:20 shape:TRIANGLE posn:20,40
  3294. '''
  3295. )
  3296. prints::
  3297. shape: SQUARE color: BLACK posn: 100, 120
  3298. ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
  3299. - color: BLACK
  3300. - posn: ['100', ',', '120']
  3301. - x: 100
  3302. - y: 120
  3303. - shape: SQUARE
  3304. shape: CIRCLE size: 50 color: BLUE posn: 50,80
  3305. ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
  3306. - color: BLUE
  3307. - posn: ['50', ',', '80']
  3308. - x: 50
  3309. - y: 80
  3310. - shape: CIRCLE
  3311. - size: 50
  3312. color: GREEN size: 20 shape: TRIANGLE posn: 20,40
  3313. ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
  3314. - color: GREEN
  3315. - posn: ['20', ',', '40']
  3316. - x: 20
  3317. - y: 40
  3318. - shape: TRIANGLE
  3319. - size: 20
  3320. """
  3321. def __init__( self, exprs, savelist = True ):
  3322. super(Each,self).__init__(exprs, savelist)
  3323. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3324. self.skipWhitespace = True
  3325. self.initExprGroups = True
  3326. self.saveAsList = True
  3327. def streamline(self):
  3328. super(Each, self).streamline()
  3329. self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
  3330. return self
  3331. def parseImpl( self, instring, loc, doActions=True ):
  3332. if self.initExprGroups:
  3333. self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
  3334. opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
  3335. opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
  3336. self.optionals = opt1 + opt2
  3337. self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
  3338. self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
  3339. self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
  3340. self.required += self.multirequired
  3341. self.initExprGroups = False
  3342. tmpLoc = loc
  3343. tmpReqd = self.required[:]
  3344. tmpOpt = self.optionals[:]
  3345. matchOrder = []
  3346. keepMatching = True
  3347. while keepMatching:
  3348. tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
  3349. failed = []
  3350. for e in tmpExprs:
  3351. try:
  3352. tmpLoc = e.tryParse( instring, tmpLoc )
  3353. except ParseException:
  3354. failed.append(e)
  3355. else:
  3356. matchOrder.append(self.opt1map.get(id(e),e))
  3357. if e in tmpReqd:
  3358. tmpReqd.remove(e)
  3359. elif e in tmpOpt:
  3360. tmpOpt.remove(e)
  3361. if len(failed) == len(tmpExprs):
  3362. keepMatching = False
  3363. if tmpReqd:
  3364. missing = ", ".join(_ustr(e) for e in tmpReqd)
  3365. raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
  3366. # add any unmatched Optionals, in case they have default values defined
  3367. matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
  3368. resultlist = []
  3369. for e in matchOrder:
  3370. loc,results = e._parse(instring,loc,doActions)
  3371. resultlist.append(results)
  3372. finalResults = sum(resultlist, ParseResults([]))
  3373. return loc, finalResults
  3374. def __str__( self ):
  3375. if hasattr(self,"name"):
  3376. return self.name
  3377. if self.strRepr is None:
  3378. self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
  3379. return self.strRepr
  3380. def checkRecursion( self, parseElementList ):
  3381. subRecCheckList = parseElementList[:] + [ self ]
  3382. for e in self.exprs:
  3383. e.checkRecursion( subRecCheckList )
  3384. class ParseElementEnhance(ParserElement):
  3385. """Abstract subclass of :class:`ParserElement`, for combining and
  3386. post-processing parsed tokens.
  3387. """
  3388. def __init__( self, expr, savelist=False ):
  3389. super(ParseElementEnhance,self).__init__(savelist)
  3390. if isinstance( expr, basestring ):
  3391. if issubclass(ParserElement._literalStringClass, Token):
  3392. expr = ParserElement._literalStringClass(expr)
  3393. else:
  3394. expr = ParserElement._literalStringClass(Literal(expr))
  3395. self.expr = expr
  3396. self.strRepr = None
  3397. if expr is not None:
  3398. self.mayIndexError = expr.mayIndexError
  3399. self.mayReturnEmpty = expr.mayReturnEmpty
  3400. self.setWhitespaceChars( expr.whiteChars )
  3401. self.skipWhitespace = expr.skipWhitespace
  3402. self.saveAsList = expr.saveAsList
  3403. self.callPreparse = expr.callPreparse
  3404. self.ignoreExprs.extend(expr.ignoreExprs)
  3405. def parseImpl( self, instring, loc, doActions=True ):
  3406. if self.expr is not None:
  3407. return self.expr._parse( instring, loc, doActions, callPreParse=False )
  3408. else:
  3409. raise ParseException("",loc,self.errmsg,self)
  3410. def leaveWhitespace( self ):
  3411. self.skipWhitespace = False
  3412. self.expr = self.expr.copy()
  3413. if self.expr is not None:
  3414. self.expr.leaveWhitespace()
  3415. return self
  3416. def ignore( self, other ):
  3417. if isinstance( other, Suppress ):
  3418. if other not in self.ignoreExprs:
  3419. super( ParseElementEnhance, self).ignore( other )
  3420. if self.expr is not None:
  3421. self.expr.ignore( self.ignoreExprs[-1] )
  3422. else:
  3423. super( ParseElementEnhance, self).ignore( other )
  3424. if self.expr is not None:
  3425. self.expr.ignore( self.ignoreExprs[-1] )
  3426. return self
  3427. def streamline( self ):
  3428. super(ParseElementEnhance,self).streamline()
  3429. if self.expr is not None:
  3430. self.expr.streamline()
  3431. return self
  3432. def checkRecursion( self, parseElementList ):
  3433. if self in parseElementList:
  3434. raise RecursiveGrammarException( parseElementList+[self] )
  3435. subRecCheckList = parseElementList[:] + [ self ]
  3436. if self.expr is not None:
  3437. self.expr.checkRecursion( subRecCheckList )
  3438. def validate( self, validateTrace=[] ):
  3439. tmp = validateTrace[:]+[self]
  3440. if self.expr is not None:
  3441. self.expr.validate(tmp)
  3442. self.checkRecursion( [] )
  3443. def __str__( self ):
  3444. try:
  3445. return super(ParseElementEnhance,self).__str__()
  3446. except Exception:
  3447. pass
  3448. if self.strRepr is None and self.expr is not None:
  3449. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
  3450. return self.strRepr
  3451. class FollowedBy(ParseElementEnhance):
  3452. """Lookahead matching of the given parse expression.
  3453. ``FollowedBy`` does *not* advance the parsing position within
  3454. the input string, it only verifies that the specified parse
  3455. expression matches at the current position. ``FollowedBy``
  3456. always returns a null token list. If any results names are defined
  3457. in the lookahead expression, those *will* be returned for access by
  3458. name.
  3459. Example::
  3460. # use FollowedBy to match a label only if it is followed by a ':'
  3461. data_word = Word(alphas)
  3462. label = data_word + FollowedBy(':')
  3463. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3464. OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
  3465. prints::
  3466. [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
  3467. """
  3468. def __init__( self, expr ):
  3469. super(FollowedBy,self).__init__(expr)
  3470. self.mayReturnEmpty = True
  3471. def parseImpl( self, instring, loc, doActions=True ):
  3472. _, ret = self.expr._parse(instring, loc, doActions=doActions)
  3473. del ret[:]
  3474. return loc, ret
  3475. class PrecededBy(ParseElementEnhance):
  3476. """Lookbehind matching of the given parse expression.
  3477. ``PrecededBy`` does not advance the parsing position within the
  3478. input string, it only verifies that the specified parse expression
  3479. matches prior to the current position. ``PrecededBy`` always
  3480. returns a null token list, but if a results name is defined on the
  3481. given expression, it is returned.
  3482. Parameters:
  3483. - expr - expression that must match prior to the current parse
  3484. location
  3485. - retreat - (default= ``None``) - (int) maximum number of characters
  3486. to lookbehind prior to the current parse location
  3487. If the lookbehind expression is a string, Literal, Keyword, or
  3488. a Word or CharsNotIn with a specified exact or maximum length, then
  3489. the retreat parameter is not required. Otherwise, retreat must be
  3490. specified to give a maximum number of characters to look back from
  3491. the current parse position for a lookbehind match.
  3492. Example::
  3493. # VB-style variable names with type prefixes
  3494. int_var = PrecededBy("#") + pyparsing_common.identifier
  3495. str_var = PrecededBy("$") + pyparsing_common.identifier
  3496. """
  3497. def __init__(self, expr, retreat=None):
  3498. super(PrecededBy, self).__init__(expr)
  3499. self.expr = self.expr().leaveWhitespace()
  3500. self.mayReturnEmpty = True
  3501. self.mayIndexError = False
  3502. self.exact = False
  3503. if isinstance(expr, str):
  3504. retreat = len(expr)
  3505. self.exact = True
  3506. elif isinstance(expr, (Literal, Keyword)):
  3507. retreat = expr.matchLen
  3508. self.exact = True
  3509. elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
  3510. retreat = expr.maxLen
  3511. self.exact = True
  3512. elif isinstance(expr, _PositionToken):
  3513. retreat = 0
  3514. self.exact = True
  3515. self.retreat = retreat
  3516. self.errmsg = "not preceded by " + str(expr)
  3517. self.skipWhitespace = False
  3518. def parseImpl(self, instring, loc=0, doActions=True):
  3519. if self.exact:
  3520. if loc < self.retreat:
  3521. raise ParseException(instring, loc, self.errmsg)
  3522. start = loc - self.retreat
  3523. _, ret = self.expr._parse(instring, start)
  3524. else:
  3525. # retreat specified a maximum lookbehind window, iterate
  3526. test_expr = self.expr + StringEnd()
  3527. instring_slice = instring[:loc]
  3528. last_expr = ParseException(instring, loc, self.errmsg)
  3529. for offset in range(1, min(loc, self.retreat+1)):
  3530. try:
  3531. _, ret = test_expr._parse(instring_slice, loc-offset)
  3532. except ParseBaseException as pbe:
  3533. last_expr = pbe
  3534. else:
  3535. break
  3536. else:
  3537. raise last_expr
  3538. # return empty list of tokens, but preserve any defined results names
  3539. del ret[:]
  3540. return loc, ret
  3541. class NotAny(ParseElementEnhance):
  3542. """Lookahead to disallow matching with the given parse expression.
  3543. ``NotAny`` does *not* advance the parsing position within the
  3544. input string, it only verifies that the specified parse expression
  3545. does *not* match at the current position. Also, ``NotAny`` does
  3546. *not* skip over leading whitespace. ``NotAny`` always returns
  3547. a null token list. May be constructed using the '~' operator.
  3548. Example::
  3549. AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())
  3550. # take care not to mistake keywords for identifiers
  3551. ident = ~(AND | OR | NOT) + Word(alphas)
  3552. boolean_term = Optional(NOT) + ident
  3553. # very crude boolean expression - to support parenthesis groups and
  3554. # operation hierarchy, use infixNotation
  3555. boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)
  3556. # integers that are followed by "." are actually floats
  3557. integer = Word(nums) + ~Char(".")
  3558. """
  3559. def __init__( self, expr ):
  3560. super(NotAny,self).__init__(expr)
  3561. #~ self.leaveWhitespace()
  3562. self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
  3563. self.mayReturnEmpty = True
  3564. self.errmsg = "Found unwanted token, "+_ustr(self.expr)
  3565. def parseImpl( self, instring, loc, doActions=True ):
  3566. if self.expr.canParseNext(instring, loc):
  3567. raise ParseException(instring, loc, self.errmsg, self)
  3568. return loc, []
  3569. def __str__( self ):
  3570. if hasattr(self,"name"):
  3571. return self.name
  3572. if self.strRepr is None:
  3573. self.strRepr = "~{" + _ustr(self.expr) + "}"
  3574. return self.strRepr
  3575. class _MultipleMatch(ParseElementEnhance):
  3576. def __init__( self, expr, stopOn=None):
  3577. super(_MultipleMatch, self).__init__(expr)
  3578. self.saveAsList = True
  3579. ender = stopOn
  3580. if isinstance(ender, basestring):
  3581. ender = ParserElement._literalStringClass(ender)
  3582. self.not_ender = ~ender if ender is not None else None
  3583. def parseImpl( self, instring, loc, doActions=True ):
  3584. self_expr_parse = self.expr._parse
  3585. self_skip_ignorables = self._skipIgnorables
  3586. check_ender = self.not_ender is not None
  3587. if check_ender:
  3588. try_not_ender = self.not_ender.tryParse
  3589. # must be at least one (but first see if we are the stopOn sentinel;
  3590. # if so, fail)
  3591. if check_ender:
  3592. try_not_ender(instring, loc)
  3593. loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
  3594. try:
  3595. hasIgnoreExprs = (not not self.ignoreExprs)
  3596. while 1:
  3597. if check_ender:
  3598. try_not_ender(instring, loc)
  3599. if hasIgnoreExprs:
  3600. preloc = self_skip_ignorables( instring, loc )
  3601. else:
  3602. preloc = loc
  3603. loc, tmptokens = self_expr_parse( instring, preloc, doActions )
  3604. if tmptokens or tmptokens.haskeys():
  3605. tokens += tmptokens
  3606. except (ParseException,IndexError):
  3607. pass
  3608. return loc, tokens
  3609. class OneOrMore(_MultipleMatch):
  3610. """Repetition of one or more of the given expression.
  3611. Parameters:
  3612. - expr - expression that must match one or more times
  3613. - stopOn - (default= ``None``) - expression for a terminating sentinel
  3614. (only required if the sentinel would ordinarily match the repetition
  3615. expression)
  3616. Example::
  3617. data_word = Word(alphas)
  3618. label = data_word + FollowedBy(':')
  3619. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3620. text = "shape: SQUARE posn: upper left color: BLACK"
  3621. OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
  3622. # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
  3623. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3624. OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
  3625. # could also be written as
  3626. (attr_expr * (1,)).parseString(text).pprint()
  3627. """
  3628. def __str__( self ):
  3629. if hasattr(self,"name"):
  3630. return self.name
  3631. if self.strRepr is None:
  3632. self.strRepr = "{" + _ustr(self.expr) + "}..."
  3633. return self.strRepr
  3634. class ZeroOrMore(_MultipleMatch):
  3635. """Optional repetition of zero or more of the given expression.
  3636. Parameters:
  3637. - expr - expression that must match zero or more times
  3638. - stopOn - (default= ``None``) - expression for a terminating sentinel
  3639. (only required if the sentinel would ordinarily match the repetition
  3640. expression)
  3641. Example: similar to :class:`OneOrMore`
  3642. """
  3643. def __init__( self, expr, stopOn=None):
  3644. super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
  3645. self.mayReturnEmpty = True
  3646. def parseImpl( self, instring, loc, doActions=True ):
  3647. try:
  3648. return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
  3649. except (ParseException,IndexError):
  3650. return loc, []
  3651. def __str__( self ):
  3652. if hasattr(self,"name"):
  3653. return self.name
  3654. if self.strRepr is None:
  3655. self.strRepr = "[" + _ustr(self.expr) + "]..."
  3656. return self.strRepr
  3657. class _NullToken(object):
  3658. def __bool__(self):
  3659. return False
  3660. __nonzero__ = __bool__
  3661. def __str__(self):
  3662. return ""
  3663. _optionalNotMatched = _NullToken()
  3664. class Optional(ParseElementEnhance):
  3665. """Optional matching of the given expression.
  3666. Parameters:
  3667. - expr - expression that must match zero or more times
  3668. - default (optional) - value to be returned if the optional expression is not found.
  3669. Example::
  3670. # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
  3671. zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
  3672. zip.runTests('''
  3673. # traditional ZIP code
  3674. 12345
  3675. # ZIP+4 form
  3676. 12101-0001
  3677. # invalid ZIP
  3678. 98765-
  3679. ''')
  3680. prints::
  3681. # traditional ZIP code
  3682. 12345
  3683. ['12345']
  3684. # ZIP+4 form
  3685. 12101-0001
  3686. ['12101-0001']
  3687. # invalid ZIP
  3688. 98765-
  3689. ^
  3690. FAIL: Expected end of text (at char 5), (line:1, col:6)
  3691. """
  3692. def __init__( self, expr, default=_optionalNotMatched ):
  3693. super(Optional,self).__init__( expr, savelist=False )
  3694. self.saveAsList = self.expr.saveAsList
  3695. self.defaultValue = default
  3696. self.mayReturnEmpty = True
  3697. def parseImpl( self, instring, loc, doActions=True ):
  3698. try:
  3699. loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  3700. except (ParseException,IndexError):
  3701. if self.defaultValue is not _optionalNotMatched:
  3702. if self.expr.resultsName:
  3703. tokens = ParseResults([ self.defaultValue ])
  3704. tokens[self.expr.resultsName] = self.defaultValue
  3705. else:
  3706. tokens = [ self.defaultValue ]
  3707. else:
  3708. tokens = []
  3709. return loc, tokens
  3710. def __str__( self ):
  3711. if hasattr(self,"name"):
  3712. return self.name
  3713. if self.strRepr is None:
  3714. self.strRepr = "[" + _ustr(self.expr) + "]"
  3715. return self.strRepr
  3716. class SkipTo(ParseElementEnhance):
  3717. """Token for skipping over all undefined text until the matched
  3718. expression is found.
  3719. Parameters:
  3720. - expr - target expression marking the end of the data to be skipped
  3721. - include - (default= ``False``) if True, the target expression is also parsed
  3722. (the skipped text and target expression are returned as a 2-element list).
  3723. - ignore - (default= ``None``) used to define grammars (typically quoted strings and
  3724. comments) that might contain false matches to the target expression
  3725. - failOn - (default= ``None``) define expressions that are not allowed to be
  3726. included in the skipped test; if found before the target expression is found,
  3727. the SkipTo is not a match
  3728. Example::
  3729. report = '''
  3730. Outstanding Issues Report - 1 Jan 2000
  3731. # | Severity | Description | Days Open
  3732. -----+----------+-------------------------------------------+-----------
  3733. 101 | Critical | Intermittent system crash | 6
  3734. 94 | Cosmetic | Spelling error on Login ('log|n') | 14
  3735. 79 | Minor | System slow when running too many reports | 47
  3736. '''
  3737. integer = Word(nums)
  3738. SEP = Suppress('|')
  3739. # use SkipTo to simply match everything up until the next SEP
  3740. # - ignore quoted strings, so that a '|' character inside a quoted string does not match
  3741. # - parse action will call token.strip() for each matched token, i.e., the description body
  3742. string_data = SkipTo(SEP, ignore=quotedString)
  3743. string_data.setParseAction(tokenMap(str.strip))
  3744. ticket_expr = (integer("issue_num") + SEP
  3745. + string_data("sev") + SEP
  3746. + string_data("desc") + SEP
  3747. + integer("days_open"))
  3748. for tkt in ticket_expr.searchString(report):
  3749. print tkt.dump()
  3750. prints::
  3751. ['101', 'Critical', 'Intermittent system crash', '6']
  3752. - days_open: 6
  3753. - desc: Intermittent system crash
  3754. - issue_num: 101
  3755. - sev: Critical
  3756. ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
  3757. - days_open: 14
  3758. - desc: Spelling error on Login ('log|n')
  3759. - issue_num: 94
  3760. - sev: Cosmetic
  3761. ['79', 'Minor', 'System slow when running too many reports', '47']
  3762. - days_open: 47
  3763. - desc: System slow when running too many reports
  3764. - issue_num: 79
  3765. - sev: Minor
  3766. """
  3767. def __init__( self, other, include=False, ignore=None, failOn=None ):
  3768. super( SkipTo, self ).__init__( other )
  3769. self.ignoreExpr = ignore
  3770. self.mayReturnEmpty = True
  3771. self.mayIndexError = False
  3772. self.includeMatch = include
  3773. self.saveAsList = False
  3774. if isinstance(failOn, basestring):
  3775. self.failOn = ParserElement._literalStringClass(failOn)
  3776. else:
  3777. self.failOn = failOn
  3778. self.errmsg = "No match found for "+_ustr(self.expr)
  3779. def parseImpl( self, instring, loc, doActions=True ):
  3780. startloc = loc
  3781. instrlen = len(instring)
  3782. expr = self.expr
  3783. expr_parse = self.expr._parse
  3784. self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
  3785. self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
  3786. tmploc = loc
  3787. while tmploc <= instrlen:
  3788. if self_failOn_canParseNext is not None:
  3789. # break if failOn expression matches
  3790. if self_failOn_canParseNext(instring, tmploc):
  3791. break
  3792. if self_ignoreExpr_tryParse is not None:
  3793. # advance past ignore expressions
  3794. while 1:
  3795. try:
  3796. tmploc = self_ignoreExpr_tryParse(instring, tmploc)
  3797. except ParseBaseException:
  3798. break
  3799. try:
  3800. expr_parse(instring, tmploc, doActions=False, callPreParse=False)
  3801. except (ParseException, IndexError):
  3802. # no match, advance loc in string
  3803. tmploc += 1
  3804. else:
  3805. # matched skipto expr, done
  3806. break
  3807. else:
  3808. # ran off the end of the input string without matching skipto expr, fail
  3809. raise ParseException(instring, loc, self.errmsg, self)
  3810. # build up return values
  3811. loc = tmploc
  3812. skiptext = instring[startloc:loc]
  3813. skipresult = ParseResults(skiptext)
  3814. if self.includeMatch:
  3815. loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
  3816. skipresult += mat
  3817. return loc, skipresult
  3818. class Forward(ParseElementEnhance):
  3819. """Forward declaration of an expression to be defined later -
  3820. used for recursive grammars, such as algebraic infix notation.
  3821. When the expression is known, it is assigned to the ``Forward``
  3822. variable using the '<<' operator.
  3823. Note: take care when assigning to ``Forward`` not to overlook
  3824. precedence of operators.
  3825. Specifically, '|' has a lower precedence than '<<', so that::
  3826. fwdExpr << a | b | c
  3827. will actually be evaluated as::
  3828. (fwdExpr << a) | b | c
  3829. thereby leaving b and c out as parseable alternatives. It is recommended that you
  3830. explicitly group the values inserted into the ``Forward``::
  3831. fwdExpr << (a | b | c)
  3832. Converting to use the '<<=' operator instead will avoid this problem.
  3833. See :class:`ParseResults.pprint` for an example of a recursive
  3834. parser created using ``Forward``.
  3835. """
  3836. def __init__( self, other=None ):
  3837. super(Forward,self).__init__( other, savelist=False )
  3838. def __lshift__( self, other ):
  3839. if isinstance( other, basestring ):
  3840. other = ParserElement._literalStringClass(other)
  3841. self.expr = other
  3842. self.strRepr = None
  3843. self.mayIndexError = self.expr.mayIndexError
  3844. self.mayReturnEmpty = self.expr.mayReturnEmpty
  3845. self.setWhitespaceChars( self.expr.whiteChars )
  3846. self.skipWhitespace = self.expr.skipWhitespace
  3847. self.saveAsList = self.expr.saveAsList
  3848. self.ignoreExprs.extend(self.expr.ignoreExprs)
  3849. return self
  3850. def __ilshift__(self, other):
  3851. return self << other
  3852. def leaveWhitespace( self ):
  3853. self.skipWhitespace = False
  3854. return self
  3855. def streamline( self ):
  3856. if not self.streamlined:
  3857. self.streamlined = True
  3858. if self.expr is not None:
  3859. self.expr.streamline()
  3860. return self
  3861. def validate( self, validateTrace=[] ):
  3862. if self not in validateTrace:
  3863. tmp = validateTrace[:]+[self]
  3864. if self.expr is not None:
  3865. self.expr.validate(tmp)
  3866. self.checkRecursion([])
  3867. def __str__( self ):
  3868. if hasattr(self,"name"):
  3869. return self.name
  3870. return self.__class__.__name__ + ": ..."
  3871. # stubbed out for now - creates awful memory and perf issues
  3872. self._revertClass = self.__class__
  3873. self.__class__ = _ForwardNoRecurse
  3874. try:
  3875. if self.expr is not None:
  3876. retString = _ustr(self.expr)
  3877. else:
  3878. retString = "None"
  3879. finally:
  3880. self.__class__ = self._revertClass
  3881. return self.__class__.__name__ + ": " + retString
  3882. def copy(self):
  3883. if self.expr is not None:
  3884. return super(Forward,self).copy()
  3885. else:
  3886. ret = Forward()
  3887. ret <<= self
  3888. return ret
  3889. class _ForwardNoRecurse(Forward):
  3890. def __str__( self ):
  3891. return "..."
  3892. class TokenConverter(ParseElementEnhance):
  3893. """
  3894. Abstract subclass of :class:`ParseExpression`, for converting parsed results.
  3895. """
  3896. def __init__( self, expr, savelist=False ):
  3897. super(TokenConverter,self).__init__( expr )#, savelist )
  3898. self.saveAsList = False
  3899. class Combine(TokenConverter):
  3900. """Converter to concatenate all matching tokens to a single string.
  3901. By default, the matching patterns must also be contiguous in the
  3902. input string; this can be disabled by specifying
  3903. ``'adjacent=False'`` in the constructor.
  3904. Example::
  3905. real = Word(nums) + '.' + Word(nums)
  3906. print(real.parseString('3.1416')) # -> ['3', '.', '1416']
  3907. # will also erroneously match the following
  3908. print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
  3909. real = Combine(Word(nums) + '.' + Word(nums))
  3910. print(real.parseString('3.1416')) # -> ['3.1416']
  3911. # no match when there are internal spaces
  3912. print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
  3913. """
  3914. def __init__( self, expr, joinString="", adjacent=True ):
  3915. super(Combine,self).__init__( expr )
  3916. # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
  3917. if adjacent:
  3918. self.leaveWhitespace()
  3919. self.adjacent = adjacent
  3920. self.skipWhitespace = True
  3921. self.joinString = joinString
  3922. self.callPreparse = True
  3923. def ignore( self, other ):
  3924. if self.adjacent:
  3925. ParserElement.ignore(self, other)
  3926. else:
  3927. super( Combine, self).ignore( other )
  3928. return self
  3929. def postParse( self, instring, loc, tokenlist ):
  3930. retToks = tokenlist.copy()
  3931. del retToks[:]
  3932. retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
  3933. if self.resultsName and retToks.haskeys():
  3934. return [ retToks ]
  3935. else:
  3936. return retToks
  3937. class Group(TokenConverter):
  3938. """Converter to return the matched tokens as a list - useful for
  3939. returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.
  3940. Example::
  3941. ident = Word(alphas)
  3942. num = Word(nums)
  3943. term = ident | num
  3944. func = ident + Optional(delimitedList(term))
  3945. print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
  3946. func = ident + Group(Optional(delimitedList(term)))
  3947. print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
  3948. """
  3949. def __init__( self, expr ):
  3950. super(Group,self).__init__( expr )
  3951. self.saveAsList = expr.saveAsList
  3952. def postParse( self, instring, loc, tokenlist ):
  3953. return [ tokenlist ]
  3954. class Dict(TokenConverter):
  3955. """Converter to return a repetitive expression as a list, but also
  3956. as a dictionary. Each element can also be referenced using the first
  3957. token in the expression as its key. Useful for tabular report
  3958. scraping when the first column can be used as a item key.
  3959. Example::
  3960. data_word = Word(alphas)
  3961. label = data_word + FollowedBy(':')
  3962. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3963. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  3964. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3965. # print attributes as plain groups
  3966. print(OneOrMore(attr_expr).parseString(text).dump())
  3967. # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
  3968. result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
  3969. print(result.dump())
  3970. # access named fields as dict entries, or output as dict
  3971. print(result['shape'])
  3972. print(result.asDict())
  3973. prints::
  3974. ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
  3975. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  3976. - color: light blue
  3977. - posn: upper left
  3978. - shape: SQUARE
  3979. - texture: burlap
  3980. SQUARE
  3981. {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
  3982. See more examples at :class:`ParseResults` of accessing fields by results name.
  3983. """
  3984. def __init__( self, expr ):
  3985. super(Dict,self).__init__( expr )
  3986. self.saveAsList = True
  3987. def postParse( self, instring, loc, tokenlist ):
  3988. for i,tok in enumerate(tokenlist):
  3989. if len(tok) == 0:
  3990. continue
  3991. ikey = tok[0]
  3992. if isinstance(ikey,int):
  3993. ikey = _ustr(tok[0]).strip()
  3994. if len(tok)==1:
  3995. tokenlist[ikey] = _ParseResultsWithOffset("",i)
  3996. elif len(tok)==2 and not isinstance(tok[1],ParseResults):
  3997. tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
  3998. else:
  3999. dictvalue = tok.copy() #ParseResults(i)
  4000. del dictvalue[0]
  4001. if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
  4002. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
  4003. else:
  4004. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
  4005. if self.resultsName:
  4006. return [ tokenlist ]
  4007. else:
  4008. return tokenlist
  4009. class Suppress(TokenConverter):
  4010. """Converter for ignoring the results of a parsed expression.
  4011. Example::
  4012. source = "a, b, c,d"
  4013. wd = Word(alphas)
  4014. wd_list1 = wd + ZeroOrMore(',' + wd)
  4015. print(wd_list1.parseString(source))
  4016. # often, delimiters that are useful during parsing are just in the
  4017. # way afterward - use Suppress to keep them out of the parsed output
  4018. wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
  4019. print(wd_list2.parseString(source))
  4020. prints::
  4021. ['a', ',', 'b', ',', 'c', ',', 'd']
  4022. ['a', 'b', 'c', 'd']
  4023. (See also :class:`delimitedList`.)
  4024. """
  4025. def postParse( self, instring, loc, tokenlist ):
  4026. return []
  4027. def suppress( self ):
  4028. return self
  4029. class OnlyOnce(object):
  4030. """Wrapper for parse actions, to ensure they are only called once.
  4031. """
  4032. def __init__(self, methodCall):
  4033. self.callable = _trim_arity(methodCall)
  4034. self.called = False
  4035. def __call__(self,s,l,t):
  4036. if not self.called:
  4037. results = self.callable(s,l,t)
  4038. self.called = True
  4039. return results
  4040. raise ParseException(s,l,"")
  4041. def reset(self):
  4042. self.called = False
  4043. def traceParseAction(f):
  4044. """Decorator for debugging parse actions.
  4045. When the parse action is called, this decorator will print
  4046. ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
  4047. When the parse action completes, the decorator will print
  4048. ``"<<"`` followed by the returned value, or any exception that the parse action raised.
  4049. Example::
  4050. wd = Word(alphas)
  4051. @traceParseAction
  4052. def remove_duplicate_chars(tokens):
  4053. return ''.join(sorted(set(''.join(tokens))))
  4054. wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
  4055. print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
  4056. prints::
  4057. >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
  4058. <<leaving remove_duplicate_chars (ret: 'dfjkls')
  4059. ['dfjkls']
  4060. """
  4061. f = _trim_arity(f)
  4062. def z(*paArgs):
  4063. thisFunc = f.__name__
  4064. s,l,t = paArgs[-3:]
  4065. if len(paArgs)>3:
  4066. thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
  4067. sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
  4068. try:
  4069. ret = f(*paArgs)
  4070. except Exception as exc:
  4071. sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
  4072. raise
  4073. sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
  4074. return ret
  4075. try:
  4076. z.__name__ = f.__name__
  4077. except AttributeError:
  4078. pass
  4079. return z
  4080. #
  4081. # global helpers
  4082. #
  4083. def delimitedList( expr, delim=",", combine=False ):
  4084. """Helper to define a delimited list of expressions - the delimiter
  4085. defaults to ','. By default, the list elements and delimiters can
  4086. have intervening whitespace, and comments, but this can be
  4087. overridden by passing ``combine=True`` in the constructor. If
  4088. ``combine`` is set to ``True``, the matching tokens are
  4089. returned as a single token string, with the delimiters included;
  4090. otherwise, the matching tokens are returned as a list of tokens,
  4091. with the delimiters suppressed.
  4092. Example::
  4093. delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
  4094. delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
  4095. """
  4096. dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
  4097. if combine:
  4098. return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
  4099. else:
  4100. return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
  4101. def countedArray( expr, intExpr=None ):
  4102. """Helper to define a counted list of expressions.
  4103. This helper defines a pattern of the form::
  4104. integer expr expr expr...
  4105. where the leading integer tells how many expr expressions follow.
  4106. The matched tokens returns the array of expr tokens as a list - the
  4107. leading count token is suppressed.
  4108. If ``intExpr`` is specified, it should be a pyparsing expression
  4109. that produces an integer value.
  4110. Example::
  4111. countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
  4112. # in this parser, the leading integer value is given in binary,
  4113. # '10' indicating that 2 values are in the array
  4114. binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
  4115. countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
  4116. """
  4117. arrayExpr = Forward()
  4118. def countFieldParseAction(s,l,t):
  4119. n = t[0]
  4120. arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
  4121. return []
  4122. if intExpr is None:
  4123. intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
  4124. else:
  4125. intExpr = intExpr.copy()
  4126. intExpr.setName("arrayLen")
  4127. intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
  4128. return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
  4129. def _flatten(L):
  4130. ret = []
  4131. for i in L:
  4132. if isinstance(i,list):
  4133. ret.extend(_flatten(i))
  4134. else:
  4135. ret.append(i)
  4136. return ret
  4137. def matchPreviousLiteral(expr):
  4138. """Helper to define an expression that is indirectly defined from
  4139. the tokens matched in a previous expression, that is, it looks for
  4140. a 'repeat' of a previous expression. For example::
  4141. first = Word(nums)
  4142. second = matchPreviousLiteral(first)
  4143. matchExpr = first + ":" + second
  4144. will match ``"1:1"``, but not ``"1:2"``. Because this
  4145. matches a previous literal, will also match the leading
  4146. ``"1:1"`` in ``"1:10"``. If this is not desired, use
  4147. :class:`matchPreviousExpr`. Do *not* use with packrat parsing
  4148. enabled.
  4149. """
  4150. rep = Forward()
  4151. def copyTokenToRepeater(s,l,t):
  4152. if t:
  4153. if len(t) == 1:
  4154. rep << t[0]
  4155. else:
  4156. # flatten t tokens
  4157. tflat = _flatten(t.asList())
  4158. rep << And(Literal(tt) for tt in tflat)
  4159. else:
  4160. rep << Empty()
  4161. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  4162. rep.setName('(prev) ' + _ustr(expr))
  4163. return rep
  4164. def matchPreviousExpr(expr):
  4165. """Helper to define an expression that is indirectly defined from
  4166. the tokens matched in a previous expression, that is, it looks for
  4167. a 'repeat' of a previous expression. For example::
  4168. first = Word(nums)
  4169. second = matchPreviousExpr(first)
  4170. matchExpr = first + ":" + second
  4171. will match ``"1:1"``, but not ``"1:2"``. Because this
  4172. matches by expressions, will *not* match the leading ``"1:1"``
  4173. in ``"1:10"``; the expressions are evaluated first, and then
  4174. compared, so ``"1"`` is compared with ``"10"``. Do *not* use
  4175. with packrat parsing enabled.
  4176. """
  4177. rep = Forward()
  4178. e2 = expr.copy()
  4179. rep <<= e2
  4180. def copyTokenToRepeater(s,l,t):
  4181. matchTokens = _flatten(t.asList())
  4182. def mustMatchTheseTokens(s,l,t):
  4183. theseTokens = _flatten(t.asList())
  4184. if theseTokens != matchTokens:
  4185. raise ParseException("",0,"")
  4186. rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
  4187. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  4188. rep.setName('(prev) ' + _ustr(expr))
  4189. return rep
  4190. def _escapeRegexRangeChars(s):
  4191. #~ escape these chars: ^-]
  4192. for c in r"\^-]":
  4193. s = s.replace(c,_bslash+c)
  4194. s = s.replace("\n",r"\n")
  4195. s = s.replace("\t",r"\t")
  4196. return _ustr(s)
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes
    sure to do longest-first testing when there is a conflict,
    regardless of the input order.

    Returns a :class:`Regex` when ``caseless`` is false, ``useRegex`` is
    true and the regex can be built; otherwise a :class:`MatchFirst` of
    literals. Returns :class:`NoMatch` when no symbols are given.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True``, or if
       creating a :class:`Regex` raises an exception)

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # isequal(a, b): the two symbols are duplicates
    # masks(a, b):   a is a prefix of b, so testing a first would hide b
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        # symbols stays empty, so NoMatch() is returned below
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix.  ``i`` only advances once
    # position i has been compared against every later symbol without change.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                # duplicate of cur: drop it and rescan from the same position
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                # cur is a prefix of other: move other in front of cur
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                # otherwise an alternation of the escaped symbols, in order
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort: warn and fall through to the MatchFirst form
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
  4265. def dictOf( key, value ):
  4266. """Helper to easily and clearly define a dictionary by specifying
  4267. the respective patterns for the key and value. Takes care of
  4268. defining the :class:`Dict`, :class:`ZeroOrMore`, and
  4269. :class:`Group` tokens in the proper order. The key pattern
  4270. can include delimiting markers or punctuation, as long as they are
  4271. suppressed, thereby leaving the significant key text. The value
  4272. pattern can include named results, so that the :class:`Dict` results
  4273. can include named token fields.
  4274. Example::
  4275. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  4276. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  4277. print(OneOrMore(attr_expr).parseString(text).dump())
  4278. attr_label = label
  4279. attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
  4280. # similar to Dict, but simpler call format
  4281. result = dictOf(attr_label, attr_value).parseString(text)
  4282. print(result.dump())
  4283. print(result['shape'])
  4284. print(result.shape) # object attribute access works too
  4285. print(result.asDict())
  4286. prints::
  4287. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  4288. - color: light blue
  4289. - posn: upper left
  4290. - shape: SQUARE
  4291. - texture: burlap
  4292. SQUARE
  4293. SQUARE
  4294. {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
  4295. """
  4296. return Dict(OneOrMore(Group(key + value)))
  4297. def originalTextFor(expr, asString=True):
  4298. """Helper to return the original, untokenized text for a given
  4299. expression. Useful to restore the parsed fields of an HTML start
  4300. tag into the raw tag text itself, or to revert separate tokens with
  4301. intervening whitespace back to the original matching input text. By
  4302. default, returns astring containing the original parsed text.
  4303. If the optional ``asString`` argument is passed as
  4304. ``False``, then the return value is
  4305. a :class:`ParseResults` containing any results names that
  4306. were originally matched, and a single token containing the original
  4307. matched text from the input string. So if the expression passed to
  4308. :class:`originalTextFor` contains expressions with defined
  4309. results names, you must set ``asString`` to ``False`` if you
  4310. want to preserve those results name values.
  4311. Example::
  4312. src = "this is test <b> bold <i>text</i> </b> normal text "
  4313. for tag in ("b","i"):
  4314. opener,closer = makeHTMLTags(tag)
  4315. patt = originalTextFor(opener + SkipTo(closer) + closer)
  4316. print(patt.searchString(src)[0])
  4317. prints::
  4318. ['<b> bold <i>text</i> </b>']
  4319. ['<i>text</i>']
  4320. """
  4321. locMarker = Empty().setParseAction(lambda s,loc,t: loc)
  4322. endlocMarker = locMarker.copy()
  4323. endlocMarker.callPreparse = False
  4324. matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
  4325. if asString:
  4326. extractText = lambda s,l,t: s[t._original_start:t._original_end]
  4327. else:
  4328. def extractText(s,l,t):
  4329. t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
  4330. matchExpr.setParseAction(extractText)
  4331. matchExpr.ignoreExprs = expr.ignoreExprs
  4332. return matchExpr
  4333. def ungroup(expr):
  4334. """Helper to undo pyparsing's default grouping of And expressions,
  4335. even if all but one are non-empty.
  4336. """
  4337. return TokenConverter(expr).setParseAction(lambda t:t[0])
  4338. def locatedExpr(expr):
  4339. """Helper to decorate a returned token with its starting and ending
  4340. locations in the input string.
  4341. This helper adds the following results names:
  4342. - locn_start = location where matched expression begins
  4343. - locn_end = location where matched expression ends
  4344. - value = the actual parsed results
  4345. Be careful if the input text contains ``<TAB>`` characters, you
  4346. may want to call :class:`ParserElement.parseWithTabs`
  4347. Example::
  4348. wd = Word(alphas)
  4349. for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
  4350. print(match)
  4351. prints::
  4352. [[0, 'ljsdf', 5]]
  4353. [[8, 'lksdjjf', 15]]
  4354. [[18, 'lkkjj', 23]]
  4355. """
  4356. locator = Empty().setParseAction(lambda s,l,t: l)
  4357. return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
  4358. # convenience constants for positional expressions
  4359. empty = Empty().setName("empty")
  4360. lineStart = LineStart().setName("lineStart")
  4361. lineEnd = LineEnd().setName("lineEnd")
  4362. stringStart = StringStart().setName("stringStart")
  4363. stringEnd = StringEnd().setName("stringEnd")
  4364. _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
  4365. _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
  4366. _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
  4367. _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
  4368. _charRange = Group(_singleChar + Suppress("-") + _singleChar)
  4369. _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
  4370. def srange(s):
  4371. r"""Helper to easily define string ranges for use in Word
  4372. construction. Borrows syntax from regexp '[]' string range
  4373. definitions::
  4374. srange("[0-9]") -> "0123456789"
  4375. srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
  4376. srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
  4377. The input string must be enclosed in []'s, and the returned string
  4378. is the expanded character set joined into a single string. The
  4379. values enclosed in the []'s may be:
  4380. - a single character
  4381. - an escaped character with a leading backslash (such as ``\-``
  4382. or ``\]``)
  4383. - an escaped hex character with a leading ``'\x'``
  4384. (``\x21``, which is a ``'!'`` character) (``\0x##``
  4385. is also supported for backwards compatibility)
  4386. - an escaped octal character with a leading ``'\0'``
  4387. (``\041``, which is a ``'!'`` character)
  4388. - a range of any of the above, separated by a dash (``'a-z'``,
  4389. etc.)
  4390. - any combination of the above (``'aeiouy'``,
  4391. ``'a-zA-Z0-9_$'``, etc.)
  4392. """
  4393. _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
  4394. try:
  4395. return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
  4396. except Exception:
  4397. return ""
  4398. def matchOnlyAtCol(n):
  4399. """Helper method for defining parse actions that require matching at
  4400. a specific column in the input text.
  4401. """
  4402. def verifyCol(strg,locn,toks):
  4403. if col(locn,strg) != n:
  4404. raise ParseException(strg,locn,"matched token not at column %d" % n)
  4405. return verifyCol
  4406. def replaceWith(replStr):
  4407. """Helper method for common parse actions that simply return
  4408. a literal value. Especially useful when used with
  4409. :class:`transformString<ParserElement.transformString>` ().
  4410. Example::
  4411. num = Word(nums).setParseAction(lambda toks: int(toks[0]))
  4412. na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
  4413. term = na | num
  4414. OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
  4415. """
  4416. return lambda s,l,t: [replStr]
  4417. def removeQuotes(s,l,t):
  4418. """Helper parse action for removing quotation marks from parsed
  4419. quoted strings.
  4420. Example::
  4421. # by default, quotation marks are included in parsed results
  4422. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
  4423. # use removeQuotes to strip quotation marks from parsed results
  4424. quotedString.setParseAction(removeQuotes)
  4425. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
  4426. """
  4427. return t[0][1:-1]
  4428. def tokenMap(func, *args):
  4429. """Helper to define a parse action by mapping a function to all
  4430. elements of a ParseResults list. If any additional args are passed,
  4431. they are forwarded to the given function as additional arguments
  4432. after the token, as in
  4433. ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
  4434. which will convert the parsed data to an integer using base 16.
  4435. Example (compare the last to example in :class:`ParserElement.transformString`::
  4436. hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
  4437. hex_ints.runTests('''
  4438. 00 11 22 aa FF 0a 0d 1a
  4439. ''')
  4440. upperword = Word(alphas).setParseAction(tokenMap(str.upper))
  4441. OneOrMore(upperword).runTests('''
  4442. my kingdom for a horse
  4443. ''')
  4444. wd = Word(alphas).setParseAction(tokenMap(str.title))
  4445. OneOrMore(wd).setParseAction(' '.join).runTests('''
  4446. now is the winter of our discontent made glorious summer by this sun of york
  4447. ''')
  4448. prints::
  4449. 00 11 22 aa FF 0a 0d 1a
  4450. [0, 17, 34, 170, 255, 10, 13, 26]
  4451. my kingdom for a horse
  4452. ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
  4453. now is the winter of our discontent made glorious summer by this sun of york
  4454. ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
  4455. """
  4456. def pa(s,l,t):
  4457. return [func(tokn, *args) for tokn in t]
  4458. try:
  4459. func_name = getattr(func, '__name__',
  4460. getattr(func, '__class__').__name__)
  4461. except Exception:
  4462. func_name = str(func)
  4463. pa.__name__ = func_name
  4464. return pa
# Module-level parse actions built with tokenMap: each matched token is passed
# through _ustr() and then upper-/lower-cased.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""
  4471. def _makeTags(tagStr, xml):
  4472. """Internal helper to construct opening and closing tag expressions, given a tag name"""
  4473. if isinstance(tagStr,basestring):
  4474. resname = tagStr
  4475. tagStr = Keyword(tagStr, caseless=not xml)
  4476. else:
  4477. resname = tagStr.name
  4478. tagAttrName = Word(alphas,alphanums+"_-:")
  4479. if (xml):
  4480. tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
  4481. openTag = Suppress("<") + tagStr("tag") + \
  4482. Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
  4483. Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  4484. else:
  4485. printablesLessRAbrack = "".join(c for c in printables if c not in ">")
  4486. tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
  4487. openTag = Suppress("<") + tagStr("tag") + \
  4488. Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
  4489. Optional( Suppress("=") + tagAttrValue ) ))) + \
  4490. Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  4491. closeTag = Combine(_L("</") + tagStr + ">")
  4492. openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
  4493. closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
  4494. openTag.tag = resname
  4495. closeTag.tag = resname
  4496. return openTag, closeTag
  4497. def makeHTMLTags(tagStr):
  4498. """Helper to construct opening and closing tag expressions for HTML,
  4499. given a tag name. Matches tags in either upper or lower case,
  4500. attributes with namespaces and with quoted or unquoted values.
  4501. Example::
  4502. text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
  4503. # makeHTMLTags returns pyparsing expressions for the opening and
  4504. # closing tags as a 2-tuple
  4505. a,a_end = makeHTMLTags("A")
  4506. link_expr = a + SkipTo(a_end)("link_text") + a_end
  4507. for link in link_expr.searchString(text):
  4508. # attributes in the <A> tag (like "href" shown here) are
  4509. # also accessible as named results
  4510. print(link.link_text, '->', link.href)
  4511. prints::
  4512. pyparsing -> https://github.com/pyparsing/pyparsing/wiki
  4513. """
  4514. return _makeTags( tagStr, False )
  4515. def makeXMLTags(tagStr):
  4516. """Helper to construct opening and closing tag expressions for XML,
  4517. given a tag name. Matches tags only in the given upper/lower case.
  4518. Example: similar to :class:`makeHTMLTags`
  4519. """
  4520. return _makeTags( tagStr, True )
  4521. def withAttribute(*args,**attrDict):
  4522. """Helper to create a validating parse action to be used with start
  4523. tags created with :class:`makeXMLTags` or
  4524. :class:`makeHTMLTags`. Use ``withAttribute`` to qualify
  4525. a starting tag with a required attribute value, to avoid false
  4526. matches on common tags such as ``<TD>`` or ``<DIV>``.
  4527. Call ``withAttribute`` with a series of attribute names and
  4528. values. Specify the list of filter attributes names and values as:
  4529. - keyword arguments, as in ``(align="right")``, or
  4530. - as an explicit dict with ``**`` operator, when an attribute
  4531. name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
  4532. - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align","right"))``
  4533. For attribute names with a namespace prefix, you must use the second
  4534. form. Attribute names are matched insensitive to upper/lower case.
  4535. If just testing for ``class`` (with or without a namespace), use
  4536. :class:`withClass`.
  4537. To verify that the attribute exists, but without specifying a value,
  4538. pass ``withAttribute.ANY_VALUE`` as the value.
  4539. Example::
  4540. html = '''
  4541. <div>
  4542. Some text
  4543. <div type="grid">1 4 0 1 0</div>
  4544. <div type="graph">1,3 2,3 1,1</div>
  4545. <div>this has no type</div>
  4546. </div>
  4547. '''
  4548. div,div_end = makeHTMLTags("div")
  4549. # only match div tag having a type attribute with value "grid"
  4550. div_grid = div().setParseAction(withAttribute(type="grid"))
  4551. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4552. for grid_header in grid_expr.searchString(html):
  4553. print(grid_header.body)
  4554. # construct a match with any div tag having a type attribute, regardless of the value
  4555. div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
  4556. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4557. for div_header in div_expr.searchString(html):
  4558. print(div_header.body)
  4559. prints::
  4560. 1 4 0 1 0
  4561. 1 4 0 1 0
  4562. 1,3 2,3 1,1
  4563. """
  4564. if args:
  4565. attrs = args[:]
  4566. else:
  4567. attrs = attrDict.items()
  4568. attrs = [(k,v) for k,v in attrs]
  4569. def pa(s,l,tokens):
  4570. for attrName,attrValue in attrs:
  4571. if attrName not in tokens:
  4572. raise ParseException(s,l,"no matching attribute " + attrName)
  4573. if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
  4574. raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
  4575. (attrName, tokens[attrName], attrValue))
  4576. return pa
  4577. withAttribute.ANY_VALUE = object()
  4578. def withClass(classname, namespace=''):
  4579. """Simplified version of :class:`withAttribute` when
  4580. matching on a div class - made difficult because ``class`` is
  4581. a reserved word in Python.
  4582. Example::
  4583. html = '''
  4584. <div>
  4585. Some text
  4586. <div class="grid">1 4 0 1 0</div>
  4587. <div class="graph">1,3 2,3 1,1</div>
  4588. <div>this &lt;div&gt; has no class</div>
  4589. </div>
  4590. '''
  4591. div,div_end = makeHTMLTags("div")
  4592. div_grid = div().setParseAction(withClass("grid"))
  4593. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4594. for grid_header in grid_expr.searchString(html):
  4595. print(grid_header.body)
  4596. div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
  4597. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4598. for div_header in div_expr.searchString(html):
  4599. print(div_header.body)
  4600. prints::
  4601. 1 4 0 1 0
  4602. 1 4 0 1 0
  4603. 1,3 2,3 1,1
  4604. """
  4605. classattr = "%s:class" % namespace if namespace else "class"
  4606. return withAttribute(**{classattr : classname})
# Namespace holding the two associativity markers that infixNotation compares
# against; each marker is a distinct sentinel object.
opAssoc = SimpleNamespace()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
  4610. def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
  4611. """Helper method for constructing grammars of expressions made up of
  4612. operators working in a precedence hierarchy. Operators may be unary
  4613. or binary, left- or right-associative. Parse actions can also be
  4614. attached to operator expressions. The generated parser will also
  4615. recognize the use of parentheses to override operator precedences
  4616. (see example below).
  4617. Note: if you define a deep operator list, you may see performance
  4618. issues when using infixNotation. See
  4619. :class:`ParserElement.enablePackrat` for a mechanism to potentially
  4620. improve your parser performance.
  4621. Parameters:
  4622. - baseExpr - expression representing the most basic element for the
  4623. nested
  4624. - opList - list of tuples, one for each operator precedence level
  4625. in the expression grammar; each tuple is of the form ``(opExpr,
  4626. numTerms, rightLeftAssoc, parseAction)``, where:
  4627. - opExpr is the pyparsing expression for the operator; may also
  4628. be a string, which will be converted to a Literal; if numTerms
  4629. is 3, opExpr is a tuple of two expressions, for the two
  4630. operators separating the 3 terms
  4631. - numTerms is the number of terms for this operator (must be 1,
  4632. 2, or 3)
  4633. - rightLeftAssoc is the indicator whether the operator is right
  4634. or left associative, using the pyparsing-defined constants
  4635. ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
  4636. - parseAction is the parse action to be associated with
  4637. expressions matching this operator expression (the parse action
  4638. tuple member may be omitted); if the parse action is passed
  4639. a tuple or list of functions, this is equivalent to calling
  4640. ``setParseAction(*fn)``
  4641. (:class:`ParserElement.setParseAction`)
  4642. - lpar - expression for matching left-parentheses
  4643. (default= ``Suppress('(')``)
  4644. - rpar - expression for matching right-parentheses
  4645. (default= ``Suppress(')')``)
  4646. Example::
  4647. # simple example of four-function arithmetic with ints and
  4648. # variable names
  4649. integer = pyparsing_common.signed_integer
  4650. varname = pyparsing_common.identifier
  4651. arith_expr = infixNotation(integer | varname,
  4652. [
  4653. ('-', 1, opAssoc.RIGHT),
  4654. (oneOf('* /'), 2, opAssoc.LEFT),
  4655. (oneOf('+ -'), 2, opAssoc.LEFT),
  4656. ])
  4657. arith_expr.runTests('''
  4658. 5+3*6
  4659. (5+3)*6
  4660. -2--11
  4661. ''', fullDump=False)
  4662. prints::
  4663. 5+3*6
  4664. [[5, '+', [3, '*', 6]]]
  4665. (5+3)*6
  4666. [[[5, '+', 3], '*', 6]]
  4667. -2--11
  4668. [[['-', 2], '-', ['-', 11]]]
  4669. """
  4670. # captive version of FollowedBy that does not do parse actions or capture results names
  4671. class _FB(FollowedBy):
  4672. def parseImpl(self, instring, loc, doActions=True):
  4673. self.expr.tryParse(instring, loc)
  4674. return loc, []
  4675. ret = Forward()
  4676. lastExpr = baseExpr | ( lpar + ret + rpar )
  4677. for i,operDef in enumerate(opList):
  4678. opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
  4679. termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
  4680. if arity == 3:
  4681. if opExpr is None or len(opExpr) != 2:
  4682. raise ValueError(
  4683. "if numterms=3, opExpr must be a tuple or list of two expressions")
  4684. opExpr1, opExpr2 = opExpr
  4685. thisExpr = Forward().setName(termName)
  4686. if rightLeftAssoc == opAssoc.LEFT:
  4687. if arity == 1:
  4688. matchExpr = _FB(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
  4689. elif arity == 2:
  4690. if opExpr is not None:
  4691. matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
  4692. else:
  4693. matchExpr = _FB(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
  4694. elif arity == 3:
  4695. matchExpr = _FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
  4696. Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
  4697. else:
  4698. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4699. elif rightLeftAssoc == opAssoc.RIGHT:
  4700. if arity == 1:
  4701. # try to avoid LR with this extra test
  4702. if not isinstance(opExpr, Optional):
  4703. opExpr = Optional(opExpr)
  4704. matchExpr = _FB(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
  4705. elif arity == 2:
  4706. if opExpr is not None:
  4707. matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
  4708. else:
  4709. matchExpr = _FB(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
  4710. elif arity == 3:
  4711. matchExpr = _FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
  4712. Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
  4713. else:
  4714. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4715. else:
  4716. raise ValueError("operator must indicate right or left associativity")
  4717. if pa:
  4718. if isinstance(pa, (tuple, list)):
  4719. matchExpr.setParseAction(*pa)
  4720. else:
  4721. matchExpr.setParseAction(pa)
  4722. thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
  4723. lastExpr = thisExpr
  4724. ret <<= lastExpr
  4725. return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions. In each regex the body between the quotes may
# contain: any character other than the quote itself, CR, LF or backslash;
# a doubled quote character; or a backslash followed by either one non-'x'
# character or 'x' plus hex digits. The closing quote is matched as a separate
# literal and joined to the body with Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either of the two forms above, double-quoted tried first
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quotedString immediately preceded by the letter 'u'
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= :class:`quotedString`)

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignoreExpr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quotedString or
    a comment expression.  Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quotedString`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Raises ``ValueError`` if ``opener`` equals ``closer``, or if no
    ``content`` expression is given and ``opener``/``closer`` are not
    both strings.

    Example::

        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # no content expression supplied: synthesize one that matches runs of
        # characters that are neither delimiters nor whitespace
        if isinstance(opener,basestring) and isinstance(closer,basestring):
            if len(opener) == 1 and len(closer)==1:
                # single-character delimiters can simply be excluded by CharsNotIn
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    # note: here the strip() parse action is attached to the
                    # CharsNotIn term, not to the sum as a whole
                    content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                                ).setParseAction(lambda t:t[0].strip()))
            else:
                # multi-character delimiters: take one non-whitespace character
                # at a time, refusing any position where a delimiter starts
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    ~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    # recursive definition: a group is opener, any mix of ignored expressions,
    # nested groups and content items, then closer; delimiters are suppressed
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    Note that ``indentStack`` is modified while parsing (columns are
    appended and popped), and that an ignore expression for
    backslash-newline is registered on ``blockStatementExpr``.

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # a statement of the current block must start in the column on top of
        # the stack; nothing is checked at end of input
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                # deeper than the current block: fatal rather than a plain mismatch
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must lie to the right of the current
        # one and becomes the new top of the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: the column must be left of the current indent and
        # no further right than the enclosing one; the current indent is then
        # dropped. Nothing is checked (or popped) at end of input.
        if l >= len(s): return
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends; LineEnd's whitespace set is limited
    # to tab and space here
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width markers whose parse actions perform the column checks above
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: peers only, no INDENT/UNDENT bracketing
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # backslash followed by a line end is ignored inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
# character sets expanded by srange from bracket-range specifications
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions that accept any tag name made of letters,
# digits, '_' and ':' (first character a letter)
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character:
# gt '>', lt '<', amp '&', nbsp ' ', quot '"', apos "'"
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches '&<name>;' for the names above; the name is captured in the
# named group 'entity'
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
  4923. def replaceHTMLEntity(t):
  4924. """Helper parser action to replace common HTML entities with their special characters"""
  4925. return _htmlEntityMap.get(t.entity)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# body regex: '/*' then any run of non-'*' characters or '*' not followed by '/';
# the closing '*/' is a separate literal joined by Combine
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form ``/* ... */``"

# non-greedy, so the match ends at the first '-->'; [\s\S] also spans newlines
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

# leaveWhitespace() is applied so that leading whitespace is presumably kept
# as part of the match rather than skipped
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# '//' up to the newline; a backslash immediately before a newline continues
# the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# one item of a comma-separated list: printable words containing no comma,
# each optionally followed by spaces/tabs provided that neither a comma nor a
# line end comes next
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# each list entry is a quoted string or a comma item; a missing entry yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""
  4948. # some other useful expressions - using lower-case class name since we are really using this as a namespace
  4949. class pyparsing_common:
  4950. """Here are some common low-level expressions that may be useful in
  4951. jump-starting parser development:
  4952. - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
  4953. :class:`scientific notation<sci_real>`)
  4954. - common :class:`programming identifiers<identifier>`
  4955. - network addresses (:class:`MAC<mac_address>`,
  4956. :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
  4957. - ISO8601 :class:`dates<iso8601_date>` and
  4958. :class:`datetime<iso8601_datetime>`
  4959. - :class:`UUID<uuid>`
  4960. - :class:`comma-separated list<comma_separated_list>`
  4961. Parse actions:
  4962. - :class:`convertToInteger`
  4963. - :class:`convertToFloat`
  4964. - :class:`convertToDate`
  4965. - :class:`convertToDatetime`
  4966. - :class:`stripHTMLTags`
  4967. - :class:`upcaseTokens`
  4968. - :class:`downcaseTokens`
  4969. Example::
  4970. pyparsing_common.number.runTests('''
  4971. # any int or real number, returned as the appropriate type
  4972. 100
  4973. -100
  4974. +100
  4975. 3.14159
  4976. 6.02e23
  4977. 1e-12
  4978. ''')
  4979. pyparsing_common.fnumber.runTests('''
  4980. # any int or real number, returned as float
  4981. 100
  4982. -100
  4983. +100
  4984. 3.14159
  4985. 6.02e23
  4986. 1e-12
  4987. ''')
  4988. pyparsing_common.hex_integer.runTests('''
  4989. # hex numbers
  4990. 100
  4991. FF
  4992. ''')
  4993. pyparsing_common.fraction.runTests('''
  4994. # fractions
  4995. 1/2
  4996. -3/4
  4997. ''')
  4998. pyparsing_common.mixed_integer.runTests('''
  4999. # mixed fractions
  5000. 1
  5001. 1/2
  5002. -3/4
  5003. 1-3/4
  5004. ''')
  5005. import uuid
  5006. pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
  5007. pyparsing_common.uuid.runTests('''
  5008. # uuid
  5009. 12345678-1234-5678-1234-567812345678
  5010. ''')
  5011. prints::
  5012. # any int or real number, returned as the appropriate type
  5013. 100
  5014. [100]
  5015. -100
  5016. [-100]
  5017. +100
  5018. [100]
  5019. 3.14159
  5020. [3.14159]
  5021. 6.02e23
  5022. [6.02e+23]
  5023. 1e-12
  5024. [1e-12]
  5025. # any int or real number, returned as float
  5026. 100
  5027. [100.0]
  5028. -100
  5029. [-100.0]
  5030. +100
  5031. [100.0]
  5032. 3.14159
  5033. [3.14159]
  5034. 6.02e23
  5035. [6.02e+23]
  5036. 1e-12
  5037. [1e-12]
  5038. # hex numbers
  5039. 100
  5040. [256]
  5041. FF
  5042. [255]
  5043. # fractions
  5044. 1/2
  5045. [0.5]
  5046. -3/4
  5047. [-0.75]
  5048. # mixed fractions
  5049. 1
  5050. [1]
  5051. 1/2
  5052. [0.5]
  5053. -3/4
  5054. [-0.75]
  5055. 1-3/4
  5056. [1.75]
  5057. # uuid
  5058. 12345678-1234-5678-1234-567812345678
  5059. [UUID('12345678-1234-5678-1234-567812345678')]
  5060. """
  5061. convertToInteger = tokenMap(int)
  5062. """
  5063. Parse action for converting parsed integers to Python int
  5064. """
  5065. convertToFloat = tokenMap(float)
  5066. """
  5067. Parse action for converting parsed numbers to Python float
  5068. """
  5069. integer = Word(nums).setName("integer").setParseAction(convertToInteger)
  5070. """expression that parses an unsigned integer, returns an int"""
  5071. hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
  5072. """expression that parses a hexadecimal integer, returns an int"""
  5073. signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
  5074. """expression that parses an integer with optional leading sign, returns an int"""
  5075. fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
  5076. """fractional expression of an integer divided by an integer, returns a float"""
  5077. fraction.addParseAction(lambda t: t[0]/t[-1])
  5078. mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
  5079. """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
  5080. mixed_integer.addParseAction(sum)
  5081. real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
  5082. """expression that parses a floating point number and returns a float"""
  5083. sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
  5084. """expression that parses a floating point number with optional
  5085. scientific notation and returns a float"""
  5086. # streamlining this expression makes the docs nicer-looking
  5087. number = (sci_real | real | signed_integer).streamline()
  5088. """any numeric expression, returns the corresponding Python type"""
  5089. fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
  5090. """any int or real number, returned as float"""
  5091. identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
  5092. """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
  5093. ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
  5094. "IPv4 address (``0.0.0.0 - 255.255.255.255``)"
  5095. _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
  5096. _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
  5097. _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
  5098. _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
  5099. _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
  5100. ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
  5101. "IPv6 address (long, short, or mixed form)"
  5102. mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
  @staticmethod
  def convertToDate(fmt="%Y-%m-%d"):
      """
      Build a parse action that turns a matched date string into a Python
      ``datetime.date``.

      Params -
       - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

      The returned parse action raises ParseException, carrying the
      ``strptime`` error text, when the first token does not fit ``fmt``.

      Example::

          date_expr = pyparsing_common.iso8601_date.copy()
          date_expr.setParseAction(pyparsing_common.convertToDate())
          print(date_expr.parseString("1999-12-31"))

      prints::

          [datetime.date(1999, 12, 31)]
      """
      def cvt_fn(instring, loc, toks):
          try:
              parsed = datetime.strptime(toks[0], fmt)
          except ValueError as ve:
              # report the strptime failure as an ordinary parse failure
              raise ParseException(instring, loc, str(ve))
          # keep only the calendar part of the parsed datetime
          return parsed.date()

      return cvt_fn
  @staticmethod
  def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
      """
      Build a parse action that turns a matched datetime string into a
      Python ``datetime.datetime``.

      Params -
       - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

      The returned parse action raises ParseException, carrying the
      ``strptime`` error text, when the first token does not fit ``fmt``.

      Example::

          dt_expr = pyparsing_common.iso8601_datetime.copy()
          dt_expr.setParseAction(pyparsing_common.convertToDatetime())
          print(dt_expr.parseString("1999-12-31T23:59:59.999"))

      prints::

          [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
      """
      def cvt_fn(instring, loc, toks):
          try:
              parsed = datetime.strptime(toks[0], fmt)
          except ValueError as ve:
              # report the strptime failure as an ordinary parse failure
              raise ParseException(instring, loc, str(ve))
          return parsed

      return cvt_fn
  # The year is mandatory; the month, and after it the day, may be left off.
  iso8601_date = Regex(
      r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
  ).setName("ISO8601 date")
  "ISO8601 date (``yyyy-mm-dd``)"

  # The pattern is split into date / separator / time / timezone pieces for
  # readability only; adjacent literals are joined into one pattern string.
  iso8601_datetime = Regex(
      r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)'
      r'[T ]'
      r'(?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?'
      r'(?P<tz>Z|[+-]\d\d:?\d\d)?'
  ).setName("ISO8601 datetime")
  "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

  # Hex digits grouped 8-4-4-4-12, joined by '-'.
  uuid = Regex(
      r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
  ).setName("UUID")
  "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

  # Matches any opening or closing tag and suppresses it from the output;
  # used by stripHTMLTags.
  _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
  @staticmethod
  def stripHTMLTags(s, l, tokens):
      """Parse action that removes HTML tags from the first matched token.

      The first token is run through ``pyparsing_common._html_stripper``
      (suppressed open/close tag expressions) with ``transformString``, and
      the transformed string is returned.

      Example::

          # strip HTML links from normal text
          text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
          td,td_end = makeHTMLTags("TD")
          table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
          print(table_text.parseString(text).body)

      Prints::

          More info at the pyparsing wiki page
      """
      strip_tags = pyparsing_common._html_stripper.transformString
      html_source = tokens[0]
      return strip_tags(html_source)
  # One unquoted list item: one or more comma-free printable words, each
  # optionally followed by spaces/tabs, never starting at a comma or at the
  # end of a line.  Combine joins the pieces into a single token.
  _commasepitem = Combine(
      OneOrMore(
          ~Literal(",")
          + ~LineEnd()
          + Word(printables, excludeChars=',')
          + Optional(White(" \t"))
      )
  ).streamline().setName("commaItem")

  # Each item is a quoted string or an unquoted item; a missing item yields
  # the default "".
  comma_separated_list = delimitedList(
      Optional(quotedString.copy() | _commasepitem, default="")
  ).setName("comma separated list")
  """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

  upcaseTokens = staticmethod(tokenMap(lambda tok: _ustr(tok).upper()))
  """Parse action to convert tokens to upper case."""

  downcaseTokens = staticmethod(tokenMap(lambda tok: _ustr(tok).lower()))
  """Parse action to convert tokens to lower case."""
  5170. class _lazyclassproperty(object):
  5171. def __init__(self, fn):
  5172. self.fn = fn
  5173. self.__doc__ = fn.__doc__
  5174. self.__name__ = fn.__name__
  5175. def __get__(self, obj, cls):
  5176. if cls is None:
  5177. cls = type(obj)
  5178. if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', []) for superclass in cls.__mro__[1:]):
  5179. cls._intern = {}
  5180. attrname = self.fn.__name__
  5181. if attrname not in cls._intern:
  5182. cls._intern[attrname] = self.fn(cls)
  5183. return cls._intern[attrname]
  class unicode_set(object):
      """
      Base class for a set of Unicode characters, exposing the
      language-specific strings ``alphas``, ``nums``, ``alphanums``, and
      ``printables``.

      A unicode_set is defined by a list of ranges in the Unicode character
      set, in a class attribute ``_ranges``, such as::

          _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

      A unicode set can also be defined using multiple inheritance of other unicode sets::

          class CJK(Chinese, Japanese, Korean):
              pass
      """
      _ranges = []

      @classmethod
      def _get_chars_for_ranges(cls):
          """Return the sorted, de-duplicated characters covered by the
          ``_ranges`` of ``cls`` and of every class ahead of ``unicode_set``
          in its MRO."""
          code_points = set()
          for klass in cls.__mro__:
              if klass is unicode_set:
                  # the base class itself contributes no ranges
                  break
              for bounds in klass._ranges:
                  # bounds[-1] is the upper limit; for a 1-tuple it is the
                  # same value as bounds[0], giving a single code point
                  code_points.update(range(bounds[0], bounds[-1] + 1))
          return [unichr(point) for point in sorted(code_points)]

      @_lazyclassproperty
      def printables(cls):
          "all non-whitespace characters in this range"
          chars = cls._get_chars_for_ranges()
          return u''.join([ch for ch in chars if not unicode.isspace(ch)])

      @_lazyclassproperty
      def alphas(cls):
          "all alphabetic characters in this range"
          chars = cls._get_chars_for_ranges()
          return u''.join([ch for ch in chars if unicode.isalpha(ch)])

      @_lazyclassproperty
      def nums(cls):
          "all numeric digit characters in this range"
          chars = cls._get_chars_for_ranges()
          return u''.join([ch for ch in chars if unicode.isdigit(ch)])

      @_lazyclassproperty
      def alphanums(cls):
          "all alphanumeric characters in this range"
          letters = cls.alphas
          digits = cls.nums
          return letters + digits
  class pyparsing_unicode(unicode_set):
      """
      A namespace class for defining common language unicode_sets.
      """
      # the namespace itself covers everything from the space character up
      # to the highest code point of this interpreter
      _ranges = [(32, sys.maxunicode)]

      class Latin1(unicode_set):
          "Unicode set for Latin-1 Unicode Character Range"
          _ranges = [
              (0x0020, 0x007e),
              (0x00a0, 0x00ff),
          ]

      class LatinA(unicode_set):
          "Unicode set for Latin-A Unicode Character Range"
          _ranges = [
              (0x0100, 0x017f),
          ]

      class LatinB(unicode_set):
          "Unicode set for Latin-B Unicode Character Range"
          _ranges = [
              (0x0180, 0x024f),
          ]

      class Greek(unicode_set):
          "Unicode set for Greek Unicode Character Ranges"
          # 1-tuples denote a single code point
          _ranges = [
              (0x0370, 0x03ff),
              (0x1f00, 0x1f15),
              (0x1f18, 0x1f1d),
              (0x1f20, 0x1f45),
              (0x1f48, 0x1f4d),
              (0x1f50, 0x1f57),
              (0x1f59,),
              (0x1f5b,),
              (0x1f5d,),
              (0x1f5f, 0x1f7d),
              (0x1f80, 0x1fb4),
              (0x1fb6, 0x1fc4),
              (0x1fc6, 0x1fd3),
              (0x1fd6, 0x1fdb),
              (0x1fdd, 0x1fef),
              (0x1ff2, 0x1ff4),
              (0x1ff6, 0x1ffe),
          ]

      class Cyrillic(unicode_set):
          "Unicode set for Cyrillic Unicode Character Range"
          _ranges = [
              (0x0400, 0x04ff),
          ]

      class Chinese(unicode_set):
          "Unicode set for Chinese Unicode Character Range"
          _ranges = [
              (0x4e00, 0x9fff),
              (0x3000, 0x303f),
          ]

      class Japanese(unicode_set):
          "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
          # declared empty here; the nested sets below carry the actual ranges
          _ranges = []

          class Kanji(unicode_set):
              "Unicode set for Kanji Unicode Character Range"
              _ranges = [
                  (0x4e00, 0x9fbf),
                  (0x3000, 0x303f),
              ]

          class Hiragana(unicode_set):
              "Unicode set for Hiragana Unicode Character Range"
              _ranges = [
                  (0x3040, 0x309f),
              ]

          class Katakana(unicode_set):
              "Unicode set for Katakana Unicode Character Range"
              _ranges = [
                  (0x30a0, 0x30ff),
              ]

      class Korean(unicode_set):
          "Unicode set for Korean Unicode Character Range"
          _ranges = [
              (0xac00, 0xd7af),
              (0x1100, 0x11ff),
              (0x3130, 0x318f),
              (0xa960, 0xa97f),
              (0xd7b0, 0xd7ff),
              (0x3000, 0x303f),
          ]

      class CJK(Chinese, Japanese, Korean):
          "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"

      class Thai(unicode_set):
          "Unicode set for Thai Unicode Character Range"
          _ranges = [
              (0x0e01, 0x0e3a),
              (0x0e3f, 0x0e5b),
          ]

      class Arabic(unicode_set):
          "Unicode set for Arabic Unicode Character Range"
          _ranges = [
              (0x0600, 0x061b),
              (0x061e, 0x06ff),
              (0x0700, 0x077f),
          ]

      class Hebrew(unicode_set):
          "Unicode set for Hebrew Unicode Character Range"
          _ranges = [
              (0x0590, 0x05ff),
          ]

      class Devanagari(unicode_set):
          "Unicode set for Devanagari Unicode Character Range"
          _ranges = [
              (0x0900, 0x097f),
              (0xa8e0, 0xa8ff),
          ]
  # Japanese is the concatenation of its three nested script sets, in the
  # order Kanji, Hiragana, Katakana.
  pyparsing_unicode.Japanese._ranges = (
      pyparsing_unicode.Japanese.Kanji._ranges
      + pyparsing_unicode.Japanese.Hiragana._ranges
      + pyparsing_unicode.Japanese.Katakana._ranges
  )

  # Register native-language attribute names as aliases for the sets above.
  # Only done when PY_3 is set.
  if PY_3:
      # aliases on the pyparsing_unicode namespace
      setattr(pyparsing_unicode, "العربية", pyparsing_unicode.Arabic)
      setattr(pyparsing_unicode, "中文", pyparsing_unicode.Chinese)
      setattr(pyparsing_unicode, "кириллица", pyparsing_unicode.Cyrillic)
      setattr(pyparsing_unicode, "Ελληνικά", pyparsing_unicode.Greek)
      setattr(pyparsing_unicode, "עִברִית", pyparsing_unicode.Hebrew)
      setattr(pyparsing_unicode, "日本語", pyparsing_unicode.Japanese)

      # aliases for the scripts nested inside Japanese
      setattr(pyparsing_unicode.Japanese, "漢字", pyparsing_unicode.Japanese.Kanji)
      setattr(pyparsing_unicode.Japanese, "カタカナ", pyparsing_unicode.Japanese.Katakana)
      setattr(pyparsing_unicode.Japanese, "ひらがな", pyparsing_unicode.Japanese.Hiragana)

      # remaining aliases on the pyparsing_unicode namespace
      setattr(pyparsing_unicode, "한국어", pyparsing_unicode.Korean)
      setattr(pyparsing_unicode, "ไทย", pyparsing_unicode.Thai)
      setattr(pyparsing_unicode, "देवनागरी", pyparsing_unicode.Devanagari)
  if __name__ == "__main__":
      # Self-demo, run only when the module is executed as a script: build a
      # tiny SELECT grammar and exercise runTests on it and on a few of the
      # pyparsing_common expressions.

      # keywords
      selectToken = CaselessLiteral("select")
      fromToken = CaselessLiteral("from")

      # dotted names for columns and tables, passed through upcaseTokens
      ident = Word(alphas, alphanums + "_$")

      columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
      columnNameList = Group(delimitedList(columnName)).setName("columns")
      columnSpec = ('*' | columnNameList)

      tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
      tableNameList = Group(delimitedList(tableName)).setName("tables")

      simpleSQL = (
          selectToken("command")
          + columnSpec("columns")
          + fromToken
          + tableNameList("tables")
      )

      # demo runTests method, including embedded comments in test string
      simpleSQL.runTests("""
          # '*' as column list and dotted table name
          select * from SYS.XYZZY
          # caseless match on "SELECT", and casts back to "select"
          SELECT * from XYZZY, ABC
          # list of column names, and mixed case SELECT keyword
          Select AA,BB,CC from Sys.dual
          # multiple tables
          Select A, B, C from Sys.dual, Table2
          # invalid SELECT keyword - should fail
          Xelect A, B, C from Sys.dual
          # incomplete command - should fail
          Select
          # invalid column name - should fail
          Select ^^^ frox Sys.dual
          """)

      pyparsing_common.number.runTests("""
          100
          -100
          +100
          3.14159
          6.02e23
          1e-12
          """)

      # any int or real number, returned as float
      pyparsing_common.fnumber.runTests("""
          100
          -100
          +100
          3.14159
          6.02e23
          1e-12
          """)

      pyparsing_common.hex_integer.runTests("""
          100
          FF
          """)

      # convert matched UUID strings to uuid.UUID objects before testing
      import uuid
      pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
      pyparsing_common.uuid.runTests("""
          12345678-1234-5678-1234-567812345678
          """)