You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

yacc.py 134KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494
  1. # -----------------------------------------------------------------------------
  2. # ply: yacc.py
  3. #
  4. # Copyright (C) 2001-2017
  5. # David M. Beazley (Dabeaz LLC)
  6. # All rights reserved.
  7. #
  8. # Redistribution and use in source and binary forms, with or without
  9. # modification, are permitted provided that the following conditions are
  10. # met:
  11. #
  12. # * Redistributions of source code must retain the above copyright notice,
  13. # this list of conditions and the following disclaimer.
  14. # * Redistributions in binary form must reproduce the above copyright notice,
  15. # this list of conditions and the following disclaimer in the documentation
  16. # and/or other materials provided with the distribution.
  17. # * Neither the name of the David Beazley or Dabeaz LLC may be used to
  18. # endorse or promote products derived from this software without
  19. # specific prior written permission.
  20. #
  21. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. # -----------------------------------------------------------------------------
  33. #
  34. # This implements an LR parser that is constructed from grammar rules defined
  35. # as Python functions. The grammar is specified by supplying the BNF inside
  36. # Python documentation strings. The inspiration for this technique was borrowed
  37. # from John Aycock's Spark parsing system. PLY might be viewed as a cross between
  38. # Spark and the GNU bison utility.
  39. #
  40. # The current implementation is only somewhat object-oriented. The
  41. # LR parser itself is defined in terms of an object (which allows multiple
  42. # parsers to co-exist). However, most of the variables used during table
  43. # construction are defined in terms of global variables. Users shouldn't
  44. # notice unless they are trying to define multiple parsers at the same
  45. # time using threads (in which case they should have their head examined).
  46. #
  47. # This implementation supports both SLR and LALR(1) parsing. LALR(1)
  48. # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
  49. # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
  50. # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
  51. # by the more efficient DeRemer and Pennello algorithm.
  52. #
  53. # :::::::: WARNING :::::::
  54. #
  55. # Construction of LR parsing tables is fairly complicated and expensive.
  56. # To make this module run fast, a *LOT* of work has been put into
  57. # optimization---often at the expense of readability and what one might
  58. # consider to be good Python "coding style." Modify the code at your
  59. # own risk!
  60. # ----------------------------------------------------------------------------
  61. import re
  62. import types
  63. import sys
  64. import os.path
  65. import inspect
  66. import base64
  67. import warnings
  68. __version__ = '3.10'
  69. __tabversion__ = '3.10'
  70. #-----------------------------------------------------------------------------
  71. # === User configurable parameters ===
  72. #
  73. # Change these to modify the default behavior of yacc (if you wish)
  74. #-----------------------------------------------------------------------------
  75. yaccdebug = True # Debugging mode. If set, yacc generates a
  76. # a 'parser.out' file in the current directory
  77. debug_file = 'parser.out' # Default name of the debugging file
  78. tab_module = 'parsetab' # Default name of the table module
  79. default_lr = 'LALR' # Default LR table generation method
  80. error_count = 3 # Number of symbols that must be shifted to leave recovery mode
  81. yaccdevel = False # Set to True if developing yacc. This turns off optimized
  82. # implementations of certain functions.
  83. resultlimit = 40 # Size limit of results when running in debug mode.
  84. pickle_protocol = 0 # Protocol to use when writing pickle files
  85. # String type-checking compatibility
  86. if sys.version_info[0] < 3:
  87. string_types = basestring
  88. else:
  89. string_types = str
  90. MAXINT = sys.maxsize
  91. # This object is a stand-in for a logging object created by the
  92. # logging module. PLY will use this by default to create things
  93. # such as the parser.out file. If a user wants more detailed
  94. # information, they can create their own logging object and pass
  95. # it into PLY.
  96. class PlyLogger(object):
  97. def __init__(self, f):
  98. self.f = f
  99. def debug(self, msg, *args, **kwargs):
  100. self.f.write((msg % args) + '\n')
  101. info = debug
  102. def warning(self, msg, *args, **kwargs):
  103. self.f.write('WARNING: ' + (msg % args) + '\n')
  104. def error(self, msg, *args, **kwargs):
  105. self.f.write('ERROR: ' + (msg % args) + '\n')
  106. critical = debug
  107. # Null logger is used when no output is generated. Does nothing.
  108. class NullLogger(object):
  109. def __getattribute__(self, name):
  110. return self
  111. def __call__(self, *args, **kwargs):
  112. return self
  113. # Exception raised for yacc-related errors
  114. class YaccError(Exception):
  115. pass
  116. # Format the result message that the parser produces when running in debug mode.
  117. def format_result(r):
  118. repr_str = repr(r)
  119. if '\n' in repr_str:
  120. repr_str = repr(repr_str)
  121. if len(repr_str) > resultlimit:
  122. repr_str = repr_str[:resultlimit] + ' ...'
  123. result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
  124. return result
  125. # Format stack entries when the parser is running in debug mode
  126. def format_stack_entry(r):
  127. repr_str = repr(r)
  128. if '\n' in repr_str:
  129. repr_str = repr(repr_str)
  130. if len(repr_str) < 16:
  131. return repr_str
  132. else:
  133. return '<%s @ 0x%x>' % (type(r).__name__, id(r))
  134. # Panic mode error recovery support. This feature is being reworked--much of the
  135. # code here is to offer a deprecation/backwards compatible transition
# Module-level hooks read by the deprecated global errok(), token() and
# restart() functions.  call_errorfunc() points them at the active parser's
# bound methods while the user's p_error() handler runs.
_errok = None
_token = None
_restart = None

# Text of the warning issued by the deprecated global functions.
# NOTE(review): the interior indentation/blank lines of this message are
# reproduced as visible here -- confirm against the upstream file.
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:
def p_error(p):
...
# Use parser.errok(), parser.token(), parser.restart()
...
parser = yacc.yacc()
'''
  147. def errok():
  148. warnings.warn(_warnmsg)
  149. return _errok()
  150. def restart():
  151. warnings.warn(_warnmsg)
  152. return _restart()
  153. def token():
  154. warnings.warn(_warnmsg)
  155. return _token()
  156. # Utility function to call the p_error() function with some deprecation hacks
  157. def call_errorfunc(errorfunc, token, parser):
  158. global _errok, _token, _restart
  159. _errok = parser.errok
  160. _token = parser.token
  161. _restart = parser.restart
  162. r = errorfunc(token)
  163. try:
  164. del _errok, _token, _restart
  165. except NameError:
  166. pass
  167. return r
  168. #-----------------------------------------------------------------------------
  169. # === LR Parsing Engine ===
  170. #
  171. # The following classes are used for the LR parser itself. These are not
  172. # used during table construction and are independent of the actual LR
  173. # table generation algorithm
  174. #-----------------------------------------------------------------------------
  175. # This class is used to hold non-terminal grammar symbols during parsing.
  176. # It normally has the following attributes set:
  177. # .type = Grammar symbol type
  178. # .value = Symbol value
  179. # .lineno = Starting line number
  180. # .endlineno = Ending line number (optional, set automatically)
  181. # .lexpos = Starting lex position
  182. # .endlexpos = Ending lex position (optional, set automatically)
  183. class YaccSymbol:
  184. def __str__(self):
  185. return self.type
  186. def __repr__(self):
  187. return str(self)
  188. # This class is a wrapper around the objects actually passed to each
  189. # grammar rule. Index lookup and assignment actually assign the
  190. # .value attribute of the underlying YaccSymbol object.
  191. # The lineno() method returns the line number of a given
  192. # item (or 0 if not defined). The linespan() method returns
  193. # a tuple of (startline,endline) representing the range of lines
  194. # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
  195. # representing the range of positional information for a symbol.
  196. class YaccProduction:
  197. def __init__(self, s, stack=None):
  198. self.slice = s
  199. self.stack = stack
  200. self.lexer = None
  201. self.parser = None
  202. def __getitem__(self, n):
  203. if isinstance(n, slice):
  204. return [s.value for s in self.slice[n]]
  205. elif n >= 0:
  206. return self.slice[n].value
  207. else:
  208. return self.stack[n].value
  209. def __setitem__(self, n, v):
  210. self.slice[n].value = v
  211. def __getslice__(self, i, j):
  212. return [s.value for s in self.slice[i:j]]
  213. def __len__(self):
  214. return len(self.slice)
  215. def lineno(self, n):
  216. return getattr(self.slice[n], 'lineno', 0)
  217. def set_lineno(self, n, lineno):
  218. self.slice[n].lineno = lineno
  219. def linespan(self, n):
  220. startline = getattr(self.slice[n], 'lineno', 0)
  221. endline = getattr(self.slice[n], 'endlineno', startline)
  222. return startline, endline
  223. def lexpos(self, n):
  224. return getattr(self.slice[n], 'lexpos', 0)
  225. def lexspan(self, n):
  226. startpos = getattr(self.slice[n], 'lexpos', 0)
  227. endpos = getattr(self.slice[n], 'endlexpos', startpos)
  228. return startpos, endpos
  229. def error(self):
  230. raise SyntaxError
  231. # -----------------------------------------------------------------------------
  232. # == LRParser ==
  233. #
  234. # The LR Parsing engine.
  235. # -----------------------------------------------------------------------------
  236. class LRParser:
  237. def __init__(self, lrtab, errorf):
  238. self.productions = lrtab.lr_productions
  239. self.action = lrtab.lr_action
  240. self.goto = lrtab.lr_goto
  241. self.errorfunc = errorf
  242. self.set_defaulted_states()
  243. self.errorok = True
  244. def errok(self):
  245. self.errorok = True
  246. def restart(self):
  247. del self.statestack[:]
  248. del self.symstack[:]
  249. sym = YaccSymbol()
  250. sym.type = '$end'
  251. self.symstack.append(sym)
  252. self.statestack.append(0)
  253. # Defaulted state support.
  254. # This method identifies parser states where there is only one possible reduction action.
  255. # For such states, the parser can choose to make a rule reduction without consuming
  256. # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
  257. # certain kinds of advanced parsing situations where the lexer and parser interact with
  258. # each other or change states (i.e., manipulation of scope, lexer states, etc.).
  259. #
  260. # See: https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
  261. def set_defaulted_states(self):
  262. self.defaulted_states = {}
  263. for state, actions in self.action.items():
  264. rules = list(actions.values())
  265. if len(rules) == 1 and rules[0] < 0:
  266. self.defaulted_states[state] = rules[0]
  267. def disable_defaulted_states(self):
  268. self.defaulted_states = {}
  269. def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
  270. if debug or yaccdevel:
  271. if isinstance(debug, int):
  272. debug = PlyLogger(sys.stderr)
  273. return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
  274. elif tracking:
  275. return self.parseopt(input, lexer, debug, tracking, tokenfunc)
  276. else:
  277. return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
  278. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  279. # parsedebug().
  280. #
  281. # This is the debugging enabled version of parse(). All changes made to the
  282. # parsing engine should be made here. Optimized versions of this function
  283. # are automatically created by the ply/ygen.py script. This script cuts out
  284. # sections enclosed in markers such as this:
  285. #
  286. # #--! DEBUG
  287. # statements
  288. # #--! DEBUG
  289. #
  290. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  291. def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
  292. #--! parsedebug-start
  293. lookahead = None # Current lookahead symbol
  294. lookaheadstack = [] # Stack of lookahead symbols
  295. actions = self.action # Local reference to action table (to avoid lookup on self.)
  296. goto = self.goto # Local reference to goto table (to avoid lookup on self.)
  297. prod = self.productions # Local reference to production list (to avoid lookup on self.)
  298. defaulted_states = self.defaulted_states # Local reference to defaulted states
  299. pslice = YaccProduction(None) # Production object passed to grammar rules
  300. errorcount = 0 # Used during error recovery
  301. #--! DEBUG
  302. debug.info('PLY: PARSE DEBUG START')
  303. #--! DEBUG
  304. # If no lexer was given, we will try to use the lex module
  305. if not lexer:
  306. from . import lex
  307. lexer = lex.lexer
  308. # Set up the lexer and parser objects on pslice
  309. pslice.lexer = lexer
  310. pslice.parser = self
  311. # If input was supplied, pass to lexer
  312. if input is not None:
  313. lexer.input(input)
  314. if tokenfunc is None:
  315. # Tokenize function
  316. get_token = lexer.token
  317. else:
  318. get_token = tokenfunc
  319. # Set the parser() token method (sometimes used in error recovery)
  320. self.token = get_token
  321. # Set up the state and symbol stacks
  322. statestack = [] # Stack of parsing states
  323. self.statestack = statestack
  324. symstack = [] # Stack of grammar symbols
  325. self.symstack = symstack
  326. pslice.stack = symstack # Put in the production
  327. errtoken = None # Err token
  328. # The start state is assumed to be (0,$end)
  329. statestack.append(0)
  330. sym = YaccSymbol()
  331. sym.type = '$end'
  332. symstack.append(sym)
  333. state = 0
  334. while True:
  335. # Get the next symbol on the input. If a lookahead symbol
  336. # is already set, we just use that. Otherwise, we'll pull
  337. # the next token off of the lookaheadstack or from the lexer
  338. #--! DEBUG
  339. debug.debug('')
  340. debug.debug('State : %s', state)
  341. #--! DEBUG
  342. if state not in defaulted_states:
  343. if not lookahead:
  344. if not lookaheadstack:
  345. lookahead = get_token() # Get the next token
  346. else:
  347. lookahead = lookaheadstack.pop()
  348. if not lookahead:
  349. lookahead = YaccSymbol()
  350. lookahead.type = '$end'
  351. # Check the action table
  352. ltype = lookahead.type
  353. t = actions[state].get(ltype)
  354. else:
  355. t = defaulted_states[state]
  356. #--! DEBUG
  357. debug.debug('Defaulted state %s: Reduce using %d', state, -t)
  358. #--! DEBUG
  359. #--! DEBUG
  360. debug.debug('Stack : %s',
  361. ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
  362. #--! DEBUG
  363. if t is not None:
  364. if t > 0:
  365. # shift a symbol on the stack
  366. statestack.append(t)
  367. state = t
  368. #--! DEBUG
  369. debug.debug('Action : Shift and goto state %s', t)
  370. #--! DEBUG
  371. symstack.append(lookahead)
  372. lookahead = None
  373. # Decrease error count on successful shift
  374. if errorcount:
  375. errorcount -= 1
  376. continue
  377. if t < 0:
  378. # reduce a symbol on the stack, emit a production
  379. p = prod[-t]
  380. pname = p.name
  381. plen = p.len
  382. # Get production function
  383. sym = YaccSymbol()
  384. sym.type = pname # Production name
  385. sym.value = None
  386. #--! DEBUG
  387. if plen:
  388. debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
  389. '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
  390. goto[statestack[-1-plen]][pname])
  391. else:
  392. debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
  393. goto[statestack[-1]][pname])
  394. #--! DEBUG
  395. if plen:
  396. targ = symstack[-plen-1:]
  397. targ[0] = sym
  398. #--! TRACKING
  399. if tracking:
  400. t1 = targ[1]
  401. sym.lineno = t1.lineno
  402. sym.lexpos = t1.lexpos
  403. t1 = targ[-1]
  404. sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
  405. sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
  406. #--! TRACKING
  407. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  408. # The code enclosed in this section is duplicated
  409. # below as a performance optimization. Make sure
  410. # changes get made in both locations.
  411. pslice.slice = targ
  412. try:
  413. # Call the grammar rule with our special slice object
  414. del symstack[-plen:]
  415. self.state = state
  416. p.callable(pslice)
  417. del statestack[-plen:]
  418. #--! DEBUG
  419. debug.info('Result : %s', format_result(pslice[0]))
  420. #--! DEBUG
  421. symstack.append(sym)
  422. state = goto[statestack[-1]][pname]
  423. statestack.append(state)
  424. except SyntaxError:
  425. # If an error was set. Enter error recovery state
  426. lookaheadstack.append(lookahead) # Save the current lookahead token
  427. symstack.extend(targ[1:-1]) # Put the production slice back on the stack
  428. statestack.pop() # Pop back one state (before the reduce)
  429. state = statestack[-1]
  430. sym.type = 'error'
  431. sym.value = 'error'
  432. lookahead = sym
  433. errorcount = error_count
  434. self.errorok = False
  435. continue
  436. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  437. else:
  438. #--! TRACKING
  439. if tracking:
  440. sym.lineno = lexer.lineno
  441. sym.lexpos = lexer.lexpos
  442. #--! TRACKING
  443. targ = [sym]
  444. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  445. # The code enclosed in this section is duplicated
  446. # above as a performance optimization. Make sure
  447. # changes get made in both locations.
  448. pslice.slice = targ
  449. try:
  450. # Call the grammar rule with our special slice object
  451. self.state = state
  452. p.callable(pslice)
  453. #--! DEBUG
  454. debug.info('Result : %s', format_result(pslice[0]))
  455. #--! DEBUG
  456. symstack.append(sym)
  457. state = goto[statestack[-1]][pname]
  458. statestack.append(state)
  459. except SyntaxError:
  460. # If an error was set. Enter error recovery state
  461. lookaheadstack.append(lookahead) # Save the current lookahead token
  462. statestack.pop() # Pop back one state (before the reduce)
  463. state = statestack[-1]
  464. sym.type = 'error'
  465. sym.value = 'error'
  466. lookahead = sym
  467. errorcount = error_count
  468. self.errorok = False
  469. continue
  470. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  471. if t == 0:
  472. n = symstack[-1]
  473. result = getattr(n, 'value', None)
  474. #--! DEBUG
  475. debug.info('Done : Returning %s', format_result(result))
  476. debug.info('PLY: PARSE DEBUG END')
  477. #--! DEBUG
  478. return result
  479. if t is None:
  480. #--! DEBUG
  481. debug.error('Error : %s',
  482. ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
  483. #--! DEBUG
  484. # We have some kind of parsing error here. To handle
  485. # this, we are going to push the current token onto
  486. # the tokenstack and replace it with an 'error' token.
  487. # If there are any synchronization rules, they may
  488. # catch it.
  489. #
  490. # In addition to pushing the error token, we call call
  491. # the user defined p_error() function if this is the
  492. # first syntax error. This function is only called if
  493. # errorcount == 0.
  494. if errorcount == 0 or self.errorok:
  495. errorcount = error_count
  496. self.errorok = False
  497. errtoken = lookahead
  498. if errtoken.type == '$end':
  499. errtoken = None # End of file!
  500. if self.errorfunc:
  501. if errtoken and not hasattr(errtoken, 'lexer'):
  502. errtoken.lexer = lexer
  503. self.state = state
  504. tok = call_errorfunc(self.errorfunc, errtoken, self)
  505. if self.errorok:
  506. # User must have done some kind of panic
  507. # mode recovery on their own. The
  508. # returned token is the next lookahead
  509. lookahead = tok
  510. errtoken = None
  511. continue
  512. else:
  513. if errtoken:
  514. if hasattr(errtoken, 'lineno'):
  515. lineno = lookahead.lineno
  516. else:
  517. lineno = 0
  518. if lineno:
  519. sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
  520. else:
  521. sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
  522. else:
  523. sys.stderr.write('yacc: Parse error in input. EOF\n')
  524. return
  525. else:
  526. errorcount = error_count
  527. # case 1: the statestack only has 1 entry on it. If we're in this state, the
  528. # entire parse has been rolled back and we're completely hosed. The token is
  529. # discarded and we just keep going.
  530. if len(statestack) <= 1 and lookahead.type != '$end':
  531. lookahead = None
  532. errtoken = None
  533. state = 0
  534. # Nuke the pushback stack
  535. del lookaheadstack[:]
  536. continue
  537. # case 2: the statestack has a couple of entries on it, but we're
  538. # at the end of the file. nuke the top entry and generate an error token
  539. # Start nuking entries on the stack
  540. if lookahead.type == '$end':
  541. # Whoa. We're really hosed here. Bail out
  542. return
  543. if lookahead.type != 'error':
  544. sym = symstack[-1]
  545. if sym.type == 'error':
  546. # Hmmm. Error is on top of stack, we'll just nuke input
  547. # symbol and continue
  548. #--! TRACKING
  549. if tracking:
  550. sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
  551. sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
  552. #--! TRACKING
  553. lookahead = None
  554. continue
  555. # Create the error symbol for the first time and make it the new lookahead symbol
  556. t = YaccSymbol()
  557. t.type = 'error'
  558. if hasattr(lookahead, 'lineno'):
  559. t.lineno = t.endlineno = lookahead.lineno
  560. if hasattr(lookahead, 'lexpos'):
  561. t.lexpos = t.endlexpos = lookahead.lexpos
  562. t.value = lookahead
  563. lookaheadstack.append(lookahead)
  564. lookahead = t
  565. else:
  566. sym = symstack.pop()
  567. #--! TRACKING
  568. if tracking:
  569. lookahead.lineno = sym.lineno
  570. lookahead.lexpos = sym.lexpos
  571. #--! TRACKING
  572. statestack.pop()
  573. state = statestack[-1]
  574. continue
  575. # Call an error function here
  576. raise RuntimeError('yacc: internal parser error!!!\n')
  577. #--! parsedebug-end
  578. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  579. # parseopt().
  580. #
  581. # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
  582. # This code is automatically generated by the ply/ygen.py script. Make
  583. # changes to the parsedebug() method instead.
  584. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Run the LR parsing loop without any debug logging.

        The banner above this method states that the code is generated from
        parsedebug() by ply/ygen.py; the ``#--!`` marker comments delimit the
        generated regions and must be left in place.

        Parameters:
            input     - If not None, handed to ``lexer.input()`` before parsing.
            lexer     - Token source.  When falsy, ``lex.lexer`` from the sibling
                        ``lex`` module is used.
            debug     - Accepted for signature compatibility; not referenced in
                        this body.
            tracking  - When true, ``lineno``/``lexpos`` and
                        ``endlineno``/``endlexpos`` are copied onto each reduced
                        symbol from the first/last symbol of the production
                        (or from the lexer for empty productions).
            tokenfunc - Optional callable used instead of ``lexer.token`` to
                        fetch the next token.

        Action table encoding used by the loop: ``t > 0`` shifts and enters
        state ``t``; ``t < 0`` reduces by production ``-t``; ``t == 0`` accepts;
        ``t is None`` means a syntax error and triggers error recovery.

        Returns:
            On accept, the ``value`` attribute of the symbol on top of the
            symbol stack (None if it has no such attribute).  A bare ``return``
            (None) is used when recovery gives up: at end of input, or at end
            of input with no error function installed.

        Raises:
            RuntimeError - if an iteration finishes without any action branch
                           having handled ``t``.

        Side effects: assigns ``self.token``, ``self.statestack``,
        ``self.symstack``, ``self.state`` and ``self.errorok``.

        NOTE(review): ``error_count``, ``call_errorfunc``, ``YaccSymbol`` and
        ``YaccProduction`` are module-level names defined outside this method.
        """
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer
            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        # No more input: synthesize the end-of-input marker
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                # Defaulted state: the action is taken without reading a token
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # targ[0] is the result slot; targ[1:] are the right-hand side symbols
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the parse result is the value of the top symbol
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0 (or self.errorok has been set).
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                # NOTE(review): unlike the sibling messages, this one has no trailing newline
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already 'error': unwind one symbol/state and retry
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-end
  832. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  833. # parseopt_notrack().
  834. #
  835. # Optimized version of parseopt() with line number tracking removed.
  836. # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
  837. # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
  838. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Run the LR parsing loop with neither debug logging nor position tracking.

        The banner above this method states that the code is generated from
        parsedebug() by ply/ygen.py; the ``#--!`` marker comments delimit the
        generated region and must be left in place.

        Parameters:
            input     - If not None, handed to ``lexer.input()`` before parsing.
            lexer     - Token source.  When falsy, ``lex.lexer`` from the sibling
                        ``lex`` module is used.
            debug     - Accepted for signature compatibility; not referenced in
                        this body.
            tracking  - Accepted for signature compatibility; not referenced in
                        this body (no position attributes are propagated on
                        reduce).
            tokenfunc - Optional callable used instead of ``lexer.token`` to
                        fetch the next token.

        Action table encoding used by the loop: ``t > 0`` shifts and enters
        state ``t``; ``t < 0`` reduces by production ``-t``; ``t == 0`` accepts;
        ``t is None`` means a syntax error and triggers error recovery.

        Returns:
            On accept, the ``value`` attribute of the symbol on top of the
            symbol stack (None if it has no such attribute).  A bare ``return``
            (None) is used when recovery gives up: at end of input, or at end
            of input with no error function installed.

        Raises:
            RuntimeError - if an iteration finishes without any action branch
                           having handled ``t``.

        Side effects: assigns ``self.token``, ``self.statestack``,
        ``self.symstack``, ``self.state`` and ``self.errorok``.

        NOTE(review): ``error_count``, ``call_errorfunc``, ``YaccSymbol`` and
        ``YaccProduction`` are module-level names defined outside this method.
        """
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer
            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        # No more input: synthesize the end-of-input marker
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                # Defaulted state: the action is taken without reading a token
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # targ[0] is the result slot; targ[1:] are the right-hand side symbols
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the parse result is the value of the top symbol
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0 (or self.errorok has been set).
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                # NOTE(review): unlike the sibling messages, this one has no trailing newline
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already 'error': unwind one symbol/state and retry
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end
  1062. # -----------------------------------------------------------------------------
  1063. # === Grammar Representation ===
  1064. #
  1065. # The following functions, classes, and variables are used to represent and
  1066. # manipulate the rules that make up a grammar.
  1067. # -----------------------------------------------------------------------------
# Regex used to validate grammar rule names: one or more ASCII letters,
# digits, underscores, or hyphens.  Note that '-' is accepted, so a matching
# name is not necessarily a valid Python identifier.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
  1070. # -----------------------------------------------------------------------------
  1071. # class Production:
  1072. #
  1073. # This class stores the raw information about a single production or grammar rule.
  1074. # A grammar rule refers to a specification such as this:
  1075. #
  1076. # expr : expr PLUS term
  1077. #
  1078. # Here are the basic attributes defined on all productions
  1079. #
  1080. # name - Name of the production. For example 'expr'
  1081. # prod - A list of symbols on the right side ['expr','PLUS','term']
  1082. # prec - Production precedence level
  1083. # number - Production number.
  1084. # func - Function that executes on reduce
  1085. # file - File where production function is defined
  1086. # lineno - Line number where production function is defined
  1087. #
  1088. # The following attributes are defined or optional.
  1089. #
  1090. # len - Length of the production (number of symbols on right hand side)
  1091. # usyms - Set of unique symbols found in the production
  1092. # -----------------------------------------------------------------------------
  1093. class Production(object):
  1094. reduced = 0
  1095. def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
  1096. self.name = name
  1097. self.prod = tuple(prod)
  1098. self.number = number
  1099. self.func = func
  1100. self.callable = None
  1101. self.file = file
  1102. self.line = line
  1103. self.prec = precedence
  1104. # Internal settings used during table construction
  1105. self.len = len(self.prod) # Length of the production
  1106. # Create a list of unique production symbols used in the production
  1107. self.usyms = []
  1108. for s in self.prod:
  1109. if s not in self.usyms:
  1110. self.usyms.append(s)
  1111. # List of all LR items for the production
  1112. self.lr_items = []
  1113. self.lr_next = None
  1114. # Create a string representation
  1115. if self.prod:
  1116. self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
  1117. else:
  1118. self.str = '%s -> <empty>' % self.name
  1119. def __str__(self):
  1120. return self.str
  1121. def __repr__(self):
  1122. return 'Production(' + str(self) + ')'
  1123. def __len__(self):
  1124. return len(self.prod)
  1125. def __nonzero__(self):
  1126. return 1
  1127. def __getitem__(self, index):
  1128. return self.prod[index]
  1129. # Return the nth lr_item from the production (or None if at the end)
  1130. def lr_item(self, n):
  1131. if n > len(self.prod):
  1132. return None
  1133. p = LRItem(self, n)
  1134. # Precompute the list of productions immediately following.
  1135. try:
  1136. p.lr_after = Prodnames[p.prod[n+1]]
  1137. except (IndexError, KeyError):
  1138. p.lr_after = []
  1139. try:
  1140. p.lr_before = p.prod[n-1]
  1141. except IndexError:
  1142. p.lr_before = None
  1143. return p
  1144. # Bind the production function name to a callable
  1145. def bind(self, pdict):
  1146. if self.func:
  1147. self.callable = pdict[self.func]
  1148. # This class serves as a minimal standin for Production objects when
  1149. # reading table data from files. It only contains information
  1150. # actually used by the LR parsing engine, plus some additional
  1151. # debugging information.
  1152. class MiniProduction(object):
  1153. def __init__(self, str, name, len, func, file, line):
  1154. self.name = name
  1155. self.len = len
  1156. self.func = func
  1157. self.callable = None
  1158. self.file = file
  1159. self.line = line
  1160. self.str = str
  1161. def __str__(self):
  1162. return self.str
  1163. def __repr__(self):
  1164. return 'MiniProduction(%s)' % self.str
  1165. # Bind the production function name to a callable
  1166. def bind(self, pdict):
  1167. if self.func:
  1168. self.callable = pdict[self.func]
  1169. # -----------------------------------------------------------------------------
  1170. # class LRItem
  1171. #
  1172. # This class represents a specific stage of parsing a production rule. For
  1173. # example:
  1174. #
  1175. # expr : expr . PLUS term
  1176. #
# In the above, the "." represents the current location of the parse.  Here
# are the basic attributes:
  1179. #
  1180. # name - Name of the production. For example 'expr'
  1181. # prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
  1182. # number - Production number.
  1183. #
  1184. # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
  1185. # then lr_next refers to 'expr -> expr PLUS . term'
  1186. # lr_index - LR item index (location of the ".") in the prod list.
  1187. # lookaheads - LALR lookahead symbols for this item
#       len       - Length of the item's symbol tuple (right-hand side symbols plus the ".")
  1189. # lr_after - List of all productions that immediately follow
  1190. # lr_before - Grammar symbol immediately before
  1191. # -----------------------------------------------------------------------------
  1192. class LRItem(object):
  1193. def __init__(self, p, n):
  1194. self.name = p.name
  1195. self.prod = list(p.prod)
  1196. self.number = p.number
  1197. self.lr_index = n
  1198. self.lookaheads = {}
  1199. self.prod.insert(n, '.')
  1200. self.prod = tuple(self.prod)
  1201. self.len = len(self.prod)
  1202. self.usyms = p.usyms
  1203. def __str__(self):
  1204. if self.prod:
  1205. s = '%s -> %s' % (self.name, ' '.join(self.prod))
  1206. else:
  1207. s = '%s -> <empty>' % self.name
  1208. return s
  1209. def __repr__(self):
  1210. return 'LRItem(' + str(self) + ')'
  1211. # -----------------------------------------------------------------------------
  1212. # rightmost_terminal()
  1213. #
  1214. # Return the rightmost terminal from a list of symbols. Used in add_production()
  1215. # -----------------------------------------------------------------------------
  1216. def rightmost_terminal(symbols, terminals):
  1217. i = len(symbols) - 1
  1218. while i >= 0:
  1219. if symbols[i] in terminals:
  1220. return symbols[i]
  1221. i -= 1
  1222. return None
  1223. # -----------------------------------------------------------------------------
  1224. # === GRAMMAR CLASS ===
  1225. #
  1226. # The following class represents the contents of the specified grammar along
  1227. # with various computed properties such as first sets, follow sets, LR items, etc.
  1228. # This data is used for critical parts of the table generation process later.
  1229. # -----------------------------------------------------------------------------
  1230. class GrammarError(YaccError):
  1231. pass
  1232. class Grammar(object):
  1233. def __init__(self, terminals):
  1234. self.Productions = [None] # A list of all of the productions. The first
  1235. # entry is always reserved for the purpose of
  1236. # building an augmented grammar
  1237. self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all
  1238. # productions of that nonterminal.
  1239. self.Prodmap = {} # A dictionary that is only used to detect duplicate
  1240. # productions.
  1241. self.Terminals = {} # A dictionary mapping the names of terminal symbols to a
  1242. # list of the rules where they are used.
  1243. for term in terminals:
  1244. self.Terminals[term] = []
  1245. self.Terminals['error'] = []
  1246. self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list
  1247. # of rule numbers where they are used.
  1248. self.First = {} # A dictionary of precomputed FIRST(x) symbols
  1249. self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols
  1250. self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the
  1251. # form ('right',level) or ('nonassoc', level) or ('left',level)
  1252. self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer.
  1253. # This is only used to provide error checking and to generate
  1254. # a warning about unused precedence rules.
  1255. self.Start = None # Starting symbol for the grammar
  1256. def __len__(self):
  1257. return len(self.Productions)
  1258. def __getitem__(self, index):
  1259. return self.Productions[index]
  1260. # -----------------------------------------------------------------------------
  1261. # set_precedence()
  1262. #
  1263. # Sets the precedence for a given terminal. assoc is the associativity such as
  1264. # 'left','right', or 'nonassoc'. level is a numeric level.
  1265. #
  1266. # -----------------------------------------------------------------------------
  1267. def set_precedence(self, term, assoc, level):
  1268. assert self.Productions == [None], 'Must call set_precedence() before add_production()'
  1269. if term in self.Precedence:
  1270. raise GrammarError('Precedence already specified for terminal %r' % term)
  1271. if assoc not in ['left', 'right', 'nonassoc']:
  1272. raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
  1273. self.Precedence[term] = (assoc, level)
  1274. # -----------------------------------------------------------------------------
  1275. # add_production()
  1276. #
  1277. # Given an action function, this function assembles a production rule and
  1278. # computes its precedence level.
  1279. #
  1280. # The production rule is supplied as a list of symbols. For example,
  1281. # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
  1282. # symbols ['expr','PLUS','term'].
  1283. #
  1284. # Precedence is determined by the precedence of the right-most non-terminal
  1285. # or the precedence of a terminal specified by %prec.
  1286. #
  1287. # A variety of error checks are performed to make sure production symbols
  1288. # are valid and that %prec is used correctly.
  1289. # -----------------------------------------------------------------------------
  1290. def add_production(self, prodname, syms, func=None, file='', line=0):
  1291. if prodname in self.Terminals:
  1292. raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
  1293. if prodname == 'error':
  1294. raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
  1295. if not _is_identifier.match(prodname):
  1296. raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
  1297. # Look for literal tokens
  1298. for n, s in enumerate(syms):
  1299. if s[0] in "'\"":
  1300. try:
  1301. c = eval(s)
  1302. if (len(c) > 1):
  1303. raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
  1304. (file, line, s, prodname))
  1305. if c not in self.Terminals:
  1306. self.Terminals[c] = []
  1307. syms[n] = c
  1308. continue
  1309. except SyntaxError:
  1310. pass
  1311. if not _is_identifier.match(s) and s != '%prec':
  1312. raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
  1313. # Determine the precedence level
  1314. if '%prec' in syms:
  1315. if syms[-1] == '%prec':
  1316. raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
  1317. if syms[-2] != '%prec':
  1318. raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
  1319. (file, line))
  1320. precname = syms[-1]
  1321. prodprec = self.Precedence.get(precname)
  1322. if not prodprec:
  1323. raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
  1324. else:
  1325. self.UsedPrecedence.add(precname)
  1326. del syms[-2:] # Drop %prec from the rule
  1327. else:
  1328. # If no %prec, precedence is determined by the rightmost terminal symbol
  1329. precname = rightmost_terminal(syms, self.Terminals)
  1330. prodprec = self.Precedence.get(precname, ('right', 0))
  1331. # See if the rule is already in the rulemap
  1332. map = '%s -> %s' % (prodname, syms)
  1333. if map in self.Prodmap:
  1334. m = self.Prodmap[map]
  1335. raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
  1336. 'Previous definition at %s:%d' % (m.file, m.line))
  1337. # From this point on, everything is valid. Create a new Production instance
  1338. pnumber = len(self.Productions)
  1339. if prodname not in self.Nonterminals:
  1340. self.Nonterminals[prodname] = []
  1341. # Add the production number to Terminals and Nonterminals
  1342. for t in syms:
  1343. if t in self.Terminals:
  1344. self.Terminals[t].append(pnumber)
  1345. else:
  1346. if t not in self.Nonterminals:
  1347. self.Nonterminals[t] = []
  1348. self.Nonterminals[t].append(pnumber)
  1349. # Create a production and add it to the list of productions
  1350. p = Production(pnumber, prodname, syms, prodprec, func, file, line)
  1351. self.Productions.append(p)
  1352. self.Prodmap[map] = p
  1353. # Add to the global productions list
  1354. try:
  1355. self.Prodnames[prodname].append(p)
  1356. except KeyError:
  1357. self.Prodnames[prodname] = [p]
  1358. # -----------------------------------------------------------------------------
  1359. # set_start()
  1360. #
  1361. # Sets the starting symbol and creates the augmented grammar. Production
  1362. # rule 0 is S' -> start where start is the start symbol.
  1363. # -----------------------------------------------------------------------------
  1364. def set_start(self, start=None):
  1365. if not start:
  1366. start = self.Productions[1].name
  1367. if start not in self.Nonterminals:
  1368. raise GrammarError('start symbol %s undefined' % start)
  1369. self.Productions[0] = Production(0, "S'", [start])
  1370. self.Nonterminals[start].append(0)
  1371. self.Start = start
  1372. # -----------------------------------------------------------------------------
  1373. # find_unreachable()
  1374. #
  1375. # Find all of the nonterminal symbols that can't be reached from the starting
  1376. # symbol. Returns a list of nonterminals that can't be reached.
  1377. # -----------------------------------------------------------------------------
  1378. def find_unreachable(self):
  1379. # Mark all symbols that are reachable from a symbol s
  1380. def mark_reachable_from(s):
  1381. if s in reachable:
  1382. return
  1383. reachable.add(s)
  1384. for p in self.Prodnames.get(s, []):
  1385. for r in p.prod:
  1386. mark_reachable_from(r)
  1387. reachable = set()
  1388. mark_reachable_from(self.Productions[0].prod[0])
  1389. return [s for s in self.Nonterminals if s not in reachable]
  1390. # -----------------------------------------------------------------------------
  1391. # infinite_cycles()
  1392. #
  1393. # This function looks at the various parsing rules and tries to detect
  1394. # infinite recursion cycles (grammar rules where there is no possible way
  1395. # to derive a string of only terminals).
  1396. # -----------------------------------------------------------------------------
  1397. def infinite_cycles(self):
  1398. terminates = {}
  1399. # Terminals:
  1400. for t in self.Terminals:
  1401. terminates[t] = True
  1402. terminates['$end'] = True
  1403. # Nonterminals:
  1404. # Initialize to false:
  1405. for n in self.Nonterminals:
  1406. terminates[n] = False
  1407. # Then propagate termination until no change:
  1408. while True:
  1409. some_change = False
  1410. for (n, pl) in self.Prodnames.items():
  1411. # Nonterminal n terminates iff any of its productions terminates.
  1412. for p in pl:
  1413. # Production p terminates iff all of its rhs symbols terminate.
  1414. for s in p.prod:
  1415. if not terminates[s]:
  1416. # The symbol s does not terminate,
  1417. # so production p does not terminate.
  1418. p_terminates = False
  1419. break
  1420. else:
  1421. # didn't break from the loop,
  1422. # so every symbol s terminates
  1423. # so production p terminates.
  1424. p_terminates = True
  1425. if p_terminates:
  1426. # symbol n terminates!
  1427. if not terminates[n]:
  1428. terminates[n] = True
  1429. some_change = True
  1430. # Don't need to consider any more productions for this n.
  1431. break
  1432. if not some_change:
  1433. break
  1434. infinite = []
  1435. for (s, term) in terminates.items():
  1436. if not term:
  1437. if s not in self.Prodnames and s not in self.Terminals and s != 'error':
  1438. # s is used-but-not-defined, and we've already warned of that,
  1439. # so it would be overkill to say that it's also non-terminating.
  1440. pass
  1441. else:
  1442. infinite.append(s)
  1443. return infinite
  1444. # -----------------------------------------------------------------------------
  1445. # undefined_symbols()
  1446. #
  1447. # Find all symbols that were used the grammar, but not defined as tokens or
  1448. # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol
  1449. # and prod is the production where the symbol was used.
  1450. # -----------------------------------------------------------------------------
  1451. def undefined_symbols(self):
  1452. result = []
  1453. for p in self.Productions:
  1454. if not p:
  1455. continue
  1456. for s in p.prod:
  1457. if s not in self.Prodnames and s not in self.Terminals and s != 'error':
  1458. result.append((s, p))
  1459. return result
  1460. # -----------------------------------------------------------------------------
  1461. # unused_terminals()
  1462. #
  1463. # Find all terminals that were defined, but not used by the grammar. Returns
  1464. # a list of all symbols.
  1465. # -----------------------------------------------------------------------------
  1466. def unused_terminals(self):
  1467. unused_tok = []
  1468. for s, v in self.Terminals.items():
  1469. if s != 'error' and not v:
  1470. unused_tok.append(s)
  1471. return unused_tok
  1472. # ------------------------------------------------------------------------------
  1473. # unused_rules()
  1474. #
  1475. # Find all grammar rules that were defined, but not used (maybe not reachable)
  1476. # Returns a list of productions.
  1477. # ------------------------------------------------------------------------------
  1478. def unused_rules(self):
  1479. unused_prod = []
  1480. for s, v in self.Nonterminals.items():
  1481. if not v:
  1482. p = self.Prodnames[s][0]
  1483. unused_prod.append(p)
  1484. return unused_prod
  1485. # -----------------------------------------------------------------------------
  1486. # unused_precedence()
  1487. #
  1488. # Returns a list of tuples (term,precedence) corresponding to precedence
  1489. # rules that were never used by the grammar. term is the name of the terminal
  1490. # on which precedence was applied and precedence is a string such as 'left' or
  1491. # 'right' corresponding to the type of precedence.
  1492. # -----------------------------------------------------------------------------
  1493. def unused_precedence(self):
  1494. unused = []
  1495. for termname in self.Precedence:
  1496. if not (termname in self.Terminals or termname in self.UsedPrecedence):
  1497. unused.append((termname, self.Precedence[termname][0]))
  1498. return unused
  1499. # -------------------------------------------------------------------------
  1500. # _first()
  1501. #
  1502. # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
  1503. #
  1504. # During execution of compute_first1, the result may be incomplete.
  1505. # Afterward (e.g., when called from compute_follow()), it will be complete.
  1506. # -------------------------------------------------------------------------
  1507. def _first(self, beta):
  1508. # We are computing First(x1,x2,x3,...,xn)
  1509. result = []
  1510. for x in beta:
  1511. x_produces_empty = False
  1512. # Add all the non-<empty> symbols of First[x] to the result.
  1513. for f in self.First[x]:
  1514. if f == '<empty>':
  1515. x_produces_empty = True
  1516. else:
  1517. if f not in result:
  1518. result.append(f)
  1519. if x_produces_empty:
  1520. # We have to consider the next x in beta,
  1521. # i.e. stay in the loop.
  1522. pass
  1523. else:
  1524. # We don't have to consider any further symbols in beta.
  1525. break
  1526. else:
  1527. # There was no 'break' from the loop,
  1528. # so x_produces_empty was true for all x in beta,
  1529. # so beta produces empty as well.
  1530. result.append('<empty>')
  1531. return result
  1532. # -------------------------------------------------------------------------
  1533. # compute_first()
  1534. #
  1535. # Compute the value of FIRST1(X) for all symbols
  1536. # -------------------------------------------------------------------------
  1537. def compute_first(self):
  1538. if self.First:
  1539. return self.First
  1540. # Terminals:
  1541. for t in self.Terminals:
  1542. self.First[t] = [t]
  1543. self.First['$end'] = ['$end']
  1544. # Nonterminals:
  1545. # Initialize to the empty set:
  1546. for n in self.Nonterminals:
  1547. self.First[n] = []
  1548. # Then propagate symbols until no change:
  1549. while True:
  1550. some_change = False
  1551. for n in self.Nonterminals:
  1552. for p in self.Prodnames[n]:
  1553. for f in self._first(p.prod):
  1554. if f not in self.First[n]:
  1555. self.First[n].append(f)
  1556. some_change = True
  1557. if not some_change:
  1558. break
  1559. return self.First
  1560. # ---------------------------------------------------------------------
  1561. # compute_follow()
  1562. #
  1563. # Computes all of the follow sets for every non-terminal symbol. The
  1564. # follow set is the set of all symbols that might follow a given
  1565. # non-terminal. See the Dragon book, 2nd Ed. p. 189.
  1566. # ---------------------------------------------------------------------
  1567. def compute_follow(self, start=None):
  1568. # If already computed, return the result
  1569. if self.Follow:
  1570. return self.Follow
  1571. # If first sets not computed yet, do that first.
  1572. if not self.First:
  1573. self.compute_first()
  1574. # Add '$end' to the follow list of the start symbol
  1575. for k in self.Nonterminals:
  1576. self.Follow[k] = []
  1577. if not start:
  1578. start = self.Productions[1].name
  1579. self.Follow[start] = ['$end']
  1580. while True:
  1581. didadd = False
  1582. for p in self.Productions[1:]:
  1583. # Here is the production set
  1584. for i, B in enumerate(p.prod):
  1585. if B in self.Nonterminals:
  1586. # Okay. We got a non-terminal in a production
  1587. fst = self._first(p.prod[i+1:])
  1588. hasempty = False
  1589. for f in fst:
  1590. if f != '<empty>' and f not in self.Follow[B]:
  1591. self.Follow[B].append(f)
  1592. didadd = True
  1593. if f == '<empty>':
  1594. hasempty = True
  1595. if hasempty or i == (len(p.prod)-1):
  1596. # Add elements of follow(a) to follow(b)
  1597. for f in self.Follow[p.name]:
  1598. if f not in self.Follow[B]:
  1599. self.Follow[B].append(f)
  1600. didadd = True
  1601. if not didadd:
  1602. break
  1603. return self.Follow
  1604. # -----------------------------------------------------------------------------
  1605. # build_lritems()
  1606. #
  1607. # This function walks the list of productions and builds a complete set of the
  1608. # LR items. The LR items are stored in two ways: First, they are uniquely
  1609. # numbered and placed in the list _lritems. Second, a linked list of LR items
  1610. # is built for each production. For example:
  1611. #
  1612. # E -> E PLUS E
  1613. #
  1614. # Creates the list
  1615. #
  1616. # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
  1617. # -----------------------------------------------------------------------------
  1618. def build_lritems(self):
  1619. for p in self.Productions:
  1620. lastlri = p
  1621. i = 0
  1622. lr_items = []
  1623. while True:
  1624. if i > len(p):
  1625. lri = None
  1626. else:
  1627. lri = LRItem(p, i)
  1628. # Precompute the list of productions immediately following
  1629. try:
  1630. lri.lr_after = self.Prodnames[lri.prod[i+1]]
  1631. except (IndexError, KeyError):
  1632. lri.lr_after = []
  1633. try:
  1634. lri.lr_before = lri.prod[i-1]
  1635. except IndexError:
  1636. lri.lr_before = None
  1637. lastlri.lr_next = lri
  1638. if not lri:
  1639. break
  1640. lr_items.append(lri)
  1641. lastlri = lri
  1642. i += 1
  1643. p.lr_items = lr_items
  1644. # -----------------------------------------------------------------------------
  1645. # == Class LRTable ==
  1646. #
  1647. # This basic class represents a basic table of LR parsing information.
  1648. # Methods for generating the tables are not defined here. They are defined
  1649. # in the derived class LRGeneratedTable.
  1650. # -----------------------------------------------------------------------------
  1651. class VersionError(YaccError):
  1652. pass
  1653. class LRTable(object):
  1654. def __init__(self):
  1655. self.lr_action = None
  1656. self.lr_goto = None
  1657. self.lr_productions = None
  1658. self.lr_method = None
  1659. def read_table(self, module):
  1660. if isinstance(module, types.ModuleType):
  1661. parsetab = module
  1662. else:
  1663. exec('import %s' % module)
  1664. parsetab = sys.modules[module]
  1665. if parsetab._tabversion != __tabversion__:
  1666. raise VersionError('yacc table file version is out of date')
  1667. self.lr_action = parsetab._lr_action
  1668. self.lr_goto = parsetab._lr_goto
  1669. self.lr_productions = []
  1670. for p in parsetab._lr_productions:
  1671. self.lr_productions.append(MiniProduction(*p))
  1672. self.lr_method = parsetab._lr_method
  1673. return parsetab._lr_signature
  1674. def read_pickle(self, filename):
  1675. try:
  1676. import cPickle as pickle
  1677. except ImportError:
  1678. import pickle
  1679. if not os.path.exists(filename):
  1680. raise ImportError
  1681. in_f = open(filename, 'rb')
  1682. tabversion = pickle.load(in_f)
  1683. if tabversion != __tabversion__:
  1684. raise VersionError('yacc table file version is out of date')
  1685. self.lr_method = pickle.load(in_f)
  1686. signature = pickle.load(in_f)
  1687. self.lr_action = pickle.load(in_f)
  1688. self.lr_goto = pickle.load(in_f)
  1689. productions = pickle.load(in_f)
  1690. self.lr_productions = []
  1691. for p in productions:
  1692. self.lr_productions.append(MiniProduction(*p))
  1693. in_f.close()
  1694. return signature
  1695. # Bind all production function names to callable objects in pdict
  1696. def bind_callables(self, pdict):
  1697. for p in self.lr_productions:
  1698. p.bind(pdict)
  1699. # -----------------------------------------------------------------------------
  1700. # === LR Generator ===
  1701. #
  1702. # The following classes and functions are used to generate LR parsing tables on
  1703. # a grammar.
  1704. # -----------------------------------------------------------------------------
  1705. # -----------------------------------------------------------------------------
  1706. # digraph()
  1707. # traverse()
  1708. #
  1709. # The following two functions are used to compute set valued functions
  1710. # of the form:
  1711. #
  1712. # F(x) = F'(x) U U{F(y) | x R y}
  1713. #
  1714. # This is used to compute the values of Read() sets as well as FOLLOW sets
  1715. # in LALR(1) generation.
  1716. #
  1717. # Inputs: X - An input set
  1718. # R - A relation
  1719. # FP - Set-valued function
  1720. # ------------------------------------------------------------------------------
  1721. def digraph(X, R, FP):
  1722. N = {}
  1723. for x in X:
  1724. N[x] = 0
  1725. stack = []
  1726. F = {}
  1727. for x in X:
  1728. if N[x] == 0:
  1729. traverse(x, N, stack, F, X, R, FP)
  1730. return F
def traverse(x, N, stack, F, X, R, FP):
    """Depth-first helper for digraph(): visit x and everything x relates to.

    x     - node being visited
    N     - dict of visit marks; 0 means not yet visited, and MAXINT is
            assigned once a node has been popped from the stack
    stack - list of nodes currently being processed
    F     - dict receiving the computed result list for each node
    X     - the full input set (only passed through to recursive calls)
    R     - relation: R(x) returns the nodes related to x
    FP    - initial value function: FP(x) returns the starting list for x

    The list returned by FP(x) is stored in F[x] as-is and extended in
    place, so FP's return value is mutated.
    """
    stack.append(x)
    d = len(stack)           # Depth at which x was pushed
    N[x] = d
    F[x] = FP(x)             # F(x) <- F'(x)

    rel = R(x)               # Get the y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        # Remember the lowest mark seen among x and its related nodes.
        N[x] = min(N[x], N[y])
        # Merge whatever has been computed for y into F[x].
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        # No related node lowered x's mark: pop the stack down to and
        # including x, marking each popped node as finished (MAXINT) and
        # giving it the same result list object as x.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
  1752. class LALRError(YaccError):
  1753. pass
  1754. # -----------------------------------------------------------------------------
  1755. # == LRGeneratedTable ==
  1756. #
  1757. # This class implements the LR table generation algorithm. There are no
  1758. # public methods except for write()
  1759. # -----------------------------------------------------------------------------
  1760. class LRGeneratedTable(LRTable):
  1761. def __init__(self, grammar, method='LALR', log=None):
  1762. if method not in ['SLR', 'LALR']:
  1763. raise LALRError('Unsupported method %s' % method)
  1764. self.grammar = grammar
  1765. self.lr_method = method
  1766. # Set up the logger
  1767. if not log:
  1768. log = NullLogger()
  1769. self.log = log
  1770. # Internal attributes
  1771. self.lr_action = {} # Action table
  1772. self.lr_goto = {} # Goto table
  1773. self.lr_productions = grammar.Productions # Copy of grammar Production array
  1774. self.lr_goto_cache = {} # Cache of computed gotos
  1775. self.lr0_cidhash = {} # Cache of closures
  1776. self._add_count = 0 # Internal counter used to detect cycles
  1777. # Diagonistic information filled in by the table generator
  1778. self.sr_conflict = 0
  1779. self.rr_conflict = 0
  1780. self.conflicts = [] # List of conflicts
  1781. self.sr_conflicts = []
  1782. self.rr_conflicts = []
  1783. # Build the tables
  1784. self.grammar.build_lritems()
  1785. self.grammar.compute_first()
  1786. self.grammar.compute_follow()
  1787. self.lr_parse_table()
  1788. # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
  1789. def lr0_closure(self, I):
  1790. self._add_count += 1
  1791. # Add everything in I to J
  1792. J = I[:]
  1793. didadd = True
  1794. while didadd:
  1795. didadd = False
  1796. for j in J:
  1797. for x in j.lr_after:
  1798. if getattr(x, 'lr0_added', 0) == self._add_count:
  1799. continue
  1800. # Add B --> .G to J
  1801. J.append(x.lr_next)
  1802. x.lr0_added = self._add_count
  1803. didadd = True
  1804. return J
  1805. # Compute the LR(0) goto function goto(I,X) where I is a set
  1806. # of LR(0) items and X is a grammar symbol. This function is written
  1807. # in a way that guarantees uniqueness of the generated goto sets
  1808. # (i.e. the same goto set will never be returned as two different Python
  1809. # objects). With uniqueness, we can later do fast set comparisons using
  1810. # id(obj) instead of element-wise comparison.
    def lr0_goto(self, I, x):
        """Return goto(I, x) for the item list I and grammar symbol x.

        Results are cached in self.lr_goto_cache in two ways: directly under
        the key (id(I), x), and in a per-symbol tree of dictionaries keyed
        by the id() of each successor item, whose '$end' entry holds the
        closure for that exact sequence of items.  The second cache is what
        makes equal goto sets come back as the same Python object.

        When no item of I can advance over x, the value returned is whatever
        was found under '$end' (None on the first such lookup).
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        # Root of the per-symbol tree of dictionaries.
        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                # The next item has x just before its dot: descend one
                # level in the tree, keyed by that item's identity.
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        # '$end' marks the leaf holding the closure for this item sequence.
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                # No items advanced over x: record the empty list; g keeps
                # the value obtained from s.get('$end') above.
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g
  1841. # Compute the LR(0) sets of item function
  1842. def lr0_items(self):
  1843. C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
  1844. i = 0
  1845. for I in C:
  1846. self.lr0_cidhash[id(I)] = i
  1847. i += 1
  1848. # Loop over the items in C and each grammar symbols
  1849. i = 0
  1850. while i < len(C):
  1851. I = C[i]
  1852. i += 1
  1853. # Collect all of the symbols that could possibly be in the goto(I,X) sets
  1854. asyms = {}
  1855. for ii in I:
  1856. for s in ii.usyms:
  1857. asyms[s] = None
  1858. for x in asyms:
  1859. g = self.lr0_goto(I, x)
  1860. if not g or id(g) in self.lr0_cidhash:
  1861. continue
  1862. self.lr0_cidhash[id(g)] = len(C)
  1863. C.append(g)
  1864. return C
  1865. # -----------------------------------------------------------------------------
  1866. # ==== LALR(1) Parsing ====
  1867. #
  1868. # LALR(1) parsing is almost exactly the same as SLR except that instead of
  1869. # relying upon Follow() sets when performing reductions, a more selective
  1870. # lookahead set that incorporates the state of the LR(0) machine is utilized.
  1871. # Thus, we mainly just have to focus on calculating the lookahead sets.
  1872. #
  1873. # The method used here is due to DeRemer and Pennello (1982).
  1874. #
  1875. # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
  1876. # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
  1877. # Vol. 4, No. 4, Oct. 1982, pp. 615-649
  1878. #
  1879. # Further details can also be found in:
  1880. #
  1881. # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
  1882. # McGraw-Hill Book Company, (1985).
  1883. #
  1884. # -----------------------------------------------------------------------------
  1885. # -----------------------------------------------------------------------------
  1886. # compute_nullable_nonterminals()
  1887. #
  1888. # Creates a set containing all of the non-terminals that might produce
  1889. # an empty production.
  1890. # -----------------------------------------------------------------------------
  1891. def compute_nullable_nonterminals(self):
  1892. nullable = set()
  1893. num_nullable = 0
  1894. while True:
  1895. for p in self.grammar.Productions[1:]:
  1896. if p.len == 0:
  1897. nullable.add(p.name)
  1898. continue
  1899. for t in p.prod:
  1900. if t not in nullable:
  1901. break
  1902. else:
  1903. nullable.add(p.name)
  1904. if len(nullable) == num_nullable:
  1905. break
  1906. num_nullable = len(nullable)
  1907. return nullable
  1908. # -----------------------------------------------------------------------------
  1909. # find_nonterminal_transitions(C)
  1910. #
  1911. # Given a set of LR(0) items, this function finds all of the non-terminal
  1912. # transitions. These are transitions in which a dot appears immediately before
  1913. # a non-terminal. Returns a list of tuples of the form (state,N) where state
  1914. # is the state number and N is the nonterminal symbol.
  1915. #
  1916. # The input C is the set of LR(0) items.
  1917. # -----------------------------------------------------------------------------
  1918. def find_nonterminal_transitions(self, C):
  1919. trans = []
  1920. for stateno, state in enumerate(C):
  1921. for p in state:
  1922. if p.lr_index < p.len - 1:
  1923. t = (stateno, p.prod[p.lr_index+1])
  1924. if t[1] in self.grammar.Nonterminals:
  1925. if t not in trans:
  1926. trans.append(t)
  1927. return trans
  1928. # -----------------------------------------------------------------------------
  1929. # dr_relation()
  1930. #
  1931. # Computes the DR(p,A) relationships for non-terminal transitions. The input
  1932. # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
  1933. #
  1934. # Returns a list of terminals.
  1935. # -----------------------------------------------------------------------------
  1936. def dr_relation(self, C, trans, nullable):
  1937. dr_set = {}
  1938. state, N = trans
  1939. terms = []
  1940. g = self.lr0_goto(C[state], N)
  1941. for p in g:
  1942. if p.lr_index < p.len - 1:
  1943. a = p.prod[p.lr_index+1]
  1944. if a in self.grammar.Terminals:
  1945. if a not in terms:
  1946. terms.append(a)
  1947. # This extra bit is to handle the start state
  1948. if state == 0 and N == self.grammar.Productions[0].prod[0]:
  1949. terms.append('$end')
  1950. return terms
  1951. # -----------------------------------------------------------------------------
  1952. # reads_relation()
  1953. #
  1954. # Computes the READS() relation (p,A) READS (t,C).
  1955. # -----------------------------------------------------------------------------
  1956. def reads_relation(self, C, trans, empty):
  1957. # Look for empty transitions
  1958. rel = []
  1959. state, N = trans
  1960. g = self.lr0_goto(C[state], N)
  1961. j = self.lr0_cidhash.get(id(g), -1)
  1962. for p in g:
  1963. if p.lr_index < p.len - 1:
  1964. a = p.prod[p.lr_index + 1]
  1965. if a in empty:
  1966. rel.append((j, a))
  1967. return rel
  1968. # -----------------------------------------------------------------------------
  1969. # compute_lookback_includes()
  1970. #
  1971. # Determines the lookback and includes relations
  1972. #
  1973. # LOOKBACK:
  1974. #
  1975. # This relation is determined by running the LR(0) state machine forward.
  1976. # For example, starting with a production "N : . A B C", we run it forward
  1977. # to obtain "N : A B C ." We then build a relationship between this final
  1978. # state and the starting state. These relationships are stored in a dictionary
  1979. # lookdict.
  1980. #
  1981. # INCLUDES:
  1982. #
  1983. # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
  1984. #
  1985. # This relation is used to determine non-terminal transitions that occur
  1986. # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
  1987. # if the following holds:
  1988. #
  1989. # B -> LAT, where T -> epsilon and p' -L-> p
  1990. #
  1991. # L is essentially a prefix (which may be empty), T is a suffix that must be
  1992. # able to derive an empty string. State p' must lead to state p with the string L.
  1993. #
  1994. # -----------------------------------------------------------------------------
    def compute_lookback_includes(self, C, trans, nullable):
        """Compute the LOOKBACK and INCLUDES relations.

        C is the list of LR(0) item sets, trans the list of non-terminal
        transitions (state, N) and nullable the collection of nullable
        nonterminals.

        Returns a tuple (lookdict, includedict):
          lookdict    maps each (state, N) to a list of (j, r) pairs, where
                      j is the state reached after running an item for N
                      forward and r is the matching item found in C[j].
          includedict maps a transition (j, t) to the list of transitions
                      (state, N) that were found to include it.
        """
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # A terminal follows, so the rest cannot be empty
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # The loop ran to the end without a break, so
                            # every remaining symbol is nullable.
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        # Every symbol matched: r is p with the dot moved
                        # to position r.lr_index.
                        lookb.append((j, r))
            # Record (state, N) under every transition it was found to include.
            for i in includes:
                if i not in includedict:
                    includedict[i] = []
                includedict[i].append((state, N))
            lookdict[(state, N)] = lookb

        return lookdict, includedict
  2053. # -----------------------------------------------------------------------------
  2054. # compute_read_sets()
  2055. #
  2056. # Given a set of LR(0) items, this function computes the read sets.
  2057. #
  2058. # Inputs: C = Set of LR(0) items
  2059. # ntrans = Set of nonterminal transitions
  2060. # nullable = Set of empty transitions
  2061. #
  2062. # Returns a set containing the read sets
  2063. # -----------------------------------------------------------------------------
  2064. def compute_read_sets(self, C, ntrans, nullable):
  2065. FP = lambda x: self.dr_relation(C, x, nullable)
  2066. R = lambda x: self.reads_relation(C, x, nullable)
  2067. F = digraph(ntrans, R, FP)
  2068. return F
  2069. # -----------------------------------------------------------------------------
  2070. # compute_follow_sets()
  2071. #
  2072. # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
  2073. # and an include set, this function computes the follow sets
  2074. #
  2075. # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
  2076. #
  2077. # Inputs:
  2078. # ntrans = Set of nonterminal transitions
  2079. # readsets = Readset (previously computed)
  2080. # inclsets = Include sets (previously computed)
  2081. #
  2082. # Returns a set containing the follow sets
  2083. # -----------------------------------------------------------------------------
  2084. def compute_follow_sets(self, ntrans, readsets, inclsets):
  2085. FP = lambda x: readsets[x]
  2086. R = lambda x: inclsets.get(x, [])
  2087. F = digraph(ntrans, R, FP)
  2088. return F
  2089. # -----------------------------------------------------------------------------
  2090. # add_lookaheads()
  2091. #
  2092. # Attaches the lookahead symbols to grammar rules.
  2093. #
  2094. # Inputs: lookbacks - Set of lookback relations
  2095. # followset - Computed follow set
  2096. #
  2097. # This function directly attaches the lookaheads to productions contained
  2098. # in the lookbacks set
  2099. # -----------------------------------------------------------------------------
  2100. def add_lookaheads(self, lookbacks, followset):
  2101. for trans, lb in lookbacks.items():
  2102. # Loop over productions in lookback
  2103. for state, p in lb:
  2104. if state not in p.lookaheads:
  2105. p.lookaheads[state] = []
  2106. f = followset.get(trans, [])
  2107. for a in f:
  2108. if a not in p.lookaheads[state]:
  2109. p.lookaheads[state].append(a)
  2110. # -----------------------------------------------------------------------------
  2111. # add_lalr_lookaheads()
  2112. #
  2113. # This function does all of the work of adding lookahead information for use
  2114. # with LALR parsing
  2115. # -----------------------------------------------------------------------------
  2116. def add_lalr_lookaheads(self, C):
  2117. # Determine all of the nullable nonterminals
  2118. nullable = self.compute_nullable_nonterminals()
  2119. # Find all non-terminal transitions
  2120. trans = self.find_nonterminal_transitions(C)
  2121. # Compute read sets
  2122. readsets = self.compute_read_sets(C, trans, nullable)
  2123. # Compute lookback/includes relations
  2124. lookd, included = self.compute_lookback_includes(C, trans, nullable)
  2125. # Compute LALR FOLLOW sets
  2126. followsets = self.compute_follow_sets(trans, readsets, included)
  2127. # Add all of the lookaheads
  2128. self.add_lookaheads(lookd, followsets)
  2129. # -----------------------------------------------------------------------------
  2130. # lr_parse_table()
  2131. #
  2132. # This function constructs the parse tables for SLR or LALR
  2133. # -----------------------------------------------------------------------------
def lr_parse_table(self):
    """Construct the LR action and goto tables, one state at a time.

    The LR(0) item sets come from self.lr0_items(). When self.lr_method is
    'LALR', add_lalr_lookaheads() is run first and reductions use the
    per-state lookaheads stored on each production; otherwise reductions
    use self.grammar.Follow for the production's name.

    Results are stored in self.lr_action and self.lr_goto, indexed by state
    number. Entries written into a state's action dictionary are:

        j > 0     shift and go to state j
        -n < 0    reduce using rule number n
        0         accept (only for '$end' on the S' production)
        None      no action, written for a 'nonassoc' precedence tie

    Shift/reduce and reduce/reduce conflicts that are reported are
    appended to self.sr_conflicts and self.rr_conflicts, and a description
    of every state is written to self.log.

    Raises LALRError on a shift/shift conflict or on a conflict against an
    existing action that is neither a shift (> 0) nor a reduce (< 0).
    """
    Productions = self.grammar.Productions
    Precedence = self.grammar.Precedence
    goto = self.lr_goto  # Goto array
    action = self.lr_action  # Action array
    log = self.log  # Logger for output
    actionp = {}  # Action production array (temporary)
    log.info('Parsing method: %s', self.lr_method)
    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
    # This determines the number of states
    C = self.lr0_items()
    if self.lr_method == 'LALR':
        self.add_lalr_lookaheads(C)
    # Build the parser table, state by state
    st = 0
    for I in C:
        # Loop over each production in I
        actlist = []  # List of actions: (symbol, production, description)
        st_action = {}  # symbol -> encoded action for this state
        st_actionp = {}  # symbol -> production that supplied the action
        st_goto = {}  # nonterminal -> target state
        log.info('')
        log.info('state %d', st)
        log.info('')
        for p in I:
            log.info(' (%d) %s', p.number, p)
        log.info('')
        for p in I:
            if p.len == p.lr_index + 1:
                # The "." is at the end of the production
                if p.name == "S'":
                    # Start symbol. Accept!
                    st_action['$end'] = 0
                    st_actionp['$end'] = p
                else:
                    # We are at the end of a production. Reduce!
                    if self.lr_method == 'LALR':
                        laheads = p.lookaheads[st]
                    else:
                        laheads = self.grammar.Follow[p.name]
                    for a in laheads:
                        actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                        r = st_action.get(a)
                        if r is not None:
                            # Whoa. Have a shift/reduce or reduce/reduce conflict
                            if r > 0:
                                # Need to decide on shift or reduce here
                                # By default we favor shifting. Need to add
                                # some precedence rules here.
                                # Shift precedence comes from the token
                                sprec, slevel = Precedence.get(a, ('right', 0))
                                # Reduce precedence comes from rule being reduced (p)
                                rprec, rlevel = Productions[p.number].prec
                                if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                    # We really need to reduce here.
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    # Only reported when neither side has a precedence level
                                    if not slevel and not rlevel:
                                        log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
                                        self.sr_conflicts.append((st, a, 'reduce'))
                                    Productions[p.number].reduced += 1
                                elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                    # Equal level and non-associative: clear the action
                                    st_action[a] = None
                                else:
                                    # Hmmm. Guess we'll keep the shift
                                    if not rlevel:
                                        log.info(' ! shift/reduce conflict for %s resolved as shift', a)
                                        self.sr_conflicts.append((st, a, 'shift'))
                            elif r < 0:
                                # Reduce/reduce conflict. In this case, we favor the rule
                                # that was defined first in the grammar file
                                oldp = Productions[-r]
                                pp = Productions[p.number]
                                if oldp.line > pp.line:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    chosenp, rejectp = pp, oldp
                                    # Move the reduction count from the old rule to the new one
                                    Productions[p.number].reduced += 1
                                    Productions[oldp.number].reduced -= 1
                                else:
                                    chosenp, rejectp = oldp, pp
                                self.rr_conflicts.append((st, chosenp, rejectp))
                                log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                         a, st_actionp[a].number, st_actionp[a])
                            else:
                                # r == 0: the existing action is the accept entry
                                raise LALRError('Unknown conflict in state %d' % st)
                        else:
                            # No existing action for this symbol: plain reduce
                            st_action[a] = -p.number
                            st_actionp[a] = p
                            Productions[p.number].reduced += 1
            else:
                i = p.lr_index
                a = p.prod[i+1]  # Get symbol right after the "."
                if a in self.grammar.Terminals:
                    g = self.lr0_goto(I, a)
                    # lr0_cidhash is looked up by id() of the goto set; -1 means not found
                    j = self.lr0_cidhash.get(id(g), -1)
                    if j >= 0:
                        # We are in a shift state
                        actlist.append((a, p, 'shift and go to state %d' % j))
                        r = st_action.get(a)
                        if r is not None:
                            # Whoa have a shift/reduce or shift/shift conflict
                            if r > 0:
                                if r != j:
                                    raise LALRError('Shift/shift conflict in state %d' % st)
                            elif r < 0:
                                # Do a precedence check.
                                # - if precedence of reduce rule is higher, we reduce.
                                # - if precedence of reduce is same and left assoc, we reduce.
                                # - otherwise we shift
                                # Shift precedence comes from the token
                                sprec, slevel = Precedence.get(a, ('right', 0))
                                # Reduce precedence comes from the rule that could have been reduced
                                rprec, rlevel = Productions[st_actionp[a].number].prec
                                if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                    # We decide to shift here... highest precedence to shift
                                    Productions[st_actionp[a].number].reduced -= 1
                                    st_action[a] = j
                                    st_actionp[a] = p
                                    if not rlevel:
                                        log.info(' ! shift/reduce conflict for %s resolved as shift', a)
                                        self.sr_conflicts.append((st, a, 'shift'))
                                elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                    # Equal level and non-associative: clear the action
                                    st_action[a] = None
                                else:
                                    # Hmmm. Guess we'll keep the reduce
                                    if not slevel and not rlevel:
                                        log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
                                        self.sr_conflicts.append((st, a, 'reduce'))
                            else:
                                # r == 0: the existing action is the accept entry
                                raise LALRError('Unknown conflict in state %d' % st)
                        else:
                            # No existing action for this symbol: plain shift
                            st_action[a] = j
                            st_actionp[a] = p
        # Print the actions associated with each terminal
        _actprint = {}  # (symbol, description) pairs already logged
        for a, p, m in actlist:
            if a in st_action:
                if p is st_actionp[a]:
                    log.info(' %-15s %s', a, m)
                    _actprint[(a, m)] = 1
        log.info('')
        # Print the actions that were not used. (debugging)
        not_used = 0
        for a, p, m in actlist:
            if a in st_action:
                if p is not st_actionp[a]:
                    if not (a, m) in _actprint:
                        log.debug(' ! %-15s [ %s ]', a, m)
                        not_used = 1
                        _actprint[(a, m)] = 1
        if not_used:
            log.debug('')
        # Construct the goto table for this state
        nkeys = {}  # nonterminals appearing in usyms of the items of this state
        for ii in I:
            for s in ii.usyms:
                if s in self.grammar.Nonterminals:
                    nkeys[s] = None
        for n in nkeys:
            g = self.lr0_goto(I, n)
            j = self.lr0_cidhash.get(id(g), -1)
            if j >= 0:
                st_goto[n] = j
                log.info(' %-30s shift and go to state %d', n, j)
        # Publish the tables for this state and move on to the next one
        action[st] = st_action
        actionp[st] = st_actionp
        goto[st] = st_goto
        st += 1
  2302. # -----------------------------------------------------------------------------
  2303. # write_table()
  2304. #
  2305. # This function writes the LR parsing tables to a file
  2306. # -----------------------------------------------------------------------------
  2307. def write_table(self, tabmodule, outputdir='', signature=''):
  2308. if isinstance(tabmodule, types.ModuleType):
  2309. raise IOError("Won't overwrite existing tabmodule")
  2310. basemodulename = tabmodule.split('.')[-1]
  2311. filename = os.path.join(outputdir, basemodulename) + '.py'
  2312. try:
  2313. f = open(filename, 'w')
  2314. f.write('''
  2315. # %s
  2316. # This file is automatically generated. Do not edit.
  2317. _tabversion = %r
  2318. _lr_method = %r
  2319. _lr_signature = %r
  2320. ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
  2321. # Change smaller to 0 to go back to original tables
  2322. smaller = 1
  2323. # Factor out names to try and make smaller
  2324. if smaller:
  2325. items = {}
  2326. for s, nd in self.lr_action.items():
  2327. for name, v in nd.items():
  2328. i = items.get(name)
  2329. if not i:
  2330. i = ([], [])
  2331. items[name] = i
  2332. i[0].append(s)
  2333. i[1].append(v)
  2334. f.write('\n_lr_action_items = {')
  2335. for k, v in items.items():
  2336. f.write('%r:([' % k)
  2337. for i in v[0]:
  2338. f.write('%r,' % i)
  2339. f.write('],[')
  2340. for i in v[1]:
  2341. f.write('%r,' % i)
  2342. f.write(']),')
  2343. f.write('}\n')
  2344. f.write('''
  2345. _lr_action = {}
  2346. for _k, _v in _lr_action_items.items():
  2347. for _x,_y in zip(_v[0],_v[1]):
  2348. if not _x in _lr_action: _lr_action[_x] = {}
  2349. _lr_action[_x][_k] = _y
  2350. del _lr_action_items
  2351. ''')
  2352. else:
  2353. f.write('\n_lr_action = { ')
  2354. for k, v in self.lr_action.items():
  2355. f.write('(%r,%r):%r,' % (k[0], k[1], v))
  2356. f.write('}\n')
  2357. if smaller:
  2358. # Factor out names to try and make smaller
  2359. items = {}
  2360. for s, nd in self.lr_goto.items():
  2361. for name, v in nd.items():
  2362. i = items.get(name)
  2363. if not i:
  2364. i = ([], [])
  2365. items[name] = i
  2366. i[0].append(s)
  2367. i[1].append(v)
  2368. f.write('\n_lr_goto_items = {')
  2369. for k, v in items.items():
  2370. f.write('%r:([' % k)
  2371. for i in v[0]:
  2372. f.write('%r,' % i)
  2373. f.write('],[')
  2374. for i in v[1]:
  2375. f.write('%r,' % i)
  2376. f.write(']),')
  2377. f.write('}\n')
  2378. f.write('''
  2379. _lr_goto = {}
  2380. for _k, _v in _lr_goto_items.items():
  2381. for _x, _y in zip(_v[0], _v[1]):
  2382. if not _x in _lr_goto: _lr_goto[_x] = {}
  2383. _lr_goto[_x][_k] = _y
  2384. del _lr_goto_items
  2385. ''')
  2386. else:
  2387. f.write('\n_lr_goto = { ')
  2388. for k, v in self.lr_goto.items():
  2389. f.write('(%r,%r):%r,' % (k[0], k[1], v))
  2390. f.write('}\n')
  2391. # Write production table
  2392. f.write('_lr_productions = [\n')
  2393. for p in self.lr_productions:
  2394. if p.func:
  2395. f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
  2396. p.func, os.path.basename(p.file), p.line))
  2397. else:
  2398. f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
  2399. f.write(']\n')
  2400. f.close()
  2401. except IOError as e:
  2402. raise
  2403. # -----------------------------------------------------------------------------
  2404. # pickle_table()
  2405. #
  2406. # This function pickles the LR parsing tables to the file named by filename
  2407. # -----------------------------------------------------------------------------
  2408. def pickle_table(self, filename, signature=''):
  2409. try:
  2410. import cPickle as pickle
  2411. except ImportError:
  2412. import pickle
  2413. with open(filename, 'wb') as outf:
  2414. pickle.dump(__tabversion__, outf, pickle_protocol)
  2415. pickle.dump(self.lr_method, outf, pickle_protocol)
  2416. pickle.dump(signature, outf, pickle_protocol)
  2417. pickle.dump(self.lr_action, outf, pickle_protocol)
  2418. pickle.dump(self.lr_goto, outf, pickle_protocol)
  2419. outp = []
  2420. for p in self.lr_productions:
  2421. if p.func:
  2422. outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
  2423. else:
  2424. outp.append((str(p), p.name, p.len, None, None, None))
  2425. pickle.dump(outp, outf, pickle_protocol)
  2426. # -----------------------------------------------------------------------------
  2427. # === INTROSPECTION ===
  2428. #
  2429. # The following functions and classes are used to implement the PLY
  2430. # introspection features followed by the yacc() function itself.
  2431. # -----------------------------------------------------------------------------
  2432. # -----------------------------------------------------------------------------
  2433. # get_caller_module_dict()
  2434. #
  2435. # This function returns a dictionary containing all of the symbols defined within
  2436. # a caller further down the call stack. This is used to get the environment
  2437. # associated with the yacc() call if none was provided.
  2438. # -----------------------------------------------------------------------------
  2439. def get_caller_module_dict(levels):
  2440. f = sys._getframe(levels)
  2441. ldict = f.f_globals.copy()
  2442. if f.f_globals != f.f_locals:
  2443. ldict.update(f.f_locals)
  2444. return ldict
  2445. # -----------------------------------------------------------------------------
  2446. # parse_grammar()
  2447. #
  2448. # This takes a raw grammar rule string and parses it into production data
  2449. # -----------------------------------------------------------------------------
  2450. def parse_grammar(doc, file, line):
  2451. grammar = []
  2452. # Split the doc string into lines
  2453. pstrings = doc.splitlines()
  2454. lastp = None
  2455. dline = line
  2456. for ps in pstrings:
  2457. dline += 1
  2458. p = ps.split()
  2459. if not p:
  2460. continue
  2461. try:
  2462. if p[0] == '|':
  2463. # This is a continuation of a previous rule
  2464. if not lastp:
  2465. raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
  2466. prodname = lastp
  2467. syms = p[1:]
  2468. else:
  2469. prodname = p[0]
  2470. lastp = prodname
  2471. syms = p[2:]
  2472. assign = p[1]
  2473. if assign != ':' and assign != '::=':
  2474. raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
  2475. grammar.append((file, dline, prodname, syms))
  2476. except SyntaxError:
  2477. raise
  2478. except Exception:
  2479. raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
  2480. return grammar
  2481. # -----------------------------------------------------------------------------
  2482. # ParserReflect()
  2483. #
  2484. # This class represents information extracted for building a parser including
  2485. # start symbol, error function, tokens, precedence list, action functions,
  2486. # etc.
  2487. # -----------------------------------------------------------------------------
  2488. class ParserReflect(object):
  2489. def __init__(self, pdict, log=None):
  2490. self.pdict = pdict
  2491. self.start = None
  2492. self.error_func = None
  2493. self.tokens = None
  2494. self.modules = set()
  2495. self.grammar = []
  2496. self.error = False
  2497. if log is None:
  2498. self.log = PlyLogger(sys.stderr)
  2499. else:
  2500. self.log = log
  2501. # Get all of the basic information
  2502. def get_all(self):
  2503. self.get_start()
  2504. self.get_error_func()
  2505. self.get_tokens()
  2506. self.get_precedence()
  2507. self.get_pfunctions()
  2508. # Validate all of the information
  2509. def validate_all(self):
  2510. self.validate_start()
  2511. self.validate_error_func()
  2512. self.validate_tokens()
  2513. self.validate_precedence()
  2514. self.validate_pfunctions()
  2515. self.validate_modules()
  2516. return self.error
  2517. # Compute a signature over the grammar
  2518. def signature(self):
  2519. parts = []
  2520. try:
  2521. if self.start:
  2522. parts.append(self.start)
  2523. if self.prec:
  2524. parts.append(''.join([''.join(p) for p in self.prec]))
  2525. if self.tokens:
  2526. parts.append(' '.join(self.tokens))
  2527. for f in self.pfuncs:
  2528. if f[3]:
  2529. parts.append(f[3])
  2530. except (TypeError, ValueError):
  2531. pass
  2532. return ''.join(parts)
  2533. # -----------------------------------------------------------------------------
  2534. # validate_modules()
  2535. #
  2536. # This method checks to see if there are duplicated p_rulename() functions
  2537. # in the parser module file. Without this function, it is really easy for
  2538. # users to make mistakes by cutting and pasting code fragments (and it's a real
  2539. # bugger to try and figure out why the resulting parser doesn't work). Therefore,
  2540. # we just do a little regular expression pattern matching of def statements
  2541. # to try and detect duplicates.
  2542. # -----------------------------------------------------------------------------
  2543. def validate_modules(self):
  2544. # Match def p_funcname(
  2545. fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
  2546. for module in self.modules:
  2547. try:
  2548. lines, linen = inspect.getsourcelines(module)
  2549. except IOError:
  2550. continue
  2551. counthash = {}
  2552. for linen, line in enumerate(lines):
  2553. linen += 1
  2554. m = fre.match(line)
  2555. if m:
  2556. name = m.group(1)
  2557. prev = counthash.get(name)
  2558. if not prev:
  2559. counthash[name] = linen
  2560. else:
  2561. filename = inspect.getsourcefile(module)
  2562. self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
  2563. filename, linen, name, prev)
  2564. # Get the start symbol
  2565. def get_start(self):
  2566. self.start = self.pdict.get('start')
  2567. # Validate the start symbol
  2568. def validate_start(self):
  2569. if self.start is not None:
  2570. if not isinstance(self.start, string_types):
  2571. self.log.error("'start' must be a string")
  2572. # Look for error handler
  2573. def get_error_func(self):
  2574. self.error_func = self.pdict.get('p_error')
  2575. # Validate the error function
  2576. def validate_error_func(self):
  2577. if self.error_func:
  2578. if isinstance(self.error_func, types.FunctionType):
  2579. ismethod = 0
  2580. elif isinstance(self.error_func, types.MethodType):
  2581. ismethod = 1
  2582. else:
  2583. self.log.error("'p_error' defined, but is not a function or method")
  2584. self.error = True
  2585. return
  2586. eline = self.error_func.__code__.co_firstlineno
  2587. efile = self.error_func.__code__.co_filename
  2588. module = inspect.getmodule(self.error_func)
  2589. self.modules.add(module)
  2590. argcount = self.error_func.__code__.co_argcount - ismethod
  2591. if argcount != 1:
  2592. self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
  2593. self.error = True
  2594. # Get the tokens map
  2595. def get_tokens(self):
  2596. tokens = self.pdict.get('tokens')
  2597. if not tokens:
  2598. self.log.error('No token list is defined')
  2599. self.error = True
  2600. return
  2601. if not isinstance(tokens, (list, tuple)):
  2602. self.log.error('tokens must be a list or tuple')
  2603. self.error = True
  2604. return
  2605. if not tokens:
  2606. self.log.error('tokens is empty')
  2607. self.error = True
  2608. return
  2609. self.tokens = tokens
  2610. # Validate the tokens
  2611. def validate_tokens(self):
  2612. # Validate the tokens.
  2613. if 'error' in self.tokens:
  2614. self.log.error("Illegal token name 'error'. Is a reserved word")
  2615. self.error = True
  2616. return
  2617. terminals = set()
  2618. for n in self.tokens:
  2619. if n in terminals:
  2620. self.log.warning('Token %r multiply defined', n)
  2621. terminals.add(n)
  2622. # Get the precedence map (if any)
  2623. def get_precedence(self):
  2624. self.prec = self.pdict.get('precedence')
  2625. # Validate and parse the precedence map
  2626. def validate_precedence(self):
  2627. preclist = []
  2628. if self.prec:
  2629. if not isinstance(self.prec, (list, tuple)):
  2630. self.log.error('precedence must be a list or tuple')
  2631. self.error = True
  2632. return
  2633. for level, p in enumerate(self.prec):
  2634. if not isinstance(p, (list, tuple)):
  2635. self.log.error('Bad precedence table')
  2636. self.error = True
  2637. return
  2638. if len(p) < 2:
  2639. self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
  2640. self.error = True
  2641. return
  2642. assoc = p[0]
  2643. if not isinstance(assoc, string_types):
  2644. self.log.error('precedence associativity must be a string')
  2645. self.error = True
  2646. return
  2647. for term in p[1:]:
  2648. if not isinstance(term, string_types):
  2649. self.log.error('precedence items must be strings')
  2650. self.error = True
  2651. return
  2652. preclist.append((term, assoc, level+1))
  2653. self.preclist = preclist
  2654. # Get all p_functions from the grammar
  2655. def get_pfunctions(self):
  2656. p_functions = []
  2657. for name, item in self.pdict.items():
  2658. if not name.startswith('p_') or name == 'p_error':
  2659. continue
  2660. if isinstance(item, (types.FunctionType, types.MethodType)):
  2661. line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
  2662. module = inspect.getmodule(item)
  2663. p_functions.append((line, module, name, item.__doc__))
  2664. # Sort all of the actions by line number; make sure to stringify
  2665. # modules to make them sortable, since `line` may not uniquely sort all
  2666. # p functions
  2667. p_functions.sort(key=lambda p_function: (
  2668. p_function[0],
  2669. str(p_function[1]),
  2670. p_function[2],
  2671. p_function[3]))
  2672. self.pfuncs = p_functions
  2673. # Validate all of the p_functions
  2674. def validate_pfunctions(self):
  2675. grammar = []
  2676. # Check for non-empty symbols
  2677. if len(self.pfuncs) == 0:
  2678. self.log.error('no rules of the form p_rulename are defined')
  2679. self.error = True
  2680. return
  2681. for line, module, name, doc in self.pfuncs:
  2682. file = inspect.getsourcefile(module)
  2683. func = self.pdict[name]
  2684. if isinstance(func, types.MethodType):
  2685. reqargs = 2
  2686. else:
  2687. reqargs = 1
  2688. if func.__code__.co_argcount > reqargs:
  2689. self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
  2690. self.error = True
  2691. elif func.__code__.co_argcount < reqargs:
  2692. self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
  2693. self.error = True
  2694. elif not func.__doc__:
  2695. self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
  2696. file, line, func.__name__)
  2697. else:
  2698. try:
  2699. parsed_g = parse_grammar(doc, file, line)
  2700. for g in parsed_g:
  2701. grammar.append((name, g))
  2702. except SyntaxError as e:
  2703. self.log.error(str(e))
  2704. self.error = True
  2705. # Looks like a valid grammar rule
  2706. # Mark the file in which defined.
  2707. self.modules.add(module)
  2708. # Secondary validation step that looks for p_ definitions that are not functions
  2709. # or functions that look like they might be grammar rules.
  2710. for n, v in self.pdict.items():
  2711. if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
  2712. continue
  2713. if n.startswith('t_'):
  2714. continue
  2715. if n.startswith('p_') and n != 'p_error':
  2716. self.log.warning('%r not defined as a function', n)
  2717. if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
  2718. (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
  2719. if v.__doc__:
  2720. try:
  2721. doc = v.__doc__.split(' ')
  2722. if doc[1] == ':':
  2723. self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
  2724. v.__code__.co_filename, v.__code__.co_firstlineno, n)
  2725. except IndexError:
  2726. pass
  2727. self.grammar = grammar
  2728. # -----------------------------------------------------------------------------
  2729. # yacc(module)
  2730. #
  2731. # Build a parser
  2732. # -----------------------------------------------------------------------------
  2733. def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
  2734. check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
  2735. outputdir=None, debuglog=None, errorlog=None, picklefile=None):
  2736. if tabmodule is None:
  2737. tabmodule = tab_module
  2738. # Reference to the parsing method of the last built parser
  2739. global parse
  2740. # If pickling is enabled, table files are not created
  2741. if picklefile:
  2742. write_tables = 0
  2743. if errorlog is None:
  2744. errorlog = PlyLogger(sys.stderr)
  2745. # Get the module dictionary used for the parser
  2746. if module:
  2747. _items = [(k, getattr(module, k)) for k in dir(module)]
  2748. pdict = dict(_items)
  2749. # If no __file__ attribute is available, try to obtain it from the __module__ instead
  2750. if '__file__' not in pdict:
  2751. pdict['__file__'] = sys.modules[pdict['__module__']].__file__
  2752. else:
  2753. pdict = get_caller_module_dict(2)
  2754. if outputdir is None:
  2755. # If no output directory is set, the location of the output files
  2756. # is determined according to the following rules:
  2757. # - If tabmodule specifies a package, files go into that package directory
  2758. # - Otherwise, files go in the same directory as the specifying module
  2759. if isinstance(tabmodule, types.ModuleType):
  2760. srcfile = tabmodule.__file__
  2761. else:
  2762. if '.' not in tabmodule:
  2763. srcfile = pdict['__file__']
  2764. else:
  2765. parts = tabmodule.split('.')
  2766. pkgname = '.'.join(parts[:-1])
  2767. exec('import %s' % pkgname)
  2768. srcfile = getattr(sys.modules[pkgname], '__file__', '')
  2769. outputdir = os.path.dirname(srcfile)
  2770. # Determine if the module is package of a package or not.
  2771. # If so, fix the tabmodule setting so that tables load correctly
  2772. pkg = pdict.get('__package__')
  2773. if pkg and isinstance(tabmodule, str):
  2774. if '.' not in tabmodule:
  2775. tabmodule = pkg + '.' + tabmodule
  2776. # Set start symbol if it's specified directly using an argument
  2777. if start is not None:
  2778. pdict['start'] = start
  2779. # Collect parser information from the dictionary
  2780. pinfo = ParserReflect(pdict, log=errorlog)
  2781. pinfo.get_all()
  2782. if pinfo.error:
  2783. raise YaccError('Unable to build parser')
  2784. # Check signature against table files (if any)
  2785. signature = pinfo.signature()
  2786. # Read the tables
  2787. try:
  2788. lr = LRTable()
  2789. if picklefile:
  2790. read_signature = lr.read_pickle(picklefile)
  2791. else:
  2792. read_signature = lr.read_table(tabmodule)
  2793. if optimize or (read_signature == signature):
  2794. try:
  2795. lr.bind_callables(pinfo.pdict)
  2796. parser = LRParser(lr, pinfo.error_func)
  2797. parse = parser.parse
  2798. return parser
  2799. except Exception as e:
  2800. errorlog.warning('There was a problem loading the table file: %r', e)
  2801. except VersionError as e:
  2802. errorlog.warning(str(e))
  2803. except ImportError:
  2804. pass
  2805. if debuglog is None:
  2806. if debug:
  2807. try:
  2808. debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
  2809. except IOError as e:
  2810. errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
  2811. debuglog = NullLogger()
  2812. else:
  2813. debuglog = NullLogger()
  2814. debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
  2815. errors = False
  2816. # Validate the parser information
  2817. if pinfo.validate_all():
  2818. raise YaccError('Unable to build parser')
  2819. if not pinfo.error_func:
  2820. errorlog.warning('no p_error() function is defined')
  2821. # Create a grammar object
  2822. grammar = Grammar(pinfo.tokens)
  2823. # Set precedence level for terminals
  2824. for term, assoc, level in pinfo.preclist:
  2825. try:
  2826. grammar.set_precedence(term, assoc, level)
  2827. except GrammarError as e:
  2828. errorlog.warning('%s', e)
  2829. # Add productions to the grammar
  2830. for funcname, gram in pinfo.grammar:
  2831. file, line, prodname, syms = gram
  2832. try:
  2833. grammar.add_production(prodname, syms, funcname, file, line)
  2834. except GrammarError as e:
  2835. errorlog.error('%s', e)
  2836. errors = True
  2837. # Set the grammar start symbols
  2838. try:
  2839. if start is None:
  2840. grammar.set_start(pinfo.start)
  2841. else:
  2842. grammar.set_start(start)
  2843. except GrammarError as e:
  2844. errorlog.error(str(e))
  2845. errors = True
  2846. if errors:
  2847. raise YaccError('Unable to build parser')
  2848. # Verify the grammar structure
  2849. undefined_symbols = grammar.undefined_symbols()
  2850. for sym, prod in undefined_symbols:
  2851. errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
  2852. errors = True
  2853. unused_terminals = grammar.unused_terminals()
  2854. if unused_terminals:
  2855. debuglog.info('')
  2856. debuglog.info('Unused terminals:')
  2857. debuglog.info('')
  2858. for term in unused_terminals:
  2859. errorlog.warning('Token %r defined, but not used', term)
  2860. debuglog.info(' %s', term)
  2861. # Print out all productions to the debug log
  2862. if debug:
  2863. debuglog.info('')
  2864. debuglog.info('Grammar')
  2865. debuglog.info('')
  2866. for n, p in enumerate(grammar.Productions):
  2867. debuglog.info('Rule %-5d %s', n, p)
  2868. # Find unused non-terminals
  2869. unused_rules = grammar.unused_rules()
  2870. for prod in unused_rules:
  2871. errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
  2872. if len(unused_terminals) == 1:
  2873. errorlog.warning('There is 1 unused token')
  2874. if len(unused_terminals) > 1:
  2875. errorlog.warning('There are %d unused tokens', len(unused_terminals))
  2876. if len(unused_rules) == 1:
  2877. errorlog.warning('There is 1 unused rule')
  2878. if len(unused_rules) > 1:
  2879. errorlog.warning('There are %d unused rules', len(unused_rules))
  2880. if debug:
  2881. debuglog.info('')
  2882. debuglog.info('Terminals, with rules where they appear')
  2883. debuglog.info('')
  2884. terms = list(grammar.Terminals)
  2885. terms.sort()
  2886. for term in terms:
  2887. debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
  2888. debuglog.info('')
  2889. debuglog.info('Nonterminals, with rules where they appear')
  2890. debuglog.info('')
  2891. nonterms = list(grammar.Nonterminals)
  2892. nonterms.sort()
  2893. for nonterm in nonterms:
  2894. debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
  2895. debuglog.info('')
  2896. if check_recursion:
  2897. unreachable = grammar.find_unreachable()
  2898. for u in unreachable:
  2899. errorlog.warning('Symbol %r is unreachable', u)
  2900. infinite = grammar.infinite_cycles()
  2901. for inf in infinite:
  2902. errorlog.error('Infinite recursion detected for symbol %r', inf)
  2903. errors = True
  2904. unused_prec = grammar.unused_precedence()
  2905. for term, assoc in unused_prec:
  2906. errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
  2907. errors = True
  2908. if errors:
  2909. raise YaccError('Unable to build parser')
  2910. # Run the LRGeneratedTable on the grammar
  2911. if debug:
  2912. errorlog.debug('Generating %s tables', method)
  2913. lr = LRGeneratedTable(grammar, method, debuglog)
  2914. if debug:
  2915. num_sr = len(lr.sr_conflicts)
  2916. # Report shift/reduce and reduce/reduce conflicts
  2917. if num_sr == 1:
  2918. errorlog.warning('1 shift/reduce conflict')
  2919. elif num_sr > 1:
  2920. errorlog.warning('%d shift/reduce conflicts', num_sr)
  2921. num_rr = len(lr.rr_conflicts)
  2922. if num_rr == 1:
  2923. errorlog.warning('1 reduce/reduce conflict')
  2924. elif num_rr > 1:
  2925. errorlog.warning('%d reduce/reduce conflicts', num_rr)
  2926. # Write out conflicts to the output file
  2927. if debug and (lr.sr_conflicts or lr.rr_conflicts):
  2928. debuglog.warning('')
  2929. debuglog.warning('Conflicts:')
  2930. debuglog.warning('')
  2931. for state, tok, resolution in lr.sr_conflicts:
  2932. debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
  2933. already_reported = set()
  2934. for state, rule, rejected in lr.rr_conflicts:
  2935. if (state, id(rule), id(rejected)) in already_reported:
  2936. continue
  2937. debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
  2938. debuglog.warning('rejected rule (%s) in state %d', rejected, state)
  2939. errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
  2940. errorlog.warning('rejected rule (%s) in state %d', rejected, state)
  2941. already_reported.add((state, id(rule), id(rejected)))
  2942. warned_never = []
  2943. for state, rule, rejected in lr.rr_conflicts:
  2944. if not rejected.reduced and (rejected not in warned_never):
  2945. debuglog.warning('Rule (%s) is never reduced', rejected)
  2946. errorlog.warning('Rule (%s) is never reduced', rejected)
  2947. warned_never.append(rejected)
  2948. # Write the table file if requested
  2949. if write_tables:
  2950. try:
  2951. lr.write_table(tabmodule, outputdir, signature)
  2952. except IOError as e:
  2953. errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
  2954. # Write a pickled version of the tables
  2955. if picklefile:
  2956. try:
  2957. lr.pickle_table(picklefile, signature)
  2958. except IOError as e:
  2959. errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
  2960. # Build the parser
  2961. lr.bind_callables(pinfo.pdict)
  2962. parser = LRParser(lr, pinfo.error_func)
  2963. parse = parser.parse
  2964. return parser