Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

pyparsing.py 224KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
752785279528052815282528352845285528652875288528952905291529252935294529552965297529852995300530153025303530453055306530753085309531053115312531353145315531653175318531953205321532253235324532553265327532853295330533153325333533453355336533753385339534053415342534353445345534653475348534953505351535253535354535553565357535853595360536153625363536453655366536753685369537053715372537353745375537653775378537953805381538253835384538553865387538853895390539153925393539453955396539753985399540054015402540354045405540654075408540954105411541254135414541554165417541854195420542154225423542454255426542754285429543054315432543354345435543654375438543954405441544254435444544554465447544854495450545154525453545454555456545754585459546054615462546354645465546654675468546954705471547254735474547554765477547854795480548154825483548454855486548754885489549054915492549354945495549654975498549955005501550255035504550555065507550855095510551155125513551455155516551755185519552055215522552355245525552655275528552955305531553255335534553555365537553855395540554155425543554455455546554755485549555055515552555355545555555655575558555955605561556255635564556555665567556855695570557155725573557455755576557755785579558055815582558355845585558655875588558955905591559255935594559555965597559855995600560156025603560456055606560756085609561056115612561356145615561656175618561956205621562256235624562556265627562856295630563156325633563456355636563756385639564056415642564356445645564656475648564956505651565256535654565556565657565856595660566156625663566456655666566756685669567056715672567356745675567656775678567956805681568256835684568556865687568856895690569156925693569456955696
  1. # module pyparsing.py
  2. #
  3. # Copyright (c) 2003-2016 Paul T. McGuire
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining
  6. # a copy of this software and associated documentation files (the
  7. # "Software"), to deal in the Software without restriction, including
  8. # without limitation the rights to use, copy, modify, merge, publish,
  9. # distribute, sublicense, and/or sell copies of the Software, and to
  10. # permit persons to whom the Software is furnished to do so, subject to
  11. # the following conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be
  14. # included in all copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  20. # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21. # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23. #
  24. __doc__ = \
  25. """
  26. pyparsing module - Classes and methods to define and execute parsing grammars
  27. The pyparsing module is an alternative approach to creating and executing simple grammars,
  28. vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
  29. don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
  30. provides a library of classes that you use to construct the grammar directly in Python.
  31. Here is a program to parse "Hello, World!" (or any greeting of the form
  32. C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
  33. (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
  34. L{Literal} expressions)::
  35. from pyparsing import Word, alphas
  36. # define grammar of a greeting
  37. greet = Word(alphas) + "," + Word(alphas) + "!"
  38. hello = "Hello, World!"
  39. print (hello, "->", greet.parseString(hello))
  40. The program outputs the following::
  41. Hello, World! -> ['Hello', ',', 'World', '!']
  42. The Python representation of the grammar is quite readable, owing to the self-explanatory
  43. class names, and the use of '+', '|' and '^' operators.
  44. The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
  45. object with named attributes.
  46. The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
  47. - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
  48. - quoted strings
  49. - embedded comments
  50. """
# Release metadata for this copy of the module: version string, the
# timestamp recorded for that version, and the author contact.
__version__ = "2.1.10"
__versionTime__ = "07 Oct 2016 01:31 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  54. import string
  55. from weakref import ref as wkref
  56. import copy
  57. import sys
  58. import warnings
  59. import re
  60. import sre_constants
  61. import collections
  62. import pprint
  63. import traceback
  64. import types
  65. from datetime import datetime
  66. try:
  67. from _thread import RLock
  68. except ImportError:
  69. from threading import RLock
  70. try:
  71. from collections import OrderedDict as _OrderedDict
  72. except ImportError:
  73. try:
  74. from ordereddict import OrderedDict as _OrderedDict
  75. except ImportError:
  76. _OrderedDict = None
  77. #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
# Public API of the module: the names bound by ``from pyparsing import *``.
# Classes come first, followed by helper functions, character-set constants
# and predefined expressions.
__all__ = [
    'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
    'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
    'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
    'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
    'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
    'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
    'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
    'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
    'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
    'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
    'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
    'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
    'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
    'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
    'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
    'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
    'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
    'CloseMatch', 'tokenMap', 'pyparsing_common',
    ]
  98. system_version = tuple(sys.version_info)[:3]
  99. PY_3 = system_version[0] == 3
  100. if PY_3:
  101. _MAX_INT = sys.maxsize
  102. basestring = str
  103. unichr = chr
  104. _ustr = str
  105. # build list of single arg builtins, that can be used as parse actions
  106. singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
  107. else:
  108. _MAX_INT = sys.maxint
  109. range = xrange
  110. def _ustr(obj):
  111. """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
  112. str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
  113. then < returns the unicode object | encodes it with the default encoding | ... >.
  114. """
  115. if isinstance(obj,unicode):
  116. return obj
  117. try:
  118. # If this works, then _ustr(obj) has the same behaviour as str(obj), so
  119. # it won't break any existing code.
  120. return str(obj)
  121. except UnicodeEncodeError:
  122. # Else encode it
  123. ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
  124. xmlcharref = Regex('&#\d+;')
  125. xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
  126. return xmlcharref.transformString(ret)
  127. # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
  128. singleArgBuiltins = []
  129. import __builtin__
  130. for fname in "sum len sorted reversed list tuple set any all min max".split():
  131. try:
  132. singleArgBuiltins.append(getattr(__builtin__,fname))
  133. except AttributeError:
  134. continue
  135. _generatorType = type((y for y in range(1)))
  136. def _xml_escape(data):
  137. """Escape &, <, >, ", ', etc. in a string of data."""
  138. # ampersand must be replaced first
  139. from_symbols = '&><"\''
  140. to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
  141. for from_,to_ in zip(from_symbols, to_symbols):
  142. data = data.replace(from_, to_)
  143. return data
  144. class _Constants(object):
  145. pass
  146. alphas = string.ascii_uppercase + string.ascii_lowercase
  147. nums = "0123456789"
  148. hexnums = nums + "ABCDEFabcdef"
  149. alphanums = alphas + nums
  150. _bslash = chr(92)
  151. printables = "".join(c for c in string.printable if c not in string.whitespace)
  152. class ParseBaseException(Exception):
  153. """base exception class for all parsing runtime exceptions"""
  154. # Performance tuning: we construct a *lot* of these, so keep this
  155. # constructor as small and fast as possible
  156. def __init__( self, pstr, loc=0, msg=None, elem=None ):
  157. self.loc = loc
  158. if msg is None:
  159. self.msg = pstr
  160. self.pstr = ""
  161. else:
  162. self.msg = msg
  163. self.pstr = pstr
  164. self.parserElement = elem
  165. self.args = (pstr, loc, msg)
  166. @classmethod
  167. def _from_exception(cls, pe):
  168. """
  169. internal factory method to simplify creating one type of ParseException
  170. from another - avoids having __init__ signature conflicts among subclasses
  171. """
  172. return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
  173. def __getattr__( self, aname ):
  174. """supported attributes by name are:
  175. - lineno - returns the line number of the exception text
  176. - col - returns the column number of the exception text
  177. - line - returns the line containing the exception text
  178. """
  179. if( aname == "lineno" ):
  180. return lineno( self.loc, self.pstr )
  181. elif( aname in ("col", "column") ):
  182. return col( self.loc, self.pstr )
  183. elif( aname == "line" ):
  184. return line( self.loc, self.pstr )
  185. else:
  186. raise AttributeError(aname)
  187. def __str__( self ):
  188. return "%s (at char %d), (line:%d, col:%d)" % \
  189. ( self.msg, self.loc, self.lineno, self.column )
  190. def __repr__( self ):
  191. return _ustr(self)
  192. def markInputline( self, markerString = ">!<" ):
  193. """Extracts the exception line from the input string, and marks
  194. the location of the exception with a special symbol.
  195. """
  196. line_str = self.line
  197. line_column = self.column - 1
  198. if markerString:
  199. line_str = "".join((line_str[:line_column],
  200. markerString, line_str[line_column:]))
  201. return line_str.strip()
  202. def __dir__(self):
  203. return "lineno col line".split() + dir(type(self))
  204. class ParseException(ParseBaseException):
  205. """
  206. Exception thrown when parse expressions don't match class;
  207. supported attributes by name are:
  208. - lineno - returns the line number of the exception text
  209. - col - returns the column number of the exception text
  210. - line - returns the line containing the exception text
  211. Example::
  212. try:
  213. Word(nums).setName("integer").parseString("ABC")
  214. except ParseException as pe:
  215. print(pe)
  216. print("column: {}".format(pe.col))
  217. prints::
  218. Expected integer (at char 0), (line:1, col:1)
  219. column: 1
  220. """
  221. pass
  222. class ParseFatalException(ParseBaseException):
  223. """user-throwable exception thrown when inconsistent parse content
  224. is found; stops all parsing immediately"""
  225. pass
  226. class ParseSyntaxException(ParseFatalException):
  227. """just like L{ParseFatalException}, but thrown internally when an
  228. L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
  229. immediately because an unbacktrackable syntax error has been found"""
  230. pass
  231. #~ class ReparseException(ParseBaseException):
  232. #~ """Experimental class - parse actions can raise this exception to cause
  233. #~ pyparsing to reparse the input string:
  234. #~ - with a modified input string, and/or
  235. #~ - with a modified start location
  236. #~ Set the values of the ReparseException in the constructor, and raise the
  237. #~ exception in a parse action to cause pyparsing to use the new string/location.
  238. #~ Setting the values as None causes no change to be made.
  239. #~ """
  240. #~ def __init_( self, newstring, restartLoc ):
  241. #~ self.newParseText = newstring
  242. #~ self.reparseLoc = restartLoc
  243. class RecursiveGrammarException(Exception):
  244. """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
  245. def __init__( self, parseElementList ):
  246. self.parseElementTrace = parseElementList
  247. def __str__( self ):
  248. return "RecursiveGrammarException: %s" % self.parseElementTrace
  249. class _ParseResultsWithOffset(object):
  250. def __init__(self,p1,p2):
  251. self.tup = (p1,p2)
  252. def __getitem__(self,i):
  253. return self.tup[i]
  254. def __repr__(self):
  255. return repr(self.tup[0])
  256. def setOffset(self,i):
  257. self.tup = (self.tup[0],i)
  258. class ParseResults(object):
  259. """
  260. Structured parse results, to provide multiple means of access to the parsed data:
  261. - as a list (C{len(results)})
  262. - by list index (C{results[0], results[1]}, etc.)
  263. - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
  264. Example::
  265. integer = Word(nums)
  266. date_str = (integer.setResultsName("year") + '/'
  267. + integer.setResultsName("month") + '/'
  268. + integer.setResultsName("day"))
  269. # equivalent form:
  270. # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  271. # parseString returns a ParseResults object
  272. result = date_str.parseString("1999/12/31")
  273. def test(s, fn=repr):
  274. print("%s -> %s" % (s, fn(eval(s))))
  275. test("list(result)")
  276. test("result[0]")
  277. test("result['month']")
  278. test("result.day")
  279. test("'month' in result")
  280. test("'minutes' in result")
  281. test("result.dump()", str)
  282. prints::
  283. list(result) -> ['1999', '/', '12', '/', '31']
  284. result[0] -> '1999'
  285. result['month'] -> '12'
  286. result.day -> '31'
  287. 'month' in result -> True
  288. 'minutes' in result -> False
  289. result.dump() -> ['1999', '/', '12', '/', '31']
  290. - day: 31
  291. - month: 12
  292. - year: 1999
  293. """
  294. def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
  295. if isinstance(toklist, cls):
  296. return toklist
  297. retobj = object.__new__(cls)
  298. retobj.__doinit = True
  299. return retobj
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Populate a results object from C{toklist} and optionally register
        its content under the results name C{name}.

        The C{isinstance} parameter only binds the builtin as a local name.
        """
        # Set by __new__ on freshly allocated objects only; an existing
        # instance returned by __new__ skips this setup.
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            # lists are copied, generators are drained, anything else becomes
            # the single token
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            # non-modal names are tracked so that lookups return every match
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # nothing is stored under the name for None, '' or []
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    # store the first token when toklist is indexable,
                    # otherwise the object itself
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist
  339. def __getitem__( self, i ):
  340. if isinstance( i, (int,slice) ):
  341. return self.__toklist[i]
  342. else:
  343. if i not in self.__accumNames:
  344. return self.__tokdict[i][-1][0]
  345. else:
  346. return ParseResults([ v[0] for v in self.__tokdict[i] ])
  347. def __setitem__( self, k, v, isinstance=isinstance ):
  348. if isinstance(v,_ParseResultsWithOffset):
  349. self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
  350. sub = v[0]
  351. elif isinstance(k,(int,slice)):
  352. self.__toklist[k] = v
  353. sub = v
  354. else:
  355. self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
  356. sub = v
  357. if isinstance(sub,ParseResults):
  358. sub.__parent = wkref(self)
  359. def __delitem__( self, i ):
  360. if isinstance(i,(int,slice)):
  361. mylen = len( self.__toklist )
  362. del self.__toklist[i]
  363. # convert int to slice
  364. if isinstance(i, int):
  365. if i < 0:
  366. i += mylen
  367. i = slice(i, i+1)
  368. # get removed indices
  369. removed = list(range(*i.indices(mylen)))
  370. removed.reverse()
  371. # fixup indices in token dictionary
  372. for name,occurrences in self.__tokdict.items():
  373. for j in removed:
  374. for k, (value, position) in enumerate(occurrences):
  375. occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
  376. else:
  377. del self.__tokdict[i]
  378. def __contains__( self, k ):
  379. return k in self.__tokdict
  380. def __len__( self ): return len( self.__toklist )
  381. def __bool__(self): return ( not not self.__toklist )
  382. __nonzero__ = __bool__
  383. def __iter__( self ): return iter( self.__toklist )
  384. def __reversed__( self ): return iter( self.__toklist[::-1] )
  385. def _iterkeys( self ):
  386. if hasattr(self.__tokdict, "iterkeys"):
  387. return self.__tokdict.iterkeys()
  388. else:
  389. return iter(self.__tokdict)
  390. def _itervalues( self ):
  391. return (self[k] for k in self._iterkeys())
  392. def _iteritems( self ):
  393. return ((k, self[k]) for k in self._iterkeys())
  394. if PY_3:
  395. keys = _iterkeys
  396. """Returns an iterator of all named result keys (Python 3.x only)."""
  397. values = _itervalues
  398. """Returns an iterator of all named result values (Python 3.x only)."""
  399. items = _iteritems
  400. """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
  401. else:
  402. iterkeys = _iterkeys
  403. """Returns an iterator of all named result keys (Python 2.x only)."""
  404. itervalues = _itervalues
  405. """Returns an iterator of all named result values (Python 2.x only)."""
  406. iteritems = _iteritems
  407. """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
  408. def keys( self ):
  409. """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
  410. return list(self.iterkeys())
  411. def values( self ):
  412. """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
  413. return list(self.itervalues())
  414. def items( self ):
  415. """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
  416. return list(self.iteritems())
  417. def haskeys( self ):
  418. """Since keys() returns an iterator, this method is helpful in bypassing
  419. code that looks for the existence of any defined results names."""
  420. return bool(self.__tokdict)
  421. def pop( self, *args, **kwargs):
  422. """
  423. Removes and returns item at specified index (default=C{last}).
  424. Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
  425. argument or an integer argument, it will use C{list} semantics
  426. and pop tokens from the list of parsed tokens. If passed a
  427. non-integer argument (most likely a string), it will use C{dict}
  428. semantics and pop the corresponding value from any defined
  429. results names. A second default return value argument is
  430. supported, just as in C{dict.pop()}.
  431. Example::
  432. def remove_first(tokens):
  433. tokens.pop(0)
  434. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  435. print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
  436. label = Word(alphas)
  437. patt = label("LABEL") + OneOrMore(Word(nums))
  438. print(patt.parseString("AAB 123 321").dump())
  439. # Use pop() in a parse action to remove named result (note that corresponding value is not
  440. # removed from list form of results)
  441. def remove_LABEL(tokens):
  442. tokens.pop("LABEL")
  443. return tokens
  444. patt.addParseAction(remove_LABEL)
  445. print(patt.parseString("AAB 123 321").dump())
  446. prints::
  447. ['AAB', '123', '321']
  448. - LABEL: AAB
  449. ['AAB', '123', '321']
  450. """
  451. if not args:
  452. args = [-1]
  453. for k,v in kwargs.items():
  454. if k == 'default':
  455. args = (args[0], v)
  456. else:
  457. raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
  458. if (isinstance(args[0], int) or
  459. len(args) == 1 or
  460. args[0] in self):
  461. index = args[0]
  462. ret = self[index]
  463. del self[index]
  464. return ret
  465. else:
  466. defaultvalue = args[1]
  467. return defaultvalue
  468. def get(self, key, defaultValue=None):
  469. """
  470. Returns named result matching the given key, or if there is no
  471. such name, then returns the given C{defaultValue} or C{None} if no
  472. C{defaultValue} is specified.
  473. Similar to C{dict.get()}.
  474. Example::
  475. integer = Word(nums)
  476. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  477. result = date_str.parseString("1999/12/31")
  478. print(result.get("year")) # -> '1999'
  479. print(result.get("hour", "not specified")) # -> 'not specified'
  480. print(result.get("hour")) # -> None
  481. """
  482. if key in self:
  483. return self[key]
  484. else:
  485. return defaultValue
  486. def insert( self, index, insStr ):
  487. """
  488. Inserts new element at location index in the list of parsed tokens.
  489. Similar to C{list.insert()}.
  490. Example::
  491. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  492. # use a parse action to insert the parse location in the front of the parsed results
  493. def insert_locn(locn, tokens):
  494. tokens.insert(0, locn)
  495. print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
  496. """
  497. self.__toklist.insert(index, insStr)
  498. # fixup indices in token dictionary
  499. for name,occurrences in self.__tokdict.items():
  500. for k, (value, position) in enumerate(occurrences):
  501. occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
  502. def append( self, item ):
  503. """
  504. Add single element to end of ParseResults list of elements.
  505. Example::
  506. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  507. # use a parse action to compute the sum of the parsed integers, and add it to the end
  508. def append_sum(tokens):
  509. tokens.append(sum(map(int, tokens)))
  510. print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
  511. """
  512. self.__toklist.append(item)
  513. def extend( self, itemseq ):
  514. """
  515. Add sequence of elements to end of ParseResults list of elements.
  516. Example::
  517. patt = OneOrMore(Word(alphas))
  518. # use a parse action to append the reverse of the matched strings, to make a palindrome
  519. def make_palindrome(tokens):
  520. tokens.extend(reversed([t[::-1] for t in tokens]))
  521. return ''.join(tokens)
  522. print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
  523. """
  524. if isinstance(itemseq, ParseResults):
  525. self += itemseq
  526. else:
  527. self.__toklist.extend(itemseq)
  528. def clear( self ):
  529. """
  530. Clear all elements and results names.
  531. """
  532. del self.__toklist[:]
  533. self.__tokdict.clear()
  534. def __getattr__( self, name ):
  535. try:
  536. return self[name]
  537. except KeyError:
  538. return ""
  539. if name in self.__tokdict:
  540. if name not in self.__accumNames:
  541. return self.__tokdict[name][-1][0]
  542. else:
  543. return ParseResults([ v[0] for v in self.__tokdict[name] ])
  544. else:
  545. return ""
  546. def __add__( self, other ):
  547. ret = self.copy()
  548. ret += other
  549. return ret
  550. def __iadd__( self, other ):
  551. if other.__tokdict:
  552. offset = len(self.__toklist)
  553. addoffset = lambda a: offset if a<0 else a+offset
  554. otheritems = other.__tokdict.items()
  555. otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
  556. for (k,vlist) in otheritems for v in vlist]
  557. for k,v in otherdictitems:
  558. self[k] = v
  559. if isinstance(v[0],ParseResults):
  560. v[0].__parent = wkref(self)
  561. self.__toklist += other.__toklist
  562. self.__accumNames.update( other.__accumNames )
  563. return self
  564. def __radd__(self, other):
  565. if isinstance(other,int) and other == 0:
  566. # useful for merging many ParseResults using sum() builtin
  567. return self.copy()
  568. else:
  569. # this may raise a TypeError - so be it
  570. return other + self
  571. def __repr__( self ):
  572. return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
  573. def __str__( self ):
  574. return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
  575. def _asStringList( self, sep='' ):
  576. out = []
  577. for item in self.__toklist:
  578. if out and sep:
  579. out.append(sep)
  580. if isinstance( item, ParseResults ):
  581. out += item._asStringList()
  582. else:
  583. out.append( _ustr(item) )
  584. return out
  585. def asList( self ):
  586. """
  587. Returns the parse results as a nested list of matching tokens, all converted to strings.
  588. Example::
  589. patt = OneOrMore(Word(alphas))
  590. result = patt.parseString("sldkj lsdkj sldkj")
  591. # even though the result prints in string-like form, it is actually a pyparsing ParseResults
  592. print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
  593. # Use asList() to create an actual list
  594. result_list = result.asList()
  595. print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
  596. """
  597. return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
  598. def asDict( self ):
  599. """
  600. Returns the named parse results as a nested dictionary.
  601. Example::
  602. integer = Word(nums)
  603. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  604. result = date_str.parseString('12/31/1999')
  605. print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
  606. result_dict = result.asDict()
  607. print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
  608. # even though a ParseResults supports dict-like access, sometime you just need to have a dict
  609. import json
  610. print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
  611. print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
  612. """
  613. if PY_3:
  614. item_fn = self.items
  615. else:
  616. item_fn = self.iteritems
  617. def toItem(obj):
  618. if isinstance(obj, ParseResults):
  619. if obj.haskeys():
  620. return obj.asDict()
  621. else:
  622. return [toItem(v) for v in obj]
  623. else:
  624. return obj
  625. return dict((k,toItem(v)) for k,v in item_fn())
  626. def copy( self ):
  627. """
  628. Returns a new copy of a C{ParseResults} object.
  629. """
  630. ret = ParseResults( self.__toklist )
  631. ret.__tokdict = self.__tokdict.copy()
  632. ret.__parent = self.__parent
  633. ret.__accumNames.update( self.__accumNames )
  634. ret.__name = self.__name
  635. return ret
  636. def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
  637. """
  638. (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
  639. """
  640. nl = "\n"
  641. out = []
  642. namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
  643. for v in vlist)
  644. nextLevelIndent = indent + " "
  645. # collapse out indents if formatting is not desired
  646. if not formatted:
  647. indent = ""
  648. nextLevelIndent = ""
  649. nl = ""
  650. selfTag = None
  651. if doctag is not None:
  652. selfTag = doctag
  653. else:
  654. if self.__name:
  655. selfTag = self.__name
  656. if not selfTag:
  657. if namedItemsOnly:
  658. return ""
  659. else:
  660. selfTag = "ITEM"
  661. out += [ nl, indent, "<", selfTag, ">" ]
  662. for i,res in enumerate(self.__toklist):
  663. if isinstance(res,ParseResults):
  664. if i in namedItems:
  665. out += [ res.asXML(namedItems[i],
  666. namedItemsOnly and doctag is None,
  667. nextLevelIndent,
  668. formatted)]
  669. else:
  670. out += [ res.asXML(None,
  671. namedItemsOnly and doctag is None,
  672. nextLevelIndent,
  673. formatted)]
  674. else:
  675. # individual token, see if there is a name for it
  676. resTag = None
  677. if i in namedItems:
  678. resTag = namedItems[i]
  679. if not resTag:
  680. if namedItemsOnly:
  681. continue
  682. else:
  683. resTag = "ITEM"
  684. xmlBodyText = _xml_escape(_ustr(res))
  685. out += [ nl, nextLevelIndent, "<", resTag, ">",
  686. xmlBodyText,
  687. "</", resTag, ">" ]
  688. out += [ nl, indent, "</", selfTag, ">" ]
  689. return "".join(out)
  690. def __lookup(self,sub):
  691. for k,vlist in self.__tokdict.items():
  692. for v,loc in vlist:
  693. if sub is v:
  694. return k
  695. return None
  696. def getName(self):
  697. """
  698. Returns the results name for this token expression. Useful when several
  699. different expressions might match at a particular location.
  700. Example::
  701. integer = Word(nums)
  702. ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
  703. house_number_expr = Suppress('#') + Word(nums, alphanums)
  704. user_data = (Group(house_number_expr)("house_number")
  705. | Group(ssn_expr)("ssn")
  706. | Group(integer)("age"))
  707. user_info = OneOrMore(user_data)
  708. result = user_info.parseString("22 111-22-3333 #221B")
  709. for item in result:
  710. print(item.getName(), ':', item[0])
  711. prints::
  712. age : 22
  713. ssn : 111-22-3333
  714. house_number : 221B
  715. """
  716. if self.__name:
  717. return self.__name
  718. elif self.__parent:
  719. par = self.__parent()
  720. if par:
  721. return par.__lookup(self)
  722. else:
  723. return None
  724. elif (len(self) == 1 and
  725. len(self.__tokdict) == 1 and
  726. next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
  727. return next(iter(self.__tokdict.keys()))
  728. else:
  729. return None
  730. def dump(self, indent='', depth=0, full=True):
  731. """
  732. Diagnostic method for listing out the contents of a C{ParseResults}.
  733. Accepts an optional C{indent} argument so that this string can be embedded
  734. in a nested display of other data.
  735. Example::
  736. integer = Word(nums)
  737. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  738. result = date_str.parseString('12/31/1999')
  739. print(result.dump())
  740. prints::
  741. ['12', '/', '31', '/', '1999']
  742. - day: 1999
  743. - month: 31
  744. - year: 12
  745. """
  746. out = []
  747. NL = '\n'
  748. out.append( indent+_ustr(self.asList()) )
  749. if full:
  750. if self.haskeys():
  751. items = sorted((str(k), v) for k,v in self.items())
  752. for k,v in items:
  753. if out:
  754. out.append(NL)
  755. out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
  756. if isinstance(v,ParseResults):
  757. if v:
  758. out.append( v.dump(indent,depth+1) )
  759. else:
  760. out.append(_ustr(v))
  761. else:
  762. out.append(repr(v))
  763. elif any(isinstance(vv,ParseResults) for vv in self):
  764. v = self
  765. for i,vv in enumerate(v):
  766. if isinstance(vv,ParseResults):
  767. out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
  768. else:
  769. out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))
  770. return "".join(out)
  771. def pprint(self, *args, **kwargs):
  772. """
  773. Pretty-printer for parsed results as a list, using the C{pprint} module.
  774. Accepts additional positional or keyword args as defined for the
  775. C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
  776. Example::
  777. ident = Word(alphas, alphanums)
  778. num = Word(nums)
  779. func = Forward()
  780. term = ident | num | Group('(' + func + ')')
  781. func <<= ident + Group(Optional(delimitedList(term)))
  782. result = func.parseString("fna a,b,(fnb c,d,200),100")
  783. result.pprint(width=40)
  784. prints::
  785. ['fna',
  786. ['a',
  787. 'b',
  788. ['(', 'fnb', ['c', 'd', '200'], ')'],
  789. '100']]
  790. """
  791. pprint.pprint(self.asList(), *args, **kwargs)
  792. # add support for pickle protocol
  793. def __getstate__(self):
  794. return ( self.__toklist,
  795. ( self.__tokdict.copy(),
  796. self.__parent is not None and self.__parent() or None,
  797. self.__accumNames,
  798. self.__name ) )
  799. def __setstate__(self,state):
  800. self.__toklist = state[0]
  801. (self.__tokdict,
  802. par,
  803. inAccumNames,
  804. self.__name) = state[1]
  805. self.__accumNames = {}
  806. self.__accumNames.update(inAccumNames)
  807. if par is not None:
  808. self.__parent = wkref(par)
  809. else:
  810. self.__parent = None
  811. def __getnewargs__(self):
  812. return self.__toklist, self.__name, self.__asList, self.__modal
  813. def __dir__(self):
  814. return (dir(type(self)) + list(self.keys()))
  815. collections.MutableMapping.register(ParseResults)
  816. def col (loc,strg):
  817. """Returns current column within a string, counting newlines as line separators.
  818. The first column is number 1.
  819. Note: the default parsing behavior is to expand tabs in the input string
  820. before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
  821. on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
  822. consistent view of the parsed string, the parse location, and line and column
  823. positions within the parsed string.
  824. """
  825. s = strg
  826. return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
  827. def lineno(loc,strg):
  828. """Returns current line number within a string, counting newlines as line separators.
  829. The first line is number 1.
  830. Note: the default parsing behavior is to expand tabs in the input string
  831. before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
  832. on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
  833. consistent view of the parsed string, the parse location, and line and column
  834. positions within the parsed string.
  835. """
  836. return strg.count("\n",0,loc) + 1
  837. def line( loc, strg ):
  838. """Returns the line of text containing loc within a string, counting newlines as line separators.
  839. """
  840. lastCR = strg.rfind("\n", 0, loc)
  841. nextCR = strg.find("\n", loc)
  842. if nextCR >= 0:
  843. return strg[lastCR+1:nextCR]
  844. else:
  845. return strg[lastCR+1:]
  846. def _defaultStartDebugAction( instring, loc, expr ):
  847. print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
  848. def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
  849. print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
  850. def _defaultExceptionDebugAction( instring, loc, expr, exc ):
  851. print ("Exception raised:" + _ustr(exc))
  852. def nullDebugAction(*args):
  853. """'Do-nothing' debug action, to suppress debugging output during parsing."""
  854. pass
  855. # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
  856. #~ 'decorator to trim function calls to match the arity of the target'
  857. #~ def _trim_arity(func, maxargs=3):
  858. #~ if func in singleArgBuiltins:
  859. #~ return lambda s,l,t: func(t)
  860. #~ limit = 0
  861. #~ foundArity = False
  862. #~ def wrapper(*args):
  863. #~ nonlocal limit,foundArity
  864. #~ while 1:
  865. #~ try:
  866. #~ ret = func(*args[limit:])
  867. #~ foundArity = True
  868. #~ return ret
  869. #~ except TypeError:
  870. #~ if limit == maxargs or foundArity:
  871. #~ raise
  872. #~ limit += 1
  873. #~ continue
  874. #~ return wrapper
# this version is Python 2.x-3.x cross-compatible
'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can be called with the full parse-action argument
    list even if it accepts fewer arguments.

    The wrapper calls C{func} with progressively fewer leading arguments each
    time the call itself raises C{TypeError}, and remembers the first offset
    that works.  A C{TypeError} raised from inside C{func}, or any
    C{TypeError} after a call has succeeded, is re-raised unchanged.
    Builtins listed in C{singleArgBuiltins} are simply called with the tokens.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists serve as mutable cells shared with wrapper()
    # (Python 2 has no 'nonlocal')
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    # NOTE(review): LINE_DIFF counts physical lines from 'this_line = ...' to
    # 'ret = func(...)', including the blank line before 'def wrapper'; keep
    # this layout exactly as it is.
    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # the error is ours only if the innermost frame is the call line above
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference before continuing
                        del tb

                # try again with one fewer leading argument, until the limit is exceeded
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
  933. class ParserElement(object):
  934. """Abstract base level parser element class."""
  935. DEFAULT_WHITE_CHARS = " \n\t\r"
  936. verbose_stacktrace = False
  937. @staticmethod
  938. def setDefaultWhitespaceChars( chars ):
  939. r"""
  940. Overrides the default whitespace chars
  941. Example::
  942. # default whitespace chars are space, <TAB> and newline
  943. OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
  944. # change to just treat newline as significant
  945. ParserElement.setDefaultWhitespaceChars(" \t")
  946. OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
  947. """
  948. ParserElement.DEFAULT_WHITE_CHARS = chars
  949. @staticmethod
  950. def inlineLiteralsUsing(cls):
  951. """
  952. Set class to be used for inclusion of string literals into a parser.
  953. Example::
  954. # default literal class used is Literal
  955. integer = Word(nums)
  956. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  957. date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
  958. # change to Suppress
  959. ParserElement.inlineLiteralsUsing(Suppress)
  960. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  961. date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
  962. """
  963. ParserElement._literalStringClass = cls
  964. def __init__( self, savelist=False ):
  965. self.parseAction = list()
  966. self.failAction = None
  967. #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
  968. self.strRepr = None
  969. self.resultsName = None
  970. self.saveAsList = savelist
  971. self.skipWhitespace = True
  972. self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
  973. self.copyDefaultWhiteChars = True
  974. self.mayReturnEmpty = False # used when checking for left-recursion
  975. self.keepTabs = False
  976. self.ignoreExprs = list()
  977. self.debug = False
  978. self.streamlined = False
  979. self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
  980. self.errmsg = ""
  981. self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
  982. self.debugActions = ( None, None, None ) #custom debug actions
  983. self.re = None
  984. self.callPreparse = True # used to avoid redundant calls to preParse
  985. self.callDuringTry = False
  def copy( self ):
      """
      Return a shallow copy of this C{ParserElement} that owns its own
      parse-action and ignore-expression lists, so that actions added to the
      copy do not affect the original (and vice versa).

      Example::
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
          integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

          print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
      prints::
          [5120, 100, 655360, 268435456]

      Calling the element with no arguments, C{expr()}, is equivalent to C{expr.copy()}::
          integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
      """
      duplicate = copy.copy( self )
      # give the duplicate independent lists; copy.copy() alone would share them
      duplicate.parseAction = list(self.parseAction)
      duplicate.ignoreExprs = list(self.ignoreExprs)
      if self.copyDefaultWhiteChars:
          # pick up the class-wide default as it is *now*
          duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
      return duplicate
  def setName( self, name ):
      """
      Give this expression a name; the name is used to build the
      "Expected <name>" error message, which makes debugging output and
      exception messages clearer. Returns C{self} so calls can be chained.

      Example::
          Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
          Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
      """
      self.name = name
      self.errmsg = "Expected " + self.name
      if not hasattr(self, "exception"):
          return self
      # keep an already-attached exception object in sync with the new message
      self.exception.msg = self.errmsg
      return self
  def setResultsName( self, name, listAllMatches=False ):
      """
      Attach a results name under which the matched tokens can be looked up
      in the returned parse results.

      NOTE: a *copy* of this element is returned and the original is left
      untouched, so one basic element (an integer, say) can be reused in
      several places under different names.

      A trailing C{'*'} on C{name} is stripped and has the same effect as
      passing C{listAllMatches=True}.

      The abbreviated form C{expr("name")} is equivalent to
      C{expr.setResultsName("name")} - see L{I{__call__}<__call__>}.

      Example::
          date_str = (integer.setResultsName("year") + '/'
                      + integer.setResultsName("month") + '/'
                      + integer.setResultsName("day"))

          # equivalent form:
          date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
      """
      named = self.copy()
      collect_all = listAllMatches
      if name.endswith("*"):
          name, collect_all = name[:-1], True
      named.resultsName = name
      # "modal" results report only the last match; cumulative ones list all
      named.modalResults = not collect_all
      return named
  def setBreak(self,breakFlag = True):
      """Method to invoke the Python pdb debugger when this element is
         about to be parsed. Set C{breakFlag} to True to enable, False to
         disable. Returns C{self} so calls can be chained.

         Enabling is idempotent: calling C{setBreak()} several times installs
         a single breakpoint wrapper, so one C{setBreak(False)} always
         restores the original parse method.
      """
      # If a breakpoint wrapper is already installed, recover the method it
      # wraps so that wrappers are never stacked on top of each other.
      _parseMethod = getattr(self._parse, "_originalParseMethod", self._parse)
      if breakFlag:
          def breaker(instring, loc, doActions=True, callPreParse=True):
              import pdb
              pdb.set_trace()
              return _parseMethod( instring, loc, doActions, callPreParse )
          # remember what was wrapped so the breakpoint can be removed later
          breaker._originalParseMethod = _parseMethod
          self._parse = breaker
      elif hasattr(self._parse, "_originalParseMethod"):
          self._parse = _parseMethod
      return self
  def setParseAction( self, *fns, **kwargs ):
      """
      Replace this element's parse actions with C{fns}; they run when the
      element matches successfully. Returns C{self}.

      A parse action is a callable taking 0-3 arguments, invoked as
      C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
       - s   = the original string being parsed (see note below)
       - loc = the location of the matching substring
       - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
      If an action returns a value, that value replaces the matched tokens;
      an action that returns nothing leaves the tokens unchanged.

      Optional keyword arguments:
       - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

      Note: by default tabs in the input string are expanded before parsing
      starts. See L{I{parseString}<parseString>} for details on parsing
      strings containing C{<TAB>}s and on keeping the parsed string, the
      parse location, and line/column positions consistent.

      Example::
          integer = Word(nums)
          date_str = integer + '/' + integer + '/' + integer

          date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

          # use parse action to convert to ints at parse time
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          date_str = integer + '/' + integer + '/' + integer

          # note that integer fields are now ints, not strings
          date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
      """
      trim = _trim_arity
      # wrap each callable so it can be invoked uniformly as fn(s, loc, toks)
      self.parseAction = [trim(action) for action in fns]
      self.callDuringTry = kwargs.get("callDuringTry", False)
      return self
  def addParseAction( self, *fns, **kwargs ):
      """
      Append C{fns} to this expression's existing list of parse actions
      (rather than replacing them). See L{I{setParseAction}<setParseAction>}
      for the accepted call signatures. Returns C{self}.

      See examples in L{I{copy}<copy>}.
      """
      trim = _trim_arity
      self.parseAction.extend([trim(action) for action in fns])
      # once enabled, callDuringTry stays enabled
      self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
      return self
  def addCondition(self, *fns, **kwargs):
      """Add a boolean predicate function to expression's list of parse actions. See
      L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
      functions passed to C{addCondition} need to return boolean success/fail of the condition.
      When several functions are given, each one is checked independently. Returns C{self}.

      Optional keyword arguments:
       - message = define a custom message to be used in the raised exception
       - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
       - callDuringTry = if True, the conditions are also evaluated during lookaheads and alternate testing

      Example::
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          year_int = integer.copy()
          year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
          date_str = year_int + '/' + integer + '/' + integer

          result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
      """
      msg = kwargs.get("message", "failed user-defined condition")
      exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

      def make_condition(predicate):
          # A factory gives every predicate its own closure cell. Defining
          # ``pa`` directly in the loop below would make all actions share the
          # loop variable and therefore test only the *last* predicate.
          def pa(s,l,t):
              if not bool(predicate(s,l,t)):
                  raise exc_type(s,l,msg)
          return pa

      for fn in fns:
          # normalise the call signature once, at registration time
          self.parseAction.append(make_condition(_trim_arity(fn)))
      self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
      return self
  def setFailAction( self, fn ):
      """Register the action to run when parsing fails at this expression.
         The fail action C{fn} is a callable invoked as
         C{fn(s,loc,expr,err)} where:
          - s = string being parsed
          - loc = location where expression match was attempted and failed
          - expr = the parse expression that failed
          - err = the exception thrown
         Its return value is not used. It may raise C{L{ParseFatalException}}
         if it is desired to stop parsing immediately.

         Returns C{self} so calls can be chained."""
      self.failAction = fn
      return self
  def _skipIgnorables( self, instring, loc ):
      """Advance C{loc} past every run of ignorable expressions and return
         the new location. The whole list of C{ignoreExprs} is re-tried until
         a complete pass matches nothing, so interleaved ignorables are all
         consumed."""
      while True:
          skipped_any = False
          for ignorable in self.ignoreExprs:
              try:
                  # consume as many consecutive matches as possible; the
                  # loop ends when the expression stops matching
                  while True:
                      loc, _ = ignorable._parse( instring, loc )
                      skipped_any = True
              except ParseException:
                  continue
          if not skipped_any:
              return loc
  def preParse( self, instring, loc ):
      """Return the location at which matching should start: C{loc} moved
         past any ignorable expressions and then, if whitespace skipping is
         enabled, past any characters found in C{self.whiteChars}."""
      if self.ignoreExprs:
          loc = self._skipIgnorables( instring, loc )

      if not self.skipWhitespace:
          return loc

      whitespace = self.whiteChars
      end = len(instring)
      while loc < end and instring[loc] in whitespace:
          loc += 1
      return loc
  def parseImpl( self, instring, loc, doActions=True ):
      """Base matching step: consumes nothing and produces no tokens.
         Returns C{(loc, [])} with C{loc} unchanged."""
      no_tokens = []
      return loc, no_tokens
  def postParse( self, instring, loc, tokenlist ):
      """Post-processing hook applied to the tokens produced by
         C{parseImpl}; this base version returns C{tokenlist} untouched."""
      unchanged = tokenlist
      return unchanged
  #~ @profile
  def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
      """
      Core (non-memoized) parse step for this element.

      Optionally runs C{preParse}, calls C{parseImpl} and C{postParse}, wraps
      the tokens in a C{ParseResults}, and then runs the parse actions.

      Parameters:
       - instring - the string being parsed
       - loc - location at which to attempt the match
       - doActions - when False, parse actions are skipped unless
         C{self.callDuringTry} is set
       - callPreParse - when False, C{preParse} is not called here (the
         element's own C{callPreparse} flag must also be true for it to run)

      Returns a tuple C{(loc, retTokens)}: the location after the match and
      the C{ParseResults} for it.

      An C{IndexError} raised by C{parseImpl} is converted to a
      C{ParseException} located at the end of the input. When debugging or a
      fail action is configured, C{debugActions[0]}/C{[1]}/C{[2]} are invoked
      on start/success/exception and C{failAction} on failure; the exception
      is always re-raised.
      """
      debugging = ( self.debug ) #and doActions )

      # Slow path: taken only when debug hooks or a fail action must be notified
      if debugging or self.failAction:
          #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
          if (self.debugActions[0] ):
              self.debugActions[0]( instring, loc, self )
          if callPreParse and self.callPreparse:
              preloc = self.preParse( instring, loc )
          else:
              preloc = loc
          # location handed to parse actions and debug/fail hooks
          tokensStart = preloc
          try:
              try:
                  loc,tokens = self.parseImpl( instring, preloc, doActions )
              except IndexError:
                  # running off the end of the input is reported as a parse failure
                  raise ParseException( instring, len(instring), self.errmsg, self )
          except ParseBaseException as err:
              #~ print ("Exception raised:", err)
              if self.debugActions[2]:
                  self.debugActions[2]( instring, tokensStart, self, err )
              if self.failAction:
                  self.failAction( instring, tokensStart, self, err )
              raise
      else:
          # Fast path: no hooks to call
          if callPreParse and self.callPreparse:
              preloc = self.preParse( instring, loc )
          else:
              preloc = loc
          tokensStart = preloc
          # the try/except guard is set up only when an IndexError is possible
          if self.mayIndexError or loc >= len(instring):
              try:
                  loc,tokens = self.parseImpl( instring, preloc, doActions )
              except IndexError:
                  raise ParseException( instring, len(instring), self.errmsg, self )
          else:
              loc,tokens = self.parseImpl( instring, preloc, doActions )

      tokens = self.postParse( instring, loc, tokens )

      retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
      if self.parseAction and (doActions or self.callDuringTry):
          if debugging:
              try:
                  for fn in self.parseAction:
                      tokens = fn( instring, tokensStart, retTokens )
                      # a non-None return value replaces the current results
                      if tokens is not None:
                          retTokens = ParseResults( tokens,
                                                    self.resultsName,
                                                    asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                    modal=self.modalResults )
              except ParseBaseException as err:
                  #~ print "Exception raised in user parse action:", err
                  if (self.debugActions[2] ):
                      self.debugActions[2]( instring, tokensStart, self, err )
                  raise
          else:
              # same loop as above, without the debug exception hook
              for fn in self.parseAction:
                  tokens = fn( instring, tokensStart, retTokens )
                  if tokens is not None:
                      retTokens = ParseResults( tokens,
                                                self.resultsName,
                                                asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                modal=self.modalResults )

      if debugging:
          #~ print ("Matched",self,"->",retTokens.asList())
          if (self.debugActions[1] ):
              self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

      return loc, retTokens
  def tryParse( self, instring, loc ):
      """Attempt a match at C{loc} with parse actions disabled
         (C{doActions=False}) and return only the end location. A
         C{ParseFatalException} is downgraded to an ordinary
         C{ParseException} carrying this element's error message."""
      try:
          outcome = self._parse( instring, loc, doActions=False )
      except ParseFatalException:
          raise ParseException( instring, loc, self.errmsg, self)
      else:
          return outcome[0]
  def canParseNext(self, instring, loc):
      """Return True if this element would match at C{loc}, False if the
         trial parse raises C{ParseException} or C{IndexError}."""
      try:
          self.tryParse(instring, loc)
          return True
      except (ParseException, IndexError):
          return False
  class _UnboundedCache(object):
      """Packrat cache with no size limit. C{get}/C{set}/C{clear} are
         closures over a private dict and are bound to the instance in
         C{__init__}; C{get} returns the C{not_in_cache} sentinel for a
         missing key."""
      def __init__(self):
          cache = {}
          # unique sentinel, so that any cached value (even None) is distinguishable from a miss
          self.not_in_cache = not_in_cache = object()

          def get(self, key):
              return cache.get(key, not_in_cache)

          def set(self, key, value):
              cache[key] = value

          def clear(self):
              cache.clear()

          # expose the closures as bound methods named get / set / clear
          for accessor in (get, set, clear):
              setattr(self, accessor.__name__, types.MethodType(accessor, self))
  if _OrderedDict is not None:
      class _FifoCache(object):
          """Packrat cache bounded to C{size} entries; when the limit is
             exceeded the oldest inserted entry is discarded. C{get} returns
             the C{not_in_cache} sentinel for a missing key."""
          def __init__(self, size):
              self.not_in_cache = not_in_cache = object()

              # the ordered dict remembers insertion order for FIFO eviction
              cache = _OrderedDict()

              def get(self, key):
                  return cache.get(key, not_in_cache)

              def set(self, key, value):
                  cache[key] = value
                  if len(cache) > size:
                      # last=False pops the oldest entry
                      cache.popitem(False)

              def clear(self):
                  cache.clear()

              self.get = types.MethodType(get, self)
              self.set = types.MethodType(set, self)
              self.clear = types.MethodType(clear, self)

  else:
      class _FifoCache(object):
          """Fallback packrat cache for interpreters without an ordered dict:
             a plain dict plus a deque recording insertion order. Behaves like
             the ordered-dict variant: bounded to C{size} entries, oldest
             inserted entry evicted first, and a C{size} of 0 keeps nothing."""
          def __init__(self, size):
              self.not_in_cache = not_in_cache = object()

              cache = {}
              # Holds exactly the keys currently in ``cache``, oldest first.
              # Deliberately NOT created with maxlen: a bounded deque silently
              # drops its oldest key on append, which would leave that key in
              # ``cache`` with no way of ever evicting it.
              key_fifo = collections.deque()

              def get(self, key):
                  return cache.get(key, not_in_cache)

              def set(self, key, value):
                  # overwriting an existing key keeps its original position,
                  # exactly as assignment to an ordered dict does
                  if key not in cache:
                      key_fifo.append(key)
                  cache[key] = value
                  # ``key_fifo`` is checked first so that size <= 0 can never
                  # pop from an empty deque
                  while key_fifo and len(cache) > size:
                      cache.pop(key_fifo.popleft(), None)

              def clear(self):
                  cache.clear()
                  key_fifo.clear()

              self.get = types.MethodType(get, self)
              self.set = types.MethodType(set, self)
              self.clear = types.MethodType(clear, self)
  # argument cache for optimizing repeated calls when backtracking through recursive expressions
  packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
  # re-entrant lock held by _parseCache() for the whole lookup/parse/store sequence
  packrat_cache_lock = RLock()
  # counters indexed as [hits, misses]; updated by _parseCache(), zeroed by resetCache()
  packrat_cache_stats = [0, 0]
  # this method gets repeatedly called during backtracking with the same arguments -
  # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
  def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
      """Memoizing ("packrat") front end to C{_parseNoCache}.

         Results and parse exceptions are cached under the key
         C{(self, instring, loc, callPreParse, doActions)}. A hit returns a
         copy of the cached results, or re-raises the cached exception; a
         miss runs the real parse and stores its outcome. The whole
         operation runs while holding C{packrat_cache_lock}."""
      HIT, MISS = 0, 1
      key = (self, instring, loc, callPreParse, doActions)
      with ParserElement.packrat_cache_lock:
          cache = ParserElement.packrat_cache
          cached = cache.get(key)

          if cached is not cache.not_in_cache:
              ParserElement.packrat_cache_stats[HIT] += 1
              if isinstance(cached, Exception):
                  raise cached
              # hand out a copy so callers cannot alter the cached results
              return (cached[0], cached[1].copy())

          ParserElement.packrat_cache_stats[MISS] += 1
          try:
              result = self._parseNoCache(instring, loc, doActions, callPreParse)
          except ParseBaseException as pe:
              # cache a copy of the exception, without the traceback
              cache.set(key, pe.__class__(*pe.args))
              raise
          cache.set(key, (result[0], result[1].copy()))
          return result
  # default parse entry point: no memoization; enablePackrat() rebinds this to _parseCache
  _parse = _parseNoCache
  @staticmethod
  def resetCache():
      """Empty the packrat cache and zero its hit/miss counters. The
         counter list is updated in place, so existing references to it
         stay valid."""
      ParserElement.packrat_cache.clear()
      stats = ParserElement.packrat_cache_stats
      stats[:] = [0 for _ in stats]
  # guard flag: lets enablePackrat() install the packrat cache only once
  _packratEnabled = False
  @staticmethod
  def enablePackrat(cache_size_limit=128):
      """Turn on "packrat" parsing, i.e. memoization of parse attempts.
         When the same expression is tried again at the same string
         location (common in complex grammars), the cached outcome is
         returned instead of re-running the parsing/validating code. Both
         successful results and parsing exceptions are memoized.

         Parameters:
          - cache_size_limit - (default=C{128}) - if an integer value is provided
            will limit the size of the packrat cache; if None is passed, then
            the cache size will be unbounded; if 0 is passed, the cache will
            be effectively disabled.

         Because cached results bypass re-execution, this speedup may break
         programs whose parse actions have side-effects; packrat parsing is
         therefore off when pyparsing is first imported and must be
         activated explicitly with C{ParserElement.enablePackrat()}. If
         your program uses C{psyco} to "compile as you go", you must call
         C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
         Python will crash. For best results, call C{enablePackrat()} immediately
         after importing pyparsing.

         Calling this more than once has no further effect; in particular a
         later call does not change the cache size.

         Example::
             import pyparsing
             pyparsing.ParserElement.enablePackrat()
      """
      if ParserElement._packratEnabled:
          return
      ParserElement._packratEnabled = True
      if cache_size_limit is None:
          new_cache = ParserElement._UnboundedCache()
      else:
          new_cache = ParserElement._FifoCache(cache_size_limit)
      ParserElement.packrat_cache = new_cache
      # route every element's parsing through the memoizing front end
      ParserElement._parse = ParserElement._parseCache
  def parseString( self, instring, parseAll=False ):
      """
      Run this parse expression against C{instring}, starting at position 0,
      and return the resulting tokens.

      This is the main entry point for client code once the complete
      expression has been built.

      Set C{parseAll} to True to require that the entire input string be
      consumed (equivalent to ending the grammar with C{L{StringEnd()}}).

      Note: unless C{keepTabs} is set, C{parseString} calls C{expandtabs()}
      on the input string, in order to report proper column numbers in
      parse actions. If the input contains tabs and your parse actions use
      the C{loc} argument to index into the string being parsed, you can
      keep a consistent view of the input by:
       - calling C{parseWithTabs} on your grammar before calling C{parseString}
         (see L{I{parseWithTabs}<parseWithTabs>})
       - defining your parse action with the full C{(s,loc,toks)} signature, and
         referencing the input string through the parse action's C{s} argument
       - explicitly expanding the tabs in your input string before calling
         C{parseString}

      Example::
          Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
          Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
      """
      ParserElement.resetCache()
      if not self.streamlined:
          self.streamline()
          #~ self.saveAsList = True
      for ignorable in self.ignoreExprs:
          ignorable.streamline()
      if not self.keepTabs:
          instring = instring.expandtabs()
      try:
          loc, tokens = self._parse( instring, 0 )
          if parseAll:
              # only trailing whitespace/ignorables may remain after the match
              loc = self.preParse( instring, loc )
              end_check = Empty() + StringEnd()
              end_check._parse( instring, loc )
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
      return tokens
  def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
      """
      Scan the input string for expression matches. This is a generator: each
      match is yielded as a tuple of the matching tokens, start location, and
      end location. May be called with optional C{maxMatches} argument, to
      clip scanning after 'n' matches are found. If C{overlap} is specified,
      then overlapping matches will be reported.

      Note that the start and end locations are reported relative to the string
      being parsed. Unless C{keepTabs} is set, that string is the input after
      C{expandtabs()}. See L{I{parseString}<parseString>} for more information
      on parsing strings with embedded tabs.

      Example::
          source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
          print(source)
          for tokens,start,end in Word(alphas).scanString(source):
              print(' '*start + '^'*(end-start))
              print(' '*start + tokens[0])

      prints::

          sldjf123lsdjjkf345sldkjf879lkjsfd987
          ^^^^^
          sldjf
                  ^^^^^^^
                  lsdjjkf
                            ^^^^^^
                            sldkjf
                                     ^^^^^^
                                     lkjsfd
      """
      if not self.streamlined:
          self.streamline()
      for e in self.ignoreExprs:
          e.streamline()

      if not self.keepTabs:
          instring = _ustr(instring).expandtabs()
      instrlen = len(instring)
      loc = 0
      # bind the bound methods to locals once; they are called on every iteration
      preparseFn = self.preParse
      parseFn = self._parse
      ParserElement.resetCache()
      matches = 0
      try:
          while loc <= instrlen and matches < maxMatches:
              try:
                  # preParse is done here, so the parse call is told not to repeat it
                  preloc = preparseFn( instring, loc )
                  nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
              except ParseException:
                  # no match here: retry one character past the pre-parsed location
                  loc = preloc+1
              else:
                  if nextLoc > loc:
                      matches += 1
                      yield tokens, preloc, nextLoc
                      if overlap:
                          # NOTE: ``nextloc`` (lower-case "l") is a different name
                          # from ``nextLoc``: it is where preParse lands when
                          # started from ``loc``
                          nextloc = preparseFn( instring, loc )
                          if nextloc > loc:
                              loc = nextLoc
                          else:
                              loc += 1
                      else:
                          # non-overlapping scan resumes at the end of this match
                          loc = nextLoc
                  else:
                      # match did not advance past ``loc``; step forward so the loop makes progress
                      loc = preloc+1
      except ParseBaseException as exc:
          if ParserElement.verbose_stacktrace:
              raise
          else:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
  def transformString( self, instring ):
      """
      Extension to C{L{scanString}} that rewrites the matched portions of a
      string. Define a grammar, attach a parse action that modifies the
      returned token list, and call C{transformString()} on the target
      string: every match is replaced by the (possibly modified) tokens of
      that match, while unmatched text is copied through unchanged. The
      transformed string is returned.

      Note: this method sets C{keepTabs} to True on the element, and the
      setting is not restored afterwards.

      Example::
          wd = Word(alphas)
          wd.setParseAction(lambda toks: toks[0].title())

          print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
      Prints::
          Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
      """
      pieces = []
      prev_end = 0
      # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
      # keep string locs straight between transformString and scanString
      self.keepTabs = True
      try:
          for toks, start, end in self.scanString( instring ):
              # untouched text between the previous match and this one
              pieces.append( instring[prev_end:start] )
              if toks:
                  if isinstance(toks, ParseResults):
                      pieces.extend(toks.asList())
                  elif isinstance(toks, list):
                      pieces.extend(toks)
                  else:
                      pieces.append(toks)
              prev_end = end
          pieces.append(instring[prev_end:])
          non_empty = [piece for piece in pieces if piece]
          return "".join(map(_ustr, _flatten(non_empty)))
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def searchString( self, instring, maxMatches=_MAX_INT ):
      """
      Another extension to C{L{scanString}}: collects just the tokens of
      every match (dropping the start/end locations) into a single
      C{ParseResults}. The optional C{maxMatches} argument stops the search
      after 'n' matches are found.

      Example::
          # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
          cap_word = Word(alphas.upper(), alphas.lower())

          print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
      prints::
          ['More', 'Iron', 'Lead', 'Gold', 'I']
      """
      try:
          found = [ tokens for tokens, _start, _end in self.scanString( instring, maxMatches ) ]
          return ParseResults(found)
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
      """
      Generator method to split a string using the given expression as a separator.
      May be called with optional C{maxsplit} argument, to limit the number of splits;
      and the optional C{includeSeparators} argument (default=C{False}), if the separating
      matching text should be included in the split results.

      The text after the last separator is always yielded, even when it is
      empty. When C{includeSeparators} is true, the first token of each
      separator match is yielded between the surrounding pieces.

      Example::
          punc = oneOf(list(".,;:/-!?"))
          print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
      prints::
          ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
      """
      last = 0
      for t,s,e in self.scanString(instring, maxMatches=maxsplit):
          # text between the previous separator and this one
          yield instring[last:s]
          if includeSeparators:
              yield t[0]
          last = e
      yield instring[last:]
  def __add__(self, other ):
      """
      Implementation of + operator - returns C{L{And}} of this element
      followed by C{other}. A string operand is first converted to the
      current literal class (L{Literal} by default). For any other
      non-C{ParserElement} operand a C{SyntaxWarning} is issued and C{None}
      is returned.

      Example::
          greet = Word(alphas) + "," + Word(alphas) + "!"
          hello = "Hello, World!"
          print (hello, "->", greet.parseString(hello))
      Prints::
          Hello, World! -> ['Hello', ',', 'World', '!']
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return And( [ self, other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __radd__(self, other ):
      """
      Implementation of + operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted to the current literal class and the
      addition is then delegated to it; any other operand type triggers a
      C{SyntaxWarning} and yields C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other + self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __sub__(self, other):
      """
      Implementation of - operator, returns C{L{And}} with error stop: an
      C{And._ErrorStop()} marker is placed between this element and C{other}.
      A string operand is converted to the current literal class; any other
      non-C{ParserElement} operand triggers a C{SyntaxWarning} and yields
      C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return And( [ self, And._ErrorStop(), other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __rsub__(self, other ):
      """
      Implementation of - operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted to the current literal class and the
      subtraction is then delegated to it; any other operand type triggers
      a C{SyntaxWarning} and yields C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other - self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __mul__(self,other):
      """
      Implementation of * operator, allows use of C{expr * 3} in place of
      C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
      tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
      may also include C{None} as in:
       - C{expr*(n,None)} or C{expr*(n,)} is equivalent
            to C{expr*n + L{ZeroOrMore}(expr)}
            (read as "at least n instances of C{expr}")
       - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
            (read as "0 to n instances of C{expr}")
       - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
       - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

      Note that C{expr*(None,n)} does not raise an exception if
      more than n exprs exist in the input stream; that is,
      C{expr*(None,n)} does not enforce a maximum number of expr
      occurrences.  If this behavior is desired, then write
      C{expr*(None,n) + ~expr}

      Raises C{TypeError} if C{other} is neither an int nor a tuple of
      ints/C{None}, and C{ValueError} for a negative count, a maximum
      smaller than the minimum, or a multiplier of 0 / (0,0).
      """
      if isinstance(other,int):
          minElements, optElements = other,0
      elif isinstance(other,tuple):
          # pad short tuples with None so that (n,) reads as (n, None)
          other = (other + (None, None))[:2]
          if other[0] is None:
              other = (0, other[1])
          if isinstance(other[0],int) and other[1] is None:
              if other[0] == 0:
                  return ZeroOrMore(self)
              if other[0] == 1:
                  return OneOrMore(self)
              else:
                  return self*other[0] + ZeroOrMore(self)
          elif isinstance(other[0],int) and isinstance(other[1],int):
              minElements, optElements = other
              # from here on optElements counts the *optional* repetitions only
              optElements -= minElements
          else:
              # the values are interpolated with %, not passed as extra exception args
              raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
      else:
          # one-element tuple so that %-formatting is well defined for any type() value
          raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

      if minElements < 0:
          raise ValueError("cannot multiply ParserElement by negative value")
      if optElements < 0:
          raise ValueError("second tuple value must be greater or equal to first tuple value")
      if minElements == optElements == 0:
          raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

      if (optElements):
          def makeOptionalList(n):
              # nested Optionals: each further repetition is tried only if the previous one matched
              if n>1:
                  return Optional(self + makeOptionalList(n-1))
              else:
                  return Optional(self)
          if minElements:
              if minElements == 1:
                  ret = self + makeOptionalList(optElements)
              else:
                  ret = And([self]*minElements) + makeOptionalList(optElements)
          else:
              ret = makeOptionalList(optElements)
      else:
          if minElements == 1:
              ret = self
          else:
              ret = And([self]*minElements)
      return ret
  def __rmul__(self, other):
      """Implementation of * operator when the multiplier is the left
         operand (e.g. C{3 * expr}); delegates to C{__mul__}."""
      product = self.__mul__(other)
      return product
  def __or__(self, other ):
      """
      Implementation of | operator - returns C{L{MatchFirst}} of this
      element and C{other}. A string operand is converted to the current
      literal class; any other non-C{ParserElement} operand triggers a
      C{SyntaxWarning} and yields C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return MatchFirst( [ self, other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __ror__(self, other ):
      """
      Implementation of | operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted to the current literal class and the
      operation is then delegated to it; any other operand type triggers a
      C{SyntaxWarning} and yields C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other | self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __xor__(self, other ):
      """
      Implementation of ^ operator - returns C{L{Or}} of this element and
      C{other}. A string operand is converted to the current literal class;
      any other non-C{ParserElement} operand triggers a C{SyntaxWarning}
      and yields C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return Or( [ self, other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __rxor__(self, other ):
      """
      Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted to the current literal class and the
      operation is then delegated to it; any other operand type triggers a
      C{SyntaxWarning} and yields C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other ^ self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __and__(self, other ):
      """
      Implementation of & operator - returns C{L{Each}} of this element and
      C{other}. A string operand is converted to the current literal class;
      any other non-C{ParserElement} operand triggers a C{SyntaxWarning}
      and yields C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return Each( [ self, other ] )
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __rand__(self, other ):
      """
      Implementation of & operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted to the current literal class and the
      operation is then delegated to it; any other operand type triggers a
      C{SyntaxWarning} and yields C{None}.
      """
      if isinstance( other, basestring ):
          other = ParserElement._literalStringClass( other )
      if isinstance( other, ParserElement ):
          return other & self
      warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
              SyntaxWarning, stacklevel=2)
      return None
  def __invert__(self):
      """
      Implementation of ~ operator - returns C{L{NotAny}}
      """
      negated = NotAny(self)
      return negated
  def __call__(self, name=None):
      """
      Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.

      If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
      passed as C{True}.

      If C{name} is omitted, same as calling C{L{copy}}.

      Example::
          # these are equivalent
          userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
          userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
      """
      # Only None means "no name"; an empty string is still forwarded.
      return self.copy() if name is None else self.setResultsName(name)
  def suppress(self):
      """
      Wrap this C{ParserElement} in C{L{Suppress}} so that its output is suppressed;
      useful to keep punctuation from cluttering up returned output.
      """
      suppressed = Suppress(self)
      return suppressed
  def leaveWhitespace(self):
      """
      Turn off whitespace skipping ahead of this C{ParserElement}'s pattern.
      Normally only used internally by the pyparsing module, but may be
      needed in some whitespace-sensitive grammars.

      Returns C{self} so calls can be chained.
      """
      setattr(self, "skipWhitespace", False)
      return self
  def setWhitespaceChars(self, chars):
      """
      Overrides the default whitespace chars.

      Stores C{chars} as this element's whitespace characters, re-enables
      whitespace skipping, and clears C{copyDefaultWhiteChars}.
      Returns C{self} so calls can be chained.
      """
      self.whiteChars = chars
      self.copyDefaultWhiteChars = False
      self.skipWhitespace = True
      return self
  def parseWithTabs(self):
      """
      Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
      Must be called before C{parseString} when the input grammar contains elements that
      match C{<TAB>} characters.

      Sets the C{keepTabs} flag and returns C{self} so calls can be chained.
      """
      setattr(self, "keepTabs", True)
      return self
  def ignore(self, other):
      """
      Define expression to be ignored (e.g., comments) while doing pattern
      matching; may be called repeatedly, to define multiple comment or other
      ignorable patterns.

      A string is wrapped in C{Suppress}. A C{Suppress} expression is
      registered only if it is not already in C{ignoreExprs}; any other
      expression is copied and wrapped in C{Suppress} before being added.

      Example::
          patt = OneOrMore(Word(alphas))
          patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

          patt.ignore(cStyleComment)
          patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
      """
      expr = Suppress(other) if isinstance(other, basestring) else other
      if not isinstance(expr, Suppress):
          self.ignoreExprs.append(Suppress(expr.copy()))
      elif expr not in self.ignoreExprs:
          self.ignoreExprs.append(expr)
      return self
  def setDebugActions(self, startAction, successAction, exceptionAction):
      """
      Enable display of debugging messages while doing pattern matching.

      Each falsy action is replaced by the corresponding module default
      debug action. Sets C{debug} to True and returns C{self}.
      """
      on_start = startAction or _defaultStartDebugAction
      on_success = successAction or _defaultSuccessDebugAction
      on_exception = exceptionAction or _defaultExceptionDebugAction
      self.debugActions = (on_start, on_success, on_exception)
      self.debug = True
      return self
  def setDebug(self, flag=True):
      """
      Enable display of debugging messages while doing pattern matching.
      Set C{flag} to True to enable, False to disable.

      Example::
          wd = Word(alphas).setName("alphaword")
          integer = Word(nums).setName("numword")
          term = wd | integer

          # turn on debugging for wd
          wd.setDebug()

          OneOrMore(term).parseString("abc 123 xyz 890")

      prints::
          Match alphaword at loc 0(1,1)
          Matched alphaword -> ['abc']
          Match alphaword at loc 3(1,4)
          Exception raised:Expected alphaword (at char 4), (line:1, col:5)
          Match alphaword at loc 7(1,8)
          Matched alphaword -> ['xyz']
          Match alphaword at loc 11(1,12)
          Exception raised:Expected alphaword (at char 12), (line:1, col:13)
          Match alphaword at loc 15(1,16)
          Exception raised:Expected alphaword (at char 15), (line:1, col:16)

      The output shown is that produced by the default debug actions - custom debug actions can be
      specified using L{setDebugActions}. Prior to attempting
      to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
      is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
      message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
      which makes debugging and exception messages easier to understand - for instance, the default
      name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
      """
      if not flag:
          self.debug = False
          return self
      # Enabling goes through setDebugActions, which also sets self.debug.
      self.setDebugActions(_defaultStartDebugAction,
                           _defaultSuccessDebugAction,
                           _defaultExceptionDebugAction)
      return self
  def __str__(self):
      """Return this element's C{name} attribute."""
      label = self.name
      return label
  def __repr__(self):
      """Return the same text as the string conversion done by C{_ustr}."""
      text = _ustr(self)
      return text
  def streamline(self):
      """
      Mark this element as streamlined and discard any cached string
      representation. Returns C{self} so calls can be chained.
      """
      self.strRepr = None
      self.streamlined = True
      return self
  def checkRecursion(self, parseElementList):
      """No-op in this class; the argument is ignored and C{None} is returned."""
      return None
  def validate(self, validateTrace=None):
      """
      Check defined expressions for valid structure, check for infinite recursive definitions.

      Parameters:
       - validateTrace - accepted for signature compatibility; this
         implementation does not read it. The default is C{None} rather
         than a shared mutable list, so no list object is shared between calls.

      Runs C{checkRecursion} with a fresh, empty element list.
      """
      self.checkRecursion([])
  def parseFile(self, file_or_filename, parseAll=False):
      """
      Execute the parse expression on the given file or filename.
      If a filename is specified (instead of a file object),
      the entire file is opened, read, and closed before parsing.
      """
      try:
          text = file_or_filename.read()
      except AttributeError:
          # No usable read(): treat the argument as a path.
          with open(file_or_filename, "r") as handle:
              text = handle.read()
      try:
          return self.parseString(text, parseAll)
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def __eq__(self, other):
      """
      Compare with another object.

      Two C{ParserElement}s are equal when they are the same object or
      their instance attributes compare equal. A string is equal when this
      expression matches it (see C{L{matches}}). Anything else is compared
      against the C{super} proxy of this object.
      """
      if isinstance(other, ParserElement):
          if self is other:
              return True
          return vars(self) == vars(other)
      if isinstance(other, basestring):
          return self.matches(other)
      return super(ParserElement, self) == other
  def __ne__(self, other):
      """Return the negation of C{self == other}."""
      is_equal = (self == other)
      return not is_equal
  def __hash__(self):
      """Hash by object identity (the hash of C{id(self)})."""
      identity = id(self)
      return hash(identity)
  def __req__(self, other):
      """Return the result of C{self == other}."""
      outcome = (self == other)
      return outcome
  def __rne__(self, other):
      """Return the negation of C{self == other}."""
      is_equal = (self == other)
      return not is_equal
  def matches(self, testString, parseAll=True):
      """
      Method for quick testing of a parser against a test string. Good for simple
      inline microtests of sub expressions while building up larger parser.

      Parameters:
       - testString - to test against this expression for a match
       - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

      Returns C{True} if parsing succeeds, C{False} if a
      C{ParseBaseException} is raised.

      Example::
          expr = Word(nums)
          assert expr.matches("100")
      """
      try:
          self.parseString(_ustr(testString), parseAll=parseAll)
      except ParseBaseException:
          return False
      else:
          return True
  def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
      """
      Execute the parse expression on a series of test strings, showing each
      test, the parsed results or where the parse failed. Quick and easy way to
      run a parse expression against a list of sample strings.

      Parameters:
       - tests - a list of separate test strings, or a multiline string of test strings
       - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
       - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
            string; pass None to disable comment filtering
       - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
            if False, only dump nested list
       - printResults - (default=C{True}) prints test output to stdout
       - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

      Returns: a (success, results) tuple, where success indicates that all tests succeeded
      (or failed if C{failureTests} is True), and the results contain a list of
      C{(test string, result)} pairs, the result being either the parsed
      results or the exception that was raised

      Example::
          number_expr = pyparsing_common.number.copy()

          result = number_expr.runTests('''
              # unsigned integer
              100
              # negative integer
              -100
              # float with scientific notation
              6.02e23
              # integer with scientific notation
              1e-12
              ''')
          print("Success" if result[0] else "Failed!")

          result = number_expr.runTests('''
              # stray character
              100Z
              # missing leading digit before '.'
              -.100
              # too many '.'
              3.14.159
              ''', failureTests=True)
          print("Success" if result[0] else "Failed!")
      prints::
          # unsigned integer
          100
          [100]

          # negative integer
          -100
          [-100]

          # float with scientific notation
          6.02e23
          [6.02e+23]

          # integer with scientific notation
          1e-12
          [1e-12]

          Success

          # stray character
          100Z
             ^
          FAIL: Expected end of text (at char 3), (line:1, col:4)

          # missing leading digit before '.'
          -.100
          ^
          FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

          # too many '.'
          3.14.159
              ^
          FAIL: Expected end of text (at char 4), (line:1, col:5)

          Success

      Each test string must be on a single line. If you want to test a string that spans multiple
      lines, create a test like this::

          expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

      (Note that this is a raw string literal, you must include the leading 'r'.)
      """
      if isinstance(tests, basestring):
          tests = list(map(str.strip, tests.rstrip().splitlines()))
      if isinstance(comment, basestring):
          comment = Literal(comment)

      collected = []
      pending_comments = []
      success = True
      for test in tests:
          is_comment = comment is not None and comment.matches(test, False)
          # A blank line directly after comment lines is kept with them.
          if is_comment or (pending_comments and not test):
              pending_comments.append(test)
              continue
          if not test:
              continue

          report = ['\n'.join(pending_comments), test]
          pending_comments = []
          try:
              # Literal backslash-n in the test text stands for a real newline.
              test = test.replace(r'\n','\n')
              outcome = self.parseString(test, parseAll=parseAll)
              report.append(outcome.dump(full=fullDump))
              success = success and not failureTests
          except ParseBaseException as pe:
              marker = '^' + ("(FATAL)" if isinstance(pe, ParseFatalException) else "")
              if '\n' in test:
                  # Multi-line input: echo the failing line, then point at the column.
                  report.append(line(pe.loc, test))
                  report.append(' '*(col(pe.loc,test)-1) + marker)
              else:
                  report.append(' '*pe.loc + marker)
              report.append("FAIL: " + str(pe))
              success = success and failureTests
              outcome = pe
          except Exception as exc:
              report.append("FAIL-EXCEPTION: " + str(exc))
              success = success and failureTests
              outcome = exc

          if printResults:
              if fullDump:
                  report.append('')
              print('\n'.join(report))

          collected.append((test, outcome))

      return success, collected
  class Token(ParserElement):
      """
      Abstract C{ParserElement} subclass, for defining atomic matching patterns.
      """

      def __init__(self):
          # Tokens are initialised with savelist disabled.
          super(Token, self).__init__(savelist=False)
  class Empty(Token):
      """
      An empty token, will always match.
      """

      def __init__(self):
          super(Empty, self).__init__()
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.name = "Empty"
  class NoMatch(Token):
      """
      A token that will never match.
      """

      def __init__(self):
          super(NoMatch, self).__init__()
          self.errmsg = "Unmatchable token"
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.name = "NoMatch"

      def parseImpl(self, instring, loc, doActions=True):
          """Always fail: raise C{ParseException} at the current location."""
          failure = ParseException(instring, loc, self.errmsg, self)
          raise failure
  class Literal(Token):
      """
      Token to exactly match a specified string.

      Example::
          Literal('blah').parseString('blah')  # -> ['blah']
          Literal('blah').parseString('blahfooblah')  # -> ['blah']
          Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

      For case-insensitive matching, use L{CaselessLiteral}.

      For keyword matching (force word break before and after the matched string),
      use L{Keyword} or L{CaselessKeyword}.
      """

      def __init__(self, matchString):
          super(Literal, self).__init__()
          self.match = matchString
          self.matchLen = len(matchString)
          try:
              self.firstMatchChar = matchString[0]
          except IndexError:
              # An empty literal is turned into an Empty instance in place.
              warnings.warn("null string passed to Literal; use Empty() instead",
                              SyntaxWarning, stacklevel=2)
              self.__class__ = Empty
          display_name = '"%s"' % _ustr(self.match)
          self.name = display_name
          self.errmsg = "Expected " + display_name
          self.mayIndexError = False
          self.mayReturnEmpty = False

      # Performance tuning: this routine gets called a *lot*
      # if this is a single character match string and the first character matches,
      # short-circuit as quickly as possible, and avoid calling startswith
      def parseImpl(self, instring, loc, doActions=True):
          """Return C{(new loc, matched string)} or raise C{ParseException}."""
          if instring[loc] == self.firstMatchChar:
              if self.matchLen == 1 or instring.startswith(self.match, loc):
                  return loc + self.matchLen, self.match
          raise ParseException(instring, loc, self.errmsg, self)
  _L = Literal
  ParserElement._literalStringClass = Literal
  class Keyword(Token):
      """
      Token to exactly match a specified string as a keyword, that is, it must be
      immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
       - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
       - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
      Accepts two optional constructor arguments in addition to the keyword string:
       - C{identChars} is a string of characters that would be valid identifier characters,
            defaulting to all alphanumerics + "_" and "$"
       - C{caseless} allows case-insensitive matching, default is C{False}.

      Example::
          Keyword("start").parseString("start")  # -> ['start']
          Keyword("start").parseString("starting")  # -> Exception

      For case-insensitive matching, use L{CaselessKeyword}.
      """
      DEFAULT_KEYWORD_CHARS = alphanums+"_$"

      def __init__( self, matchString, identChars=None, caseless=False ):
          super(Keyword,self).__init__()
          if identChars is None:
              identChars = Keyword.DEFAULT_KEYWORD_CHARS
          self.match = matchString
          self.matchLen = len(matchString)
          try:
              self.firstMatchChar = matchString[0]
          except IndexError:
              warnings.warn("null string passed to Keyword; use Empty() instead",
                              SyntaxWarning, stacklevel=2)
          self.name = '"%s"' % self.match
          self.errmsg = "Expected " + self.name
          self.mayReturnEmpty = False
          self.mayIndexError = False
          self.caseless = caseless
          if caseless:
              # Caseless matching compares upper-cased text on both sides.
              self.caselessmatch = matchString.upper()
              identChars = identChars.upper()
          self.identChars = set(identChars)

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Match the keyword at C{loc}, requiring that neither the character
          after the match nor the character before C{loc} is one of
          C{identChars}. Returns C{(new loc, match string)} or raises
          C{ParseException}.
          """
          if self.caseless:
              if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                   (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                   (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                  return loc+self.matchLen, self.match
          else:
              if (instring[loc] == self.firstMatchChar and
                  (self.matchLen==1 or instring.startswith(self.match,loc)) and
                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                  (loc == 0 or instring[loc-1] not in self.identChars) ):
                  return loc+self.matchLen, self.match
          raise ParseException(instring, loc, self.errmsg, self)

      def copy(self):
          """
          Return a copy of this keyword that keeps this instance's
          identifier characters.

          The copy previously had C{identChars} overwritten with the raw
          class default string, which discarded custom C{identChars} and the
          upper-casing applied for caseless keywords.
          """
          c = super(Keyword,self).copy()
          c.identChars = self.identChars
          return c

      @staticmethod
      def setDefaultKeywordChars( chars ):
          """Overrides the default Keyword chars
          """
          Keyword.DEFAULT_KEYWORD_CHARS = chars
  class CaselessLiteral(Literal):
      """
      Token to match a specified string, ignoring case of letters.
      Note: the matched results will always be in the case of the given
      match string, NOT the case of the input text.

      Example::
          OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

      (Contrast with example for L{CaselessKeyword}.)
      """

      def __init__(self, matchString):
          # The base Literal stores the upper-cased form for comparison.
          super(CaselessLiteral, self).__init__(matchString.upper())
          # Preserve the defining literal.
          self.returnString = matchString
          self.name = "'%s'" % self.returnString
          self.errmsg = "Expected " + self.name

      def parseImpl(self, instring, loc, doActions=True):
          """Return C{(new loc, defining literal)} or raise C{ParseException}."""
          end = loc + self.matchLen
          if instring[loc:end].upper() != self.match:
              raise ParseException(instring, loc, self.errmsg, self)
          return end, self.returnString
  class CaselessKeyword(Keyword):
      """
      Caseless version of L{Keyword}.

      Example::
          OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

      (Contrast with example for L{CaselessLiteral}.)
      """
      def __init__( self, matchString, identChars=None ):
          super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Match the keyword at C{loc} ignoring case. Returns
          C{(new loc, match string)} or raises C{ParseException}.

          The keyword must be bounded on both sides: neither the character
          after the match nor the character before C{loc} may be one of
          C{identChars}. The preceding-character test mirrors the caseless
          branch of C{Keyword.parseImpl}, so both spellings of a caseless
          keyword accept the same input.
          """
          end = loc + self.matchLen
          if ( (instring[ loc:end ].upper() == self.caselessmatch) and
               (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars) and
               (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
              return end, self.match
          raise ParseException(instring, loc, self.errmsg, self)
  class CloseMatch(Token):
      """
      A variation on L{Literal} which matches "close" matches, that is,
      strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
       - C{match_string} - string to be matched
       - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

      The results from a successful parse will contain the matched text from the input string and the following named results:
       - C{mismatches} - a list of the positions within the match_string where mismatches were found
       - C{original} - the original match_string used to compare against the input string

      If C{mismatches} is an empty list, then the match was an exact match.

      Example::
          patt = CloseMatch("ATCATCGAATGGA")
          patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
          patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

          # exact match
          patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

          # close match allowing up to 2 mismatches
          patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
          patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
      """
      def __init__(self, match_string, maxMismatches=1):
          super(CloseMatch,self).__init__()
          self.name = match_string
          self.match_string = match_string
          self.maxMismatches = maxMismatches
          self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
          self.mayIndexError = False
          self.mayReturnEmpty = False

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Compare the input at C{loc} with C{match_string} character by
          character. Returns C{(end loc, results)} when no more than
          C{maxMismatches} positions differ, otherwise raises
          C{ParseException}.
          """
          start = loc
          instrlen = len(instring)
          maxloc = start + len(self.match_string)

          # Only attempt a comparison if enough input remains.
          if maxloc <= instrlen:
              mismatches = []
              maxMismatches = self.maxMismatches

              for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], self.match_string)):
                  if src != mat:
                      mismatches.append(match_stringloc)
                      if len(mismatches) > maxMismatches:
                          break
              else:
                  # Every character was compared, so the match ends at maxloc.
                  # The end must be an absolute position in instring; it was
                  # previously computed relative to 0, which was wrong for any
                  # match that did not begin at the start of the input.
                  loc = maxloc
                  results = ParseResults([instring[start:loc]])
                  results['original'] = self.match_string
                  results['mismatches'] = mismatches
                  return loc, results

          raise ParseException(instring, loc, self.errmsg, self)
  class Word(Token):
      """
      Token for matching words composed of allowed character sets.
      Defined with string containing all allowed initial characters,
      an optional string containing allowed body characters (if omitted,
      defaults to the initial character set), and an optional minimum,
      maximum, and/or exact length.  The default value for C{min} is 1 (a
      minimum value < 1 is not valid); the default values for C{max} and C{exact}
      are 0, meaning no maximum or exact length restriction. An optional
      C{excludeChars} parameter can list characters that might be found in
      the input C{bodyChars} string; useful to define a word of all printables
      except for one or two characters, for instance.

      L{srange} is useful for defining custom character set strings for defining
      C{Word} expressions, using range notation from regular expression character sets.

      A common mistake is to use C{Word} to match a specific literal string, as in
      C{Word("Address")}. Remember that C{Word} uses the string argument to define
      I{sets} of matchable characters. This expression would match "Add", "AAA",
      "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
      To match an exact literal string, use L{Literal} or L{Keyword}.

      pyparsing includes helper strings for building Words:
       - L{alphas}
       - L{nums}
       - L{alphanums}
       - L{hexnums}
       - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
       - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
       - L{printables} (any non-whitespace character)

      Example::
          # a word composed of digits
          integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

          # a word with a leading capital, and zero or more lowercase
          capital_word = Word(alphas.upper(), alphas.lower())

          # hostnames are alphanumeric, with leading alpha, and '-'
          hostname = Word(alphas, alphanums+'-')

          # roman numeral (not a strict parser, accepts invalid mix of characters)
          roman = Word("IVXLCDM")

          # any string of non-whitespace characters, except for ','
          csv_value = Word(printables, excludeChars=",")
      """

      def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
          super(Word,self).__init__()

          # Drop excluded characters from both character sets up front.
          if excludeChars:
              initChars = ''.join(ch for ch in initChars if ch not in excludeChars)
              if bodyChars:
                  bodyChars = ''.join(ch for ch in bodyChars if ch not in excludeChars)

          self.initCharsOrig = initChars
          self.initChars = set(initChars)

          # Without explicit body characters, the body reuses the initial set.
          body_source = bodyChars if bodyChars else initChars
          self.bodyCharsOrig = body_source
          self.bodyChars = set(body_source)

          self.maxSpecified = max > 0

          if min < 1:
              raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

          self.minLen = min
          self.maxLen = max if max > 0 else _MAX_INT

          # An exact length overrides both bounds.
          if exact > 0:
              self.maxLen = exact
              self.minLen = exact

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.asKeyword = asKeyword

          # With default length limits and no space in either character set,
          # matching is delegated to a compiled regular expression.
          default_lengths = (min==1 and max==0 and exact==0)
          if ' ' not in self.initCharsOrig+self.bodyCharsOrig and default_lengths:
              if self.bodyCharsOrig == self.initCharsOrig:
                  pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
              elif len(self.initCharsOrig) == 1:
                  pattern = "%s[%s]*" % \
                                        (re.escape(self.initCharsOrig),
                                        _escapeRegexRangeChars(self.bodyCharsOrig),)
              else:
                  pattern = "[%s][%s]*" % \
                                        (_escapeRegexRangeChars(self.initCharsOrig),
                                        _escapeRegexRangeChars(self.bodyCharsOrig),)
              self.reString = pattern
              if self.asKeyword:
                  self.reString = r"\b"+self.reString+r"\b"
              try:
                  self.re = re.compile( self.reString )
              except Exception:
                  # If the pattern cannot be compiled, use the manual scan in parseImpl.
                  self.re = None

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Match a word at C{loc}. Returns C{(end loc, matched text)} or
          raises C{ParseException}.
          """
          if self.re:
              found = self.re.match(instring,loc)
              if not found:
                  raise ParseException(instring, loc, self.errmsg, self)
              return found.end(), found.group()

          if not(instring[ loc ] in self.initChars):
              raise ParseException(instring, loc, self.errmsg, self)

          start = loc
          loc += 1
          instrlen = len(instring)
          bodychars = self.bodyChars
          maxloc = min( start + self.maxLen, instrlen )
          while loc < maxloc and instring[loc] in bodychars:
              loc += 1

          too_short = loc - start < self.minLen
          # With an explicit max, a body character right after the word
          # means the word is longer than allowed.
          overruns_max = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
          # In keyword mode, body characters on either side are rejected.
          not_isolated = self.asKeyword and (
              (start>0 and instring[start-1] in bodychars) or
              (loc<instrlen and instring[loc] in bodychars) )

          if too_short or overruns_max or not_isolated:
              raise ParseException(instring, loc, self.errmsg, self)

          return loc, instring[start:loc]

      def __str__( self ):
          try:
              return super(Word,self).__str__()
          except Exception:
              pass

          if self.strRepr is None:

              def abbreviate(chars):
                  # Show at most four characters of a set.
                  return chars[:4]+"..." if len(chars)>4 else chars

              if self.initCharsOrig == self.bodyCharsOrig:
                  self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
              else:
                  self.strRepr = "W:(%s,%s)" % ( abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig) )

          return self.strRepr
  class Regex(Token):
      r"""
      Token for matching strings that match a given regular expression.
      Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
      If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
      named parse results.

      Example::
          realnum = Regex(r"[+-]?\d+\.\d*")
          date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
          # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
          roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
      """
      compiledREtype = type(re.compile("[A-Z]"))

      def __init__( self, pattern, flags=0):
          """
          The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

          C{pattern} may also be an already compiled regular expression
          object, which is then used directly. Any other type raises
          C{ValueError}; an invalid pattern string re-raises the error from
          C{re.compile()} after issuing a C{SyntaxWarning}.
          """
          super(Regex,self).__init__()

          if isinstance(pattern, basestring):
              if not pattern:
                  warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

              self.pattern = pattern
              self.flags = flags

              try:
                  self.re = re.compile(self.pattern, self.flags)
                  self.reString = self.pattern
              except re.error:
                  # re.error is the exception class raised by re.compile().
                  warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                      SyntaxWarning, stacklevel=2)
                  raise

          elif isinstance(pattern, Regex.compiledREtype):
              self.re = pattern
              # Record the pattern source text; str() of a compiled pattern
              # yields a "re.compile(...)" representation, not the pattern.
              self.pattern = \
              self.reString = pattern.pattern
              self.flags = flags

          else:
              raise ValueError("Regex may only be constructed with a string or a compiled RE object")

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.mayReturnEmpty = True

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Match the regular expression at C{loc}. Returns C{(end loc, results)},
          where named groups of the match are added as named results, or
          raises C{ParseException} when there is no match.
          """
          result = self.re.match(instring,loc)
          if not result:
              raise ParseException(instring, loc, self.errmsg, self)

          loc = result.end()
          d = result.groupdict()
          ret = ParseResults(result.group())
          if d:
              for k in d:
                  ret[k] = d[k]
          return loc,ret

      def __str__( self ):
          try:
              return super(Regex,self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              self.strRepr = "Re:(%s)" % repr(self.pattern)

          return self.strRepr
  class QuotedString(Token):
      r"""
      Token for matching strings that are delimited by quoting characters.

      Defined with the following parameters:
       - quoteChar - string of one or more characters defining the quote delimiting string
       - escChar - character to escape quotes, typically backslash (default=C{None})
       - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
       - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
       - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
       - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
       - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

      Raises C{SyntaxError} (after emitting a C{SyntaxWarning}) when
      C{quoteChar} or C{endQuoteChar} is empty once stripped of whitespace.

      Example::
          qs = QuotedString('"')
          print(qs.searchString('lsjdf "This is the quote" sldjf'))
          complex_qs = QuotedString('{{', endQuoteChar='}}')
          print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
          sql_qs = QuotedString('"', escQuote='""')
          print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
      prints::
          [['This is the quote']]
          [['This is the "quote"']]
          [['This is the quote with "embedded" quotes']]
      """
      def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
          super(QuotedString,self).__init__()

          # remove white space from quote chars - wont work anyway
          quoteChar = quoteChar.strip()
          if not quoteChar:
              warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
              raise SyntaxError()

          if endQuoteChar is None:
              endQuoteChar = quoteChar
          else:
              endQuoteChar = endQuoteChar.strip()
              if not endQuoteChar:
                  warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                  raise SyntaxError()

          self.quoteChar = quoteChar
          self.quoteCharLen = len(quoteChar)
          self.firstQuoteChar = quoteChar[0]
          self.endQuoteChar = endQuoteChar
          self.endQuoteCharLen = len(endQuoteChar)
          self.escChar = escChar
          self.escQuote = escQuote
          self.unquoteResults = unquoteResults
          self.convertWhitespaceEscapes = convertWhitespaceEscapes

          # The body of the quoted string is a repetition of "ordinary"
          # characters: anything except the first end-quote character, the
          # escape character and (for single-line strings) line breaks.
          if multiline:
              self.flags = re.MULTILINE | re.DOTALL
              openingFmt = r'%s(?:[^%s%s]'
          else:
              self.flags = 0
              openingFmt = r'%s(?:[^%s\n\r%s]'
          escRangeChars = _escapeRegexRangeChars(escChar) if escChar is not None else ''
          self.pattern = openingFmt % (
              re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              escRangeChars,
          )

          # With a multi-character end quote, a proper prefix of the end
          # quote followed by a non-matching character is still body text.
          if len(self.endQuoteChar) > 1:
              self.pattern += (
                  '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                     _escapeRegexRangeChars(self.endQuoteChar[i]))
                                        for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                  )
          if escQuote:
              self.pattern += (r'|(?:%s)' % re.escape(escQuote))
          if escChar:
              self.pattern += (r'|(?:%s.)' % re.escape(escChar))
              self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
          self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

          try:
              self.re = re.compile(self.pattern, self.flags)
              self.reString = self.pattern
          except sre_constants.error:
              warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                  SyntaxWarning, stacklevel=2)
              raise

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayIndexError = False
          self.mayReturnEmpty = True

      def parseImpl( self, instring, loc, doActions=True ):
          """Match a quoted string at C{loc}.

          Returns C{(new_loc, text)}; C{text} has its quotes removed and its
          escapes processed when C{unquoteResults} is set.  Raises
          C{ParseException} when no quoted string starts at C{loc}.
          """
          # cheap first-character test before running the compiled regex
          if instring[loc] == self.firstQuoteChar:
              result = self.re.match(instring,loc)
          else:
              result = None
          if not result:
              raise ParseException(instring, loc, self.errmsg, self)

          loc = result.end()
          ret = result.group()

          if self.unquoteResults:
              # strip off quotes
              ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

              if isinstance(ret,basestring):
                  # replace escaped whitespace
                  if '\\' in ret and self.convertWhitespaceEscapes:
                      ws_map = {
                          r'\t' : '\t',
                          r'\n' : '\n',
                          r'\f' : '\f',
                          r'\r' : '\r',
                      }
                      for wslit,wschar in ws_map.items():
                          ret = ret.replace(wslit, wschar)

                  # replace escaped characters; the replacement template is a
                  # raw string so that "\g" is not an invalid string escape
                  if self.escChar:
                      ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                  # replace escaped quotes
                  if self.escQuote:
                      ret = ret.replace(self.escQuote, self.endQuoteChar)

          return loc, ret

      def __str__( self ):
          """Return the expression name, or a cached description of the quote delimiters."""
          try:
              return super(QuotedString,self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

          return self.strRepr
  class CharsNotIn(Token):
      """
      Token for matching words composed of characters I{not} in a given set.
      Whitespace is included in the match unless it is listed in the
      exclusion set (see example).

      Defined with a string of all disallowed characters, and optional
      C{min}, C{max} and C{exact} lengths.  C{min} defaults to 1 (a value
      < 1 raises C{ValueError}); C{max} and C{exact} default to 0, meaning
      no maximum or exact length restriction.

      Example::
          # define a comma-separated-value as anything that is not a ','
          csv_value = CharsNotIn(',')
          print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
      prints::
          ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
      """
      def __init__( self, notChars, min=1, max=0, exact=0 ):
          super(CharsNotIn,self).__init__()
          self.skipWhitespace = False
          self.notChars = notChars

          if min < 1:
              raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

          self.minLen = min
          self.maxLen = max if max > 0 else _MAX_INT

          # an exact length overrides both bounds
          if exact > 0:
              self.maxLen = exact
              self.minLen = exact

          self.name = _ustr(self)
          self.errmsg = "Expected " + self.name
          self.mayReturnEmpty = ( self.minLen == 0 )
          self.mayIndexError = False

      def parseImpl( self, instring, loc, doActions=True ):
          """Consume the run of non-excluded characters starting at C{loc}.

          Returns C{(new_loc, matched_text)}; raises C{ParseException} if the
          first character is excluded or the run is shorter than C{minLen}.
          """
          excluded = self.notChars
          if instring[loc] in excluded:
              raise ParseException(instring, loc, self.errmsg, self)

          begin = loc
          limit = min( begin+self.maxLen, len(instring) )
          loc += 1
          while loc < limit and instring[loc] not in excluded:
              loc += 1

          if loc - begin < self.minLen:
              raise ParseException(instring, loc, self.errmsg, self)

          return loc, instring[begin:loc]

      def __str__( self ):
          """Return the expression name, or a cached C{"!W:(...)"} summary of the excluded characters."""
          try:
              return super(CharsNotIn, self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              # only the first four excluded characters are shown
              if len(self.notChars) > 4:
                  self.strRepr = "!W:(%s...)" % self.notChars[:4]
              else:
                  self.strRepr = "!W:(%s)" % self.notChars

          return self.strRepr
  class White(Token):
      r"""
      Special matching class for matching whitespace.  Normally, whitespace
      is ignored by pyparsing grammars.  This class is included when some
      whitespace structures are significant.  Define with a string containing
      the whitespace characters to be matched; default is C{" \t\r\n"}.
      Also takes optional C{min}, C{max}, and C{exact} arguments, as defined
      for the C{L{Word}} class.
      """
      # display names used to build the expression name
      whiteStrs = {
          " " : "<SPC>",
          "\t": "<TAB>",
          "\n": "<LF>",
          "\r": "<CR>",
          "\f": "<FF>",
          }
      def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
          super(White,self).__init__()
          self.matchWhite = ws
          # the characters being matched must not be skipped as leading whitespace
          skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
          self.setWhitespaceChars( skippable )
          self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
          self.mayReturnEmpty = True
          self.errmsg = "Expected " + self.name

          self.minLen = min
          self.maxLen = max if max > 0 else _MAX_INT

          # an exact length overrides both bounds
          if exact > 0:
              self.maxLen = exact
              self.minLen = exact

      def parseImpl( self, instring, loc, doActions=True ):
          """Consume the run of C{matchWhite} characters starting at C{loc}.

          Returns C{(new_loc, matched_text)}; raises C{ParseException} if the
          first character is not in C{matchWhite} or the run is shorter than
          C{minLen}.
          """
          if instring[loc] not in self.matchWhite:
              raise ParseException(instring, loc, self.errmsg, self)

          begin = loc
          limit = min( begin + self.maxLen, len(instring) )
          loc += 1
          while loc < limit and instring[loc] in self.matchWhite:
              loc += 1

          if loc - begin < self.minLen:
              raise ParseException(instring, loc, self.errmsg, self)

          return loc, instring[begin:loc]
  class _PositionToken(Token):
      """Common base for the position-testing tokens defined below it.

      Names the expression after its concrete class and marks it as possibly
      returning empty and as not raising C{IndexError}.
      """
      def __init__( self ):
          super(_PositionToken,self).__init__()
          self.name = type(self).__name__
          self.mayReturnEmpty = True
          self.mayIndexError = False
  class GoToColumn(_PositionToken):
      """
      Token to advance to a specific column of input text; useful for tabular report scraping.
      """
      def __init__( self, colno ):
          super(GoToColumn,self).__init__()
          self.col = colno

      def preParse( self, instring, loc ):
          """Skip ignorables and whitespace until the target column (or non-space text) is reached."""
          if col(loc,instring) == self.col:
              return loc

          end = len(instring)
          if self.ignoreExprs:
              loc = self._skipIgnorables( instring, loc )
          while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
              loc += 1
          return loc

      def parseImpl( self, instring, loc, doActions=True ):
          """Return the text between C{loc} and the target column.

          Raises C{ParseException} if C{loc} is already past that column.
          """
          current = col( loc, instring )
          if current > self.col:
              raise ParseException( instring, loc, "Text not in expected column", self )
          target = loc + self.col - current
          return target, instring[ loc: target ]
  class LineStart(_PositionToken):
      r"""
      Matches if current position is at the beginning of a line within the parse string

      Example::

          test = '''\
          AAA this line
          AAA and this line
            AAA but not this one
          B AAA and definitely not this one
          '''

          for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
              print(t)

      Prints::
          ['AAA', ' this line']
          ['AAA', ' and this line']
      """
      def __init__( self ):
          super(LineStart,self).__init__()
          self.errmsg = "Expected start of line"

      def parseImpl( self, instring, loc, doActions=True ):
          """Succeed with no tokens when C{loc} is in column 1; otherwise raise C{ParseException}."""
          if col(loc, instring) != 1:
              raise ParseException(instring, loc, self.errmsg, self)
          return loc, []
  class LineEnd(_PositionToken):
      """
      Matches if current position is at the end of a line within the parse string
      """
      def __init__( self ):
          super(LineEnd,self).__init__()
          # a newline is what this token matches, so it must not be skipped
          self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
          self.errmsg = "Expected end of line"

      def parseImpl( self, instring, loc, doActions=True ):
          """Match a newline at C{loc}, or the end of the input.

          Returns C{(loc+1, "\\n")} on a newline and C{(loc+1, [])} exactly at
          the end of the string; raises C{ParseException} otherwise.
          """
          end = len(instring)
          if loc == end:
              return loc+1, []
          if loc < end and instring[loc] == "\n":
              return loc+1, "\n"
          raise ParseException(instring, loc, self.errmsg, self)
  class StringStart(_PositionToken):
      """
      Matches if current position is at the beginning of the parse string
      """
      def __init__( self ):
          super(StringStart,self).__init__()
          self.errmsg = "Expected start of text"

      def parseImpl( self, instring, loc, doActions=True ):
          """Succeed with no tokens at position 0, or at the position pre-parsing reaches from 0."""
          # a non-zero loc is acceptable only if everything before it is
          # just whitespace and ignorables
          if loc != 0 and loc != self.preParse( instring, 0 ):
              raise ParseException(instring, loc, self.errmsg, self)
          return loc, []
  class StringEnd(_PositionToken):
      """
      Matches if current position is at the end of the parse string
      """
      def __init__( self ):
          super(StringEnd,self).__init__()
          self.errmsg = "Expected end of text"

      def parseImpl( self, instring, loc, doActions=True ):
          """Succeed with no tokens at or beyond the end of C{instring}.

          Exactly at the end the returned location is C{loc+1}; past the end
          C{loc} is returned unchanged.  Raises C{ParseException} when input
          remains.
          """
          end = len(instring)
          if loc < end:
              raise ParseException(instring, loc, self.errmsg, self)
          if loc == end:
              return loc+1, []
          # loc > end
          return loc, []
  class WordStart(_PositionToken):
      r"""
      Matches if the current position is at the beginning of a Word, and
      is not preceded by any character in a given set of C{wordChars}
      (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
      use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
      the string being parsed, or at the beginning of a line.
      """
      def __init__(self, wordChars = printables):
          super(WordStart,self).__init__()
          self.wordChars = set(wordChars)
          self.errmsg = "Not at the start of a word"

      def parseImpl(self, instring, loc, doActions=True ):
          """Succeed with no tokens at position 0, or where a non-word character is followed by a word character."""
          if loc != 0:
              chars = self.wordChars
              startsWord = instring[loc-1] not in chars and instring[loc] in chars
              if not startsWord:
                  raise ParseException(instring, loc, self.errmsg, self)
          return loc, []
  class WordEnd(_PositionToken):
      r"""
      Matches if the current position is at the end of a Word, and
      is not followed by any character in a given set of C{wordChars}
      (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
      use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
      the string being parsed, or at the end of a line.
      """
      def __init__(self, wordChars = printables):
          super(WordEnd,self).__init__()
          self.wordChars = set(wordChars)
          self.skipWhitespace = False
          self.errmsg = "Not at the end of a word"

      def parseImpl(self, instring, loc, doActions=True ):
          """Succeed with no tokens at/after the end of input, or where a word character is followed by a non-word character."""
          total = len(instring)
          if total > 0 and loc < total:
              chars = self.wordChars
              endsWord = instring[loc] not in chars and instring[loc-1] in chars
              if not endsWord:
                  raise ParseException(instring, loc, self.errmsg, self)
          return loc, []
  class ParseExpression(ParserElement):
      """
      Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

      C{exprs} may be a single string, a generator, any iterable of
      expressions or strings, or a single expression.  Strings are wrapped
      with C{ParserElement._literalStringClass}.
      """
      def __init__( self, exprs, savelist = False ):
          super(ParseExpression,self).__init__(savelist)

          # collections.Iterable was removed in Python 3.10; the ABC lives in
          # collections.abc on Python 3 and in collections on Python 2.
          try:
              from collections.abc import Iterable
          except ImportError:
              from collections import Iterable

          if isinstance( exprs, _generatorType ):
              exprs = list(exprs)

          if isinstance( exprs, basestring ):
              self.exprs = [ ParserElement._literalStringClass( exprs ) ]
          elif isinstance( exprs, Iterable ):
              exprs = list(exprs)
              # if sequence of strings provided, wrap with Literal
              if all(isinstance(expr, basestring) for expr in exprs):
                  exprs = map(ParserElement._literalStringClass, exprs)
              self.exprs = list(exprs)
          else:
              # not a registered Iterable: try the sequence protocol, and
              # treat anything that cannot be listed as a single expression
              try:
                  self.exprs = list( exprs )
              except TypeError:
                  self.exprs = [ exprs ]
          self.callPreparse = False

      def __getitem__( self, i ):
          """Return the contained expression(s) at index/slice C{i}."""
          return self.exprs[i]

      def append( self, other ):
          """Add C{other} to the contained expressions, reset the cached string form, and return self."""
          self.exprs.append( other )
          self.strRepr = None
          return self

      def leaveWhitespace( self ):
          """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
             all contained expressions."""
          self.skipWhitespace = False
          # work on copies so expressions shared with other grammars are untouched
          self.exprs = [ e.copy() for e in self.exprs ]
          for e in self.exprs:
              e.leaveWhitespace()
          return self

      def ignore( self, other ):
          """Register C{other} as ignorable on this expression and on every contained expression.

          A C{Suppress} that is already registered is not added again.
          """
          if isinstance( other, Suppress ) and other in self.ignoreExprs:
              return self
          super( ParseExpression, self).ignore( other )
          # propagate the ignore expression the base class actually stored
          for e in self.exprs:
              e.ignore( self.ignoreExprs[-1] )
          return self

      def __str__( self ):
          """Return the expression name, or a cached C{"ClassName:(exprs)"} description."""
          try:
              return super(ParseExpression,self).__str__()
          except Exception:
              pass

          if self.strRepr is None:
              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
          return self.strRepr

      def streamline( self ):
          """Streamline all contained expressions and flatten nested expressions of the same class."""
          super(ParseExpression,self).streamline()

          for e in self.exprs:
              e.streamline()

          # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
          # but only if there are no parse actions or resultsNames on the nested And's
          # (likewise for Or's and MatchFirst's)
          if len(self.exprs) == 2:
              def collapsible( candidate ):
                  return ( isinstance( candidate, self.__class__ ) and
                           not candidate.parseAction and
                           candidate.resultsName is None and
                           not candidate.debug )

              head = self.exprs[0]
              if collapsible( head ):
                  self.exprs = head.exprs[:] + [ self.exprs[1] ]
                  self.strRepr = None
                  self.mayReturnEmpty |= head.mayReturnEmpty
                  self.mayIndexError |= head.mayIndexError

              # re-read the last element: the list may just have been rebuilt
              tail = self.exprs[-1]
              if collapsible( tail ):
                  self.exprs = self.exprs[:-1] + tail.exprs[:]
                  self.strRepr = None
                  self.mayReturnEmpty |= tail.mayReturnEmpty
                  self.mayIndexError |= tail.mayIndexError

          self.errmsg = "Expected " + _ustr(self)

          return self

      def setResultsName( self, name, listAllMatches=False ):
          """Delegate to the base-class C{setResultsName} and return its result."""
          ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
          return ret

      def validate( self, validateTrace=[] ):
          """Validate every contained expression, then check this expression for recursion.

          The default list is never mutated; a new list is built from it.
          """
          tmp = validateTrace[:]+[self]
          for e in self.exprs:
              e.validate(tmp)
          self.checkRecursion( [] )

      def copy(self):
          """Return a copy of this expression whose contained expressions are copies too."""
          ret = super(ParseExpression,self).copy()
          ret.exprs = [e.copy() for e in self.exprs]
          return ret
  class And(ParseExpression):
      """
      Requires all given C{ParseExpression}s to be found in the given order.
      Expressions may be separated by whitespace.
      May be constructed using the C{'+'} operator.
      May also be constructed using the C{'-'} operator, which will suppress backtracking.

      Example::
          integer = Word(nums)
          name_expr = OneOrMore(Word(alphas))

          expr = And([integer("id"),name_expr("name"),integer("age")])
          # more easily written as:
          expr = integer("id") + name_expr("name") + integer("age")
      """

      class _ErrorStop(Empty):
          """Marker element: failures of the expressions after it are raised as C{ParseSyntaxException}."""
          def __init__(self, *args, **kwargs):
              super(And._ErrorStop,self).__init__(*args, **kwargs)
              self.name = '-'
              self.leaveWhitespace()

      def __init__( self, exprs, savelist = True ):
          super(And,self).__init__(exprs, savelist)
          self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
          # whitespace handling follows the first expression of the sequence
          lead = self.exprs[0]
          self.setWhitespaceChars( lead.whiteChars )
          self.skipWhitespace = lead.skipWhitespace
          self.callPreparse = True

      def parseImpl( self, instring, loc, doActions=True ):
          """Parse every contained expression in order and return the accumulated tokens."""
          # pass False as last arg to _parse for first element, since we already
          # pre-parsed the string as part of our And pre-parsing
          loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
          pastErrorStop = False
          for expr in self.exprs[1:]:
              if isinstance(expr, And._ErrorStop):
                  pastErrorStop = True
                  continue

              if not pastErrorStop:
                  loc, tokens = expr._parse( instring, loc, doActions )
              else:
                  # after an error stop, every failure becomes a ParseSyntaxException
                  try:
                      loc, tokens = expr._parse( instring, loc, doActions )
                  except ParseSyntaxException:
                      raise
                  except ParseBaseException as pe:
                      pe.__traceback__ = None
                      raise ParseSyntaxException._from_exception(pe)
                  except IndexError:
                      raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

              if tokens or tokens.haskeys():
                  resultlist += tokens
          return loc, resultlist

      def __iadd__(self, other ):
          """Implement C{+=}: append C{other} (strings are wrapped as literals) to this sequence."""
          if isinstance( other, basestring ):
              other = ParserElement._literalStringClass( other )
          return self.append( other ) #And( [ self, other ] )

      def checkRecursion( self, parseElementList ):
          """Check contained expressions for recursion, stopping after the first that cannot match empty."""
          trail = parseElementList[:] + [ self ]
          for expr in self.exprs:
              expr.checkRecursion( trail )
              if not expr.mayReturnEmpty:
                  break

      def __str__( self ):
          """Return the assigned name, or a cached C{"{a b c}"} rendering of the sequence."""
          if hasattr(self,"name"):
              return self.name

          if self.strRepr is None:
              parts = " ".join(_ustr(e) for e in self.exprs)
              self.strRepr = "{" + parts + "}"

          return self.strRepr
  class Or(ParseExpression):
      """
      Requires that at least one C{ParseExpression} is found.
      If two expressions match, the expression that matches the longest string will be used.
      May be constructed using the C{'^'} operator.

      Example::
          # construct Or using '^' operator

          number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
          print(number.searchString("123 3.1416 789"))
      prints::
          [['123'], ['3.1416'], ['789']]
      """
      def __init__( self, exprs, savelist = False ):
          super(Or,self).__init__(exprs, savelist)
          # with no alternatives at all the expression is treated as possibly empty
          if not self.exprs:
              self.mayReturnEmpty = True
          else:
              self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

      def parseImpl( self, instring, loc, doActions=True ):
          """Try every alternative and return the result of the longest match.

          If nothing matches, the exception from the alternative that got
          furthest is re-raised carrying this expression's error message.
          """
          furthestLoc = -1
          furthestExc = None
          candidates = []
          for alt in self.exprs:
              try:
                  endLoc = alt.tryParse( instring, loc )
              except ParseException as err:
                  err.__traceback__ = None
                  if err.loc > furthestLoc:
                      furthestExc = err
                      furthestLoc = err.loc
              except IndexError:
                  if len(instring) > furthestLoc:
                      furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                      furthestLoc = len(instring)
              else:
                  # save match among all matches, to retry longest to shortest
                  candidates.append((endLoc, alt))

          if candidates:
              candidates.sort(key=lambda pair: pair[0], reverse=True)
              for _, alt in candidates:
                  try:
                      return alt._parse( instring, loc, doActions )
                  except ParseException as err:
                      err.__traceback__ = None
                      if err.loc > furthestLoc:
                          furthestExc = err
                          furthestLoc = err.loc

          if furthestExc is None:
              raise ParseException(instring, loc, "no defined alternatives to match", self)
          furthestExc.msg = self.errmsg
          raise furthestExc

      def __ixor__(self, other ):
          """Implement C{^=}: append C{other} (strings are wrapped as literals) as another alternative."""
          if isinstance( other, basestring ):
              other = ParserElement._literalStringClass( other )
          return self.append( other ) #Or( [ self, other ] )

      def __str__( self ):
          """Return the assigned name, or a cached C{"{a ^ b}"} rendering of the alternatives."""
          if hasattr(self,"name"):
              return self.name

          if self.strRepr is None:
              parts = " ^ ".join(_ustr(e) for e in self.exprs)
              self.strRepr = "{" + parts + "}"

          return self.strRepr

      def checkRecursion( self, parseElementList ):
          """Check every alternative for recursion."""
          trail = parseElementList[:] + [ self ]
          for alt in self.exprs:
              alt.checkRecursion( trail )
  class MatchFirst(ParseExpression):
      """
      Requires that at least one C{ParseExpression} is found.
      If two expressions match, the first one listed is the one that will match.
      May be constructed using the C{'|'} operator.

      Example::
          # construct MatchFirst using '|' operator

          # watch the order of expressions to match
          number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
          print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

          # put more selective expression first
          number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
          print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
      """
      def __init__( self, exprs, savelist = False ):
          super(MatchFirst,self).__init__(exprs, savelist)
          # with no alternatives at all the expression is treated as possibly empty
          if not self.exprs:
              self.mayReturnEmpty = True
          else:
              self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

      def parseImpl( self, instring, loc, doActions=True ):
          """Return the result of the first alternative that matches.

          If none matches, the exception from the alternative that got
          furthest is re-raised carrying this expression's error message.
          """
          furthestLoc = -1
          furthestExc = None
          for alt in self.exprs:
              try:
                  return alt._parse( instring, loc, doActions )
              except ParseException as err:
                  if err.loc > furthestLoc:
                      furthestExc = err
                      furthestLoc = err.loc
              except IndexError:
                  if len(instring) > furthestLoc:
                      furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                      furthestLoc = len(instring)

          # only got here if no expression matched, raise exception for match that made it the furthest
          if furthestExc is None:
              raise ParseException(instring, loc, "no defined alternatives to match", self)
          furthestExc.msg = self.errmsg
          raise furthestExc

      def __ior__(self, other ):
          """Implement C{|=}: append C{other} (strings are wrapped as literals) as another alternative."""
          if isinstance( other, basestring ):
              other = ParserElement._literalStringClass( other )
          return self.append( other ) #MatchFirst( [ self, other ] )

      def __str__( self ):
          """Return the assigned name, or a cached C{"{a | b}"} rendering of the alternatives."""
          if hasattr(self,"name"):
              return self.name

          if self.strRepr is None:
              parts = " | ".join(_ustr(e) for e in self.exprs)
              self.strRepr = "{" + parts + "}"

          return self.strRepr

      def checkRecursion( self, parseElementList ):
          """Check every alternative for recursion."""
          trail = parseElementList[:] + [ self ]
          for alt in self.exprs:
              alt.checkRecursion( trail )
  class Each(ParseExpression):
      """
      Requires all given C{ParseExpression}s to be found, but in any order.
      Expressions may be separated by whitespace.
      May be constructed using the C{'&'} operator.

      Example::
          color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
          shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
          integer = Word(nums)
          shape_attr = "shape:" + shape_type("shape")
          posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
          color_attr = "color:" + color("color")
          size_attr = "size:" + integer("size")

          # use Each (using operator '&') to accept attributes in any order
          # (shape and posn are required, color and size are optional)
          shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

          shape_spec.runTests('''
              shape: SQUARE color: BLACK posn: 100, 120
              shape: CIRCLE size: 50 color: BLUE posn: 50,80
              color:GREEN size:20 shape:TRIANGLE posn:20,40
              '''
              )
      prints::
          shape: SQUARE color: BLACK posn: 100, 120
          ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
          - color: BLACK
          - posn: ['100', ',', '120']
            - x: 100
            - y: 120
          - shape: SQUARE


          shape: CIRCLE size: 50 color: BLUE posn: 50,80
          ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
          - color: BLUE
          - posn: ['50', ',', '80']
            - x: 50
            - y: 80
          - shape: CIRCLE
          - size: 50


          color: GREEN size: 20 shape: TRIANGLE posn: 20,40
          ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
          - color: GREEN
          - posn: ['20', ',', '40']
            - x: 20
            - y: 40
          - shape: TRIANGLE
          - size: 20
      """
      def __init__( self, exprs, savelist = True ):
          super(Each,self).__init__(exprs, savelist)
          self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
          self.skipWhitespace = True
          # the expression groups are built on the first call to parseImpl
          self.initExprGroups = True

      def parseImpl( self, instring, loc, doActions=True ):
          """Match the contained expressions in whatever order the input presents them.

          A trial pass (using C{tryParse}) discovers the matching order; the
          expressions are then parsed for real in that order.  Raises
          C{ParseException} when a required expression never matched.
          """
          if self.initExprGroups:
              # maps id(wrapped expr) -> its Optional wrapper
              self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
              wrappedOptionals = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
              emptyMatchers = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
              self.optionals = wrappedOptionals + emptyMatchers
              self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
              self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
              self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
              self.required += self.multirequired
              self.initExprGroups = False

          trialLoc = loc
          pendingRequired = self.required[:]
          pendingOptional = self.optionals[:]
          matchOrder = []

          # keep sweeping over the outstanding expressions until a full sweep matches nothing
          while True:
              attempt = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
              failed = []
              for expr in attempt:
                  try:
                      trialLoc = expr.tryParse( instring, trialLoc )
                  except ParseException:
                      failed.append(expr)
                  else:
                      matchOrder.append(self.opt1map.get(id(expr),expr))
                      if expr in pendingRequired:
                          pendingRequired.remove(expr)
                      elif expr in pendingOptional:
                          pendingOptional.remove(expr)
              if len(failed) == len(attempt):
                  break

          if pendingRequired:
              missing = ", ".join(_ustr(e) for e in pendingRequired)
              raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

          # add any unmatched Optionals, in case they have default values defined
          matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOptional]

          collected = []
          for expr in matchOrder:
              loc, tokens = expr._parse(instring,loc,doActions)
              collected.append(tokens)

          return loc, sum(collected, ParseResults([]))

      def __str__( self ):
          """Return the assigned name, or a cached C{"{a & b}"} rendering of the expressions."""
          if hasattr(self,"name"):
              return self.name

          if self.strRepr is None:
              parts = " & ".join(_ustr(e) for e in self.exprs)
              self.strRepr = "{" + parts + "}"

          return self.strRepr

      def checkRecursion( self, parseElementList ):
          """Check every contained expression for recursion."""
          trail = parseElementList[:] + [ self ]
          for expr in self.exprs:
              expr.checkRecursion( trail )
  class ParseElementEnhance(ParserElement):
      """
      Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

      Wraps a single expression, C{self.expr}, which may be C{None}; every
      method guards against that case.
      """
      def __init__( self, expr, savelist=False ):
          super(ParseElementEnhance,self).__init__(savelist)
          if isinstance( expr, basestring ):
              if issubclass(ParserElement._literalStringClass, Token):
                  expr = ParserElement._literalStringClass(expr)
              else:
                  expr = ParserElement._literalStringClass(Literal(expr))
          self.expr = expr
          self.strRepr = None
          if expr is not None:
              # inherit the parsing characteristics of the wrapped expression
              self.mayIndexError = expr.mayIndexError
              self.mayReturnEmpty = expr.mayReturnEmpty
              self.setWhitespaceChars( expr.whiteChars )
              self.skipWhitespace = expr.skipWhitespace
              self.saveAsList = expr.saveAsList
              self.callPreparse = expr.callPreparse
              self.ignoreExprs.extend(expr.ignoreExprs)

      def parseImpl( self, instring, loc, doActions=True ):
          """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
          if self.expr is not None:
              return self.expr._parse( instring, loc, doActions, callPreParse=False )
          else:
              raise ParseException("",loc,self.errmsg,self)

      def leaveWhitespace( self ):
          """Disable whitespace skipping on this expression and on a copy of the wrapped expression."""
          self.skipWhitespace = False
          # copy only when there is something to copy: calling copy() before
          # the None check raised AttributeError for an unset expression
          if self.expr is not None:
              self.expr = self.expr.copy()
              self.expr.leaveWhitespace()
          return self

      def ignore( self, other ):
          """Register C{other} as ignorable on this expression and on the wrapped expression.

          A C{Suppress} that is already registered is not added again.
          """
          if isinstance( other, Suppress ) and other in self.ignoreExprs:
              return self
          super( ParseElementEnhance, self).ignore( other )
          if self.expr is not None:
              # propagate the ignore expression the base class actually stored
              self.expr.ignore( self.ignoreExprs[-1] )
          return self

      def streamline( self ):
          """Streamline this expression and the wrapped expression; return self."""
          super(ParseElementEnhance,self).streamline()
          if self.expr is not None:
              self.expr.streamline()
          return self

      def checkRecursion( self, parseElementList ):
          """Raise C{RecursiveGrammarException} if this expression is already in C{parseElementList}; otherwise recurse into the wrapped expression."""
          if self in parseElementList:
              raise RecursiveGrammarException( parseElementList+[self] )
          subRecCheckList = parseElementList[:] + [ self ]
          if self.expr is not None:
              self.expr.checkRecursion( subRecCheckList )

      def validate( self, validateTrace=[] ):
          """Validate the wrapped expression, then check this expression for recursion.

          The default list is never mutated; a new list is built from it.
          """
          tmp = validateTrace[:]+[self]
          if self.expr is not None:
              self.expr.validate(tmp)
          self.checkRecursion( [] )

      def __str__( self ):
          """Return the expression name, or a cached C{"ClassName:(expr)"} description."""
          try:
              return super(ParseElementEnhance,self).__str__()
          except Exception:
              pass

          if self.strRepr is None and self.expr is not None:
              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
          return self.strRepr
  3210. class FollowedBy(ParseElementEnhance):
  3211. """
  3212. Lookahead matching of the given parse expression. C{FollowedBy}
  3213. does I{not} advance the parsing position within the input string, it only
  3214. verifies that the specified parse expression matches at the current
  3215. position. C{FollowedBy} always returns a null token list.
  3216. Example::
  3217. # use FollowedBy to match a label only if it is followed by a ':'
  3218. data_word = Word(alphas)
  3219. label = data_word + FollowedBy(':')
  3220. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3221. OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
  3222. prints::
  3223. [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
  3224. """
  3225. def __init__( self, expr ):
  3226. super(FollowedBy,self).__init__(expr)
  3227. self.mayReturnEmpty = True
  3228. def parseImpl( self, instring, loc, doActions=True ):
  3229. self.expr.tryParse( instring, loc )
  3230. return loc, []
  3231. class NotAny(ParseElementEnhance):
  3232. """
  3233. Lookahead to disallow matching with the given parse expression. C{NotAny}
  3234. does I{not} advance the parsing position within the input string, it only
  3235. verifies that the specified parse expression does I{not} match at the current
  3236. position. Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
  3237. always returns a null token list. May be constructed using the '~' operator.
  3238. Example::
  3239. """
  3240. def __init__( self, expr ):
  3241. super(NotAny,self).__init__(expr)
  3242. #~ self.leaveWhitespace()
  3243. self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
  3244. self.mayReturnEmpty = True
  3245. self.errmsg = "Found unwanted token, "+_ustr(self.expr)
  3246. def parseImpl( self, instring, loc, doActions=True ):
  3247. if self.expr.canParseNext(instring, loc):
  3248. raise ParseException(instring, loc, self.errmsg, self)
  3249. return loc, []
  3250. def __str__( self ):
  3251. if hasattr(self,"name"):
  3252. return self.name
  3253. if self.strRepr is None:
  3254. self.strRepr = "~{" + _ustr(self.expr) + "}"
  3255. return self.strRepr
  3256. class _MultipleMatch(ParseElementEnhance):
  3257. def __init__( self, expr, stopOn=None):
  3258. super(_MultipleMatch, self).__init__(expr)
  3259. self.saveAsList = True
  3260. ender = stopOn
  3261. if isinstance(ender, basestring):
  3262. ender = ParserElement._literalStringClass(ender)
  3263. self.not_ender = ~ender if ender is not None else None
  3264. def parseImpl( self, instring, loc, doActions=True ):
  3265. self_expr_parse = self.expr._parse
  3266. self_skip_ignorables = self._skipIgnorables
  3267. check_ender = self.not_ender is not None
  3268. if check_ender:
  3269. try_not_ender = self.not_ender.tryParse
  3270. # must be at least one (but first see if we are the stopOn sentinel;
  3271. # if so, fail)
  3272. if check_ender:
  3273. try_not_ender(instring, loc)
  3274. loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
  3275. try:
  3276. hasIgnoreExprs = (not not self.ignoreExprs)
  3277. while 1:
  3278. if check_ender:
  3279. try_not_ender(instring, loc)
  3280. if hasIgnoreExprs:
  3281. preloc = self_skip_ignorables( instring, loc )
  3282. else:
  3283. preloc = loc
  3284. loc, tmptokens = self_expr_parse( instring, preloc, doActions )
  3285. if tmptokens or tmptokens.haskeys():
  3286. tokens += tmptokens
  3287. except (ParseException,IndexError):
  3288. pass
  3289. return loc, tokens
  3290. class OneOrMore(_MultipleMatch):
  3291. """
  3292. Repetition of one or more of the given expression.
  3293. Parameters:
  3294. - expr - expression that must match one or more times
  3295. - stopOn - (default=C{None}) - expression for a terminating sentinel
  3296. (only required if the sentinel would ordinarily match the repetition
  3297. expression)
  3298. Example::
  3299. data_word = Word(alphas)
  3300. label = data_word + FollowedBy(':')
  3301. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3302. text = "shape: SQUARE posn: upper left color: BLACK"
  3303. OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
  3304. # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
  3305. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3306. OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
  3307. # could also be written as
  3308. (attr_expr * (1,)).parseString(text).pprint()
  3309. """
  3310. def __str__( self ):
  3311. if hasattr(self,"name"):
  3312. return self.name
  3313. if self.strRepr is None:
  3314. self.strRepr = "{" + _ustr(self.expr) + "}..."
  3315. return self.strRepr
  3316. class ZeroOrMore(_MultipleMatch):
  3317. """
  3318. Optional repetition of zero or more of the given expression.
  3319. Parameters:
  3320. - expr - expression that must match zero or more times
  3321. - stopOn - (default=C{None}) - expression for a terminating sentinel
  3322. (only required if the sentinel would ordinarily match the repetition
  3323. expression)
  3324. Example: similar to L{OneOrMore}
  3325. """
  3326. def __init__( self, expr, stopOn=None):
  3327. super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
  3328. self.mayReturnEmpty = True
  3329. def parseImpl( self, instring, loc, doActions=True ):
  3330. try:
  3331. return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
  3332. except (ParseException,IndexError):
  3333. return loc, []
  3334. def __str__( self ):
  3335. if hasattr(self,"name"):
  3336. return self.name
  3337. if self.strRepr is None:
  3338. self.strRepr = "[" + _ustr(self.expr) + "]..."
  3339. return self.strRepr
  3340. class _NullToken(object):
  3341. def __bool__(self):
  3342. return False
  3343. __nonzero__ = __bool__
  3344. def __str__(self):
  3345. return ""
# Module-level sentinel: Optional uses it as the default for its ``default``
# argument and compares against it by identity to detect that the caller
# supplied no default value.
_optionalNotMatched = _NullToken()
  3347. class Optional(ParseElementEnhance):
  3348. """
  3349. Optional matching of the given expression.
  3350. Parameters:
  3351. - expr - expression that must match zero or more times
  3352. - default (optional) - value to be returned if the optional expression is not found.
  3353. Example::
  3354. # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
  3355. zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
  3356. zip.runTests('''
  3357. # traditional ZIP code
  3358. 12345
  3359. # ZIP+4 form
  3360. 12101-0001
  3361. # invalid ZIP
  3362. 98765-
  3363. ''')
  3364. prints::
  3365. # traditional ZIP code
  3366. 12345
  3367. ['12345']
  3368. # ZIP+4 form
  3369. 12101-0001
  3370. ['12101-0001']
  3371. # invalid ZIP
  3372. 98765-
  3373. ^
  3374. FAIL: Expected end of text (at char 5), (line:1, col:6)
  3375. """
  3376. def __init__( self, expr, default=_optionalNotMatched ):
  3377. super(Optional,self).__init__( expr, savelist=False )
  3378. self.saveAsList = self.expr.saveAsList
  3379. self.defaultValue = default
  3380. self.mayReturnEmpty = True
  3381. def parseImpl( self, instring, loc, doActions=True ):
  3382. try:
  3383. loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  3384. except (ParseException,IndexError):
  3385. if self.defaultValue is not _optionalNotMatched:
  3386. if self.expr.resultsName:
  3387. tokens = ParseResults([ self.defaultValue ])
  3388. tokens[self.expr.resultsName] = self.defaultValue
  3389. else:
  3390. tokens = [ self.defaultValue ]
  3391. else:
  3392. tokens = []
  3393. return loc, tokens
  3394. def __str__( self ):
  3395. if hasattr(self,"name"):
  3396. return self.name
  3397. if self.strRepr is None:
  3398. self.strRepr = "[" + _ustr(self.expr) + "]"
  3399. return self.strRepr
  3400. class SkipTo(ParseElementEnhance):
  3401. """
  3402. Token for skipping over all undefined text until the matched expression is found.
  3403. Parameters:
  3404. - expr - target expression marking the end of the data to be skipped
  3405. - include - (default=C{False}) if True, the target expression is also parsed
  3406. (the skipped text and target expression are returned as a 2-element list).
  3407. - ignore - (default=C{None}) used to define grammars (typically quoted strings and
  3408. comments) that might contain false matches to the target expression
  3409. - failOn - (default=C{None}) define expressions that are not allowed to be
  3410. included in the skipped test; if found before the target expression is found,
  3411. the SkipTo is not a match
  3412. Example::
  3413. report = '''
  3414. Outstanding Issues Report - 1 Jan 2000
  3415. # | Severity | Description | Days Open
  3416. -----+----------+-------------------------------------------+-----------
  3417. 101 | Critical | Intermittent system crash | 6
  3418. 94 | Cosmetic | Spelling error on Login ('log|n') | 14
  3419. 79 | Minor | System slow when running too many reports | 47
  3420. '''
  3421. integer = Word(nums)
  3422. SEP = Suppress('|')
  3423. # use SkipTo to simply match everything up until the next SEP
  3424. # - ignore quoted strings, so that a '|' character inside a quoted string does not match
  3425. # - parse action will call token.strip() for each matched token, i.e., the description body
  3426. string_data = SkipTo(SEP, ignore=quotedString)
  3427. string_data.setParseAction(tokenMap(str.strip))
  3428. ticket_expr = (integer("issue_num") + SEP
  3429. + string_data("sev") + SEP
  3430. + string_data("desc") + SEP
  3431. + integer("days_open"))
  3432. for tkt in ticket_expr.searchString(report):
  3433. print tkt.dump()
  3434. prints::
  3435. ['101', 'Critical', 'Intermittent system crash', '6']
  3436. - days_open: 6
  3437. - desc: Intermittent system crash
  3438. - issue_num: 101
  3439. - sev: Critical
  3440. ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
  3441. - days_open: 14
  3442. - desc: Spelling error on Login ('log|n')
  3443. - issue_num: 94
  3444. - sev: Cosmetic
  3445. ['79', 'Minor', 'System slow when running too many reports', '47']
  3446. - days_open: 47
  3447. - desc: System slow when running too many reports
  3448. - issue_num: 79
  3449. - sev: Minor
  3450. """
  3451. def __init__( self, other, include=False, ignore=None, failOn=None ):
  3452. super( SkipTo, self ).__init__( other )
  3453. self.ignoreExpr = ignore
  3454. self.mayReturnEmpty = True
  3455. self.mayIndexError = False
  3456. self.includeMatch = include
  3457. self.asList = False
  3458. if isinstance(failOn, basestring):
  3459. self.failOn = ParserElement._literalStringClass(failOn)
  3460. else:
  3461. self.failOn = failOn
  3462. self.errmsg = "No match found for "+_ustr(self.expr)
  3463. def parseImpl( self, instring, loc, doActions=True ):
  3464. startloc = loc
  3465. instrlen = len(instring)
  3466. expr = self.expr
  3467. expr_parse = self.expr._parse
  3468. self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
  3469. self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
  3470. tmploc = loc
  3471. while tmploc <= instrlen:
  3472. if self_failOn_canParseNext is not None:
  3473. # break if failOn expression matches
  3474. if self_failOn_canParseNext(instring, tmploc):
  3475. break
  3476. if self_ignoreExpr_tryParse is not None:
  3477. # advance past ignore expressions
  3478. while 1:
  3479. try:
  3480. tmploc = self_ignoreExpr_tryParse(instring, tmploc)
  3481. except ParseBaseException:
  3482. break
  3483. try:
  3484. expr_parse(instring, tmploc, doActions=False, callPreParse=False)
  3485. except (ParseException, IndexError):
  3486. # no match, advance loc in string
  3487. tmploc += 1
  3488. else:
  3489. # matched skipto expr, done
  3490. break
  3491. else:
  3492. # ran off the end of the input string without matching skipto expr, fail
  3493. raise ParseException(instring, loc, self.errmsg, self)
  3494. # build up return values
  3495. loc = tmploc
  3496. skiptext = instring[startloc:loc]
  3497. skipresult = ParseResults(skiptext)
  3498. if self.includeMatch:
  3499. loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
  3500. skipresult += mat
  3501. return loc, skipresult
  3502. class Forward(ParseElementEnhance):
  3503. """
  3504. Forward declaration of an expression to be defined later -
  3505. used for recursive grammars, such as algebraic infix notation.
  3506. When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
  3507. Note: take care when assigning to C{Forward} not to overlook precedence of operators.
  3508. Specifically, '|' has a lower precedence than '<<', so that::
  3509. fwdExpr << a | b | c
  3510. will actually be evaluated as::
  3511. (fwdExpr << a) | b | c
  3512. thereby leaving b and c out as parseable alternatives. It is recommended that you
  3513. explicitly group the values inserted into the C{Forward}::
  3514. fwdExpr << (a | b | c)
  3515. Converting to use the '<<=' operator instead will avoid this problem.
  3516. See L{ParseResults.pprint} for an example of a recursive parser created using
  3517. C{Forward}.
  3518. """
  3519. def __init__( self, other=None ):
  3520. super(Forward,self).__init__( other, savelist=False )
  3521. def __lshift__( self, other ):
  3522. if isinstance( other, basestring ):
  3523. other = ParserElement._literalStringClass(other)
  3524. self.expr = other
  3525. self.strRepr = None
  3526. self.mayIndexError = self.expr.mayIndexError
  3527. self.mayReturnEmpty = self.expr.mayReturnEmpty
  3528. self.setWhitespaceChars( self.expr.whiteChars )
  3529. self.skipWhitespace = self.expr.skipWhitespace
  3530. self.saveAsList = self.expr.saveAsList
  3531. self.ignoreExprs.extend(self.expr.ignoreExprs)
  3532. return self
  3533. def __ilshift__(self, other):
  3534. return self << other
  3535. def leaveWhitespace( self ):
  3536. self.skipWhitespace = False
  3537. return self
  3538. def streamline( self ):
  3539. if not self.streamlined:
  3540. self.streamlined = True
  3541. if self.expr is not None:
  3542. self.expr.streamline()
  3543. return self
  3544. def validate( self, validateTrace=[] ):
  3545. if self not in validateTrace:
  3546. tmp = validateTrace[:]+[self]
  3547. if self.expr is not None:
  3548. self.expr.validate(tmp)
  3549. self.checkRecursion([])
  3550. def __str__( self ):
  3551. if hasattr(self,"name"):
  3552. return self.name
  3553. return self.__class__.__name__ + ": ..."
  3554. # stubbed out for now - creates awful memory and perf issues
  3555. self._revertClass = self.__class__
  3556. self.__class__ = _ForwardNoRecurse
  3557. try:
  3558. if self.expr is not None:
  3559. retString = _ustr(self.expr)
  3560. else:
  3561. retString = "None"
  3562. finally:
  3563. self.__class__ = self._revertClass
  3564. return self.__class__.__name__ + ": " + retString
  3565. def copy(self):
  3566. if self.expr is not None:
  3567. return super(Forward,self).copy()
  3568. else:
  3569. ret = Forward()
  3570. ret <<= self
  3571. return ret
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the placeholder C{"..."}."""
    def __str__( self ):
        return "..."
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but is not forwarded to the base class
        super(TokenConverter,self).__init__( expr )#, savelist )
        self.saveAsList = False
  3582. class Combine(TokenConverter):
  3583. """
  3584. Converter to concatenate all matching tokens to a single string.
  3585. By default, the matching patterns must also be contiguous in the input string;
  3586. this can be disabled by specifying C{'adjacent=False'} in the constructor.
  3587. Example::
  3588. real = Word(nums) + '.' + Word(nums)
  3589. print(real.parseString('3.1416')) # -> ['3', '.', '1416']
  3590. # will also erroneously match the following
  3591. print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
  3592. real = Combine(Word(nums) + '.' + Word(nums))
  3593. print(real.parseString('3.1416')) # -> ['3.1416']
  3594. # no match when there are internal spaces
  3595. print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
  3596. """
  3597. def __init__( self, expr, joinString="", adjacent=True ):
  3598. super(Combine,self).__init__( expr )
  3599. # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
  3600. if adjacent:
  3601. self.leaveWhitespace()
  3602. self.adjacent = adjacent
  3603. self.skipWhitespace = True
  3604. self.joinString = joinString
  3605. self.callPreparse = True
  3606. def ignore( self, other ):
  3607. if self.adjacent:
  3608. ParserElement.ignore(self, other)
  3609. else:
  3610. super( Combine, self).ignore( other )
  3611. return self
  3612. def postParse( self, instring, loc, tokenlist ):
  3613. retToks = tokenlist.copy()
  3614. del retToks[:]
  3615. retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
  3616. if self.resultsName and retToks.haskeys():
  3617. return [ retToks ]
  3618. else:
  3619. return retToks
  3620. class Group(TokenConverter):
  3621. """
  3622. Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
  3623. Example::
  3624. ident = Word(alphas)
  3625. num = Word(nums)
  3626. term = ident | num
  3627. func = ident + Optional(delimitedList(term))
  3628. print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
  3629. func = ident + Group(Optional(delimitedList(term)))
  3630. print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
  3631. """
  3632. def __init__( self, expr ):
  3633. super(Group,self).__init__( expr )
  3634. self.saveAsList = True
  3635. def postParse( self, instring, loc, tokenlist ):
  3636. return [ tokenlist ]
  3637. class Dict(TokenConverter):
  3638. """
  3639. Converter to return a repetitive expression as a list, but also as a dictionary.
  3640. Each element can also be referenced using the first token in the expression as its key.
  3641. Useful for tabular report scraping when the first column can be used as a item key.
  3642. Example::
  3643. data_word = Word(alphas)
  3644. label = data_word + FollowedBy(':')
  3645. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3646. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  3647. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3648. # print attributes as plain groups
  3649. print(OneOrMore(attr_expr).parseString(text).dump())
  3650. # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
  3651. result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
  3652. print(result.dump())
  3653. # access named fields as dict entries, or output as dict
  3654. print(result['shape'])
  3655. print(result.asDict())
  3656. prints::
  3657. ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
  3658. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  3659. - color: light blue
  3660. - posn: upper left
  3661. - shape: SQUARE
  3662. - texture: burlap
  3663. SQUARE
  3664. {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
  3665. See more examples at L{ParseResults} of accessing fields by results name.
  3666. """
  3667. def __init__( self, expr ):
  3668. super(Dict,self).__init__( expr )
  3669. self.saveAsList = True
  3670. def postParse( self, instring, loc, tokenlist ):
  3671. for i,tok in enumerate(tokenlist):
  3672. if len(tok) == 0:
  3673. continue
  3674. ikey = tok[0]
  3675. if isinstance(ikey,int):
  3676. ikey = _ustr(tok[0]).strip()
  3677. if len(tok)==1:
  3678. tokenlist[ikey] = _ParseResultsWithOffset("",i)
  3679. elif len(tok)==2 and not isinstance(tok[1],ParseResults):
  3680. tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
  3681. else:
  3682. dictvalue = tok.copy() #ParseResults(i)
  3683. del dictvalue[0]
  3684. if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
  3685. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
  3686. else:
  3687. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
  3688. if self.resultsName:
  3689. return [ tokenlist ]
  3690. else:
  3691. return tokenlist
  3692. class Suppress(TokenConverter):
  3693. """
  3694. Converter for ignoring the results of a parsed expression.
  3695. Example::
  3696. source = "a, b, c,d"
  3697. wd = Word(alphas)
  3698. wd_list1 = wd + ZeroOrMore(',' + wd)
  3699. print(wd_list1.parseString(source))
  3700. # often, delimiters that are useful during parsing are just in the
  3701. # way afterward - use Suppress to keep them out of the parsed output
  3702. wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
  3703. print(wd_list2.parseString(source))
  3704. prints::
  3705. ['a', ',', 'b', ',', 'c', ',', 'd']
  3706. ['a', 'b', 'c', 'd']
  3707. (See also L{delimitedList}.)
  3708. """
  3709. def postParse( self, instring, loc, tokenlist ):
  3710. return []
  3711. def suppress( self ):
  3712. return self
  3713. class OnlyOnce(object):
  3714. """
  3715. Wrapper for parse actions, to ensure they are only called once.
  3716. """
  3717. def __init__(self, methodCall):
  3718. self.callable = _trim_arity(methodCall)
  3719. self.called = False
  3720. def __call__(self,s,l,t):
  3721. if not self.called:
  3722. results = self.callable(s,l,t)
  3723. self.called = True
  3724. return results
  3725. raise ParseException(s,l,"")
  3726. def reset(self):
  3727. self.called = False
  3728. def traceParseAction(f):
  3729. """
  3730. Decorator for debugging parse actions.
  3731. When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
  3732. When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
  3733. Example::
  3734. wd = Word(alphas)
  3735. @traceParseAction
  3736. def remove_duplicate_chars(tokens):
  3737. return ''.join(sorted(set(''.join(tokens)))
  3738. wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
  3739. print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
  3740. prints::
  3741. >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
  3742. <<leaving remove_duplicate_chars (ret: 'dfjkls')
  3743. ['dfjkls']
  3744. """
  3745. f = _trim_arity(f)
  3746. def z(*paArgs):
  3747. thisFunc = f.__name__
  3748. s,l,t = paArgs[-3:]
  3749. if len(paArgs)>3:
  3750. thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
  3751. sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
  3752. try:
  3753. ret = f(*paArgs)
  3754. except Exception as exc:
  3755. sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
  3756. raise
  3757. sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
  3758. return ret
  3759. try:
  3760. z.__name__ = f.__name__
  3761. except AttributeError:
  3762. pass
  3763. return z
  3764. #
  3765. # global helpers
  3766. #
  3767. def delimitedList( expr, delim=",", combine=False ):
  3768. """
  3769. Helper to define a delimited list of expressions - the delimiter defaults to ','.
  3770. By default, the list elements and delimiters can have intervening whitespace, and
  3771. comments, but this can be overridden by passing C{combine=True} in the constructor.
  3772. If C{combine} is set to C{True}, the matching tokens are returned as a single token
  3773. string, with the delimiters included; otherwise, the matching tokens are returned
  3774. as a list of tokens, with the delimiters suppressed.
  3775. Example::
  3776. delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
  3777. delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
  3778. """
  3779. dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
  3780. if combine:
  3781. return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
  3782. else:
  3783. return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
  3784. def countedArray( expr, intExpr=None ):
  3785. """
  3786. Helper to define a counted list of expressions.
  3787. This helper defines a pattern of the form::
  3788. integer expr expr expr...
  3789. where the leading integer tells how many expr expressions follow.
  3790. The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
  3791. If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
  3792. Example::
  3793. countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
  3794. # in this parser, the leading integer value is given in binary,
  3795. # '10' indicating that 2 values are in the array
  3796. binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
  3797. countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
  3798. """
  3799. arrayExpr = Forward()
  3800. def countFieldParseAction(s,l,t):
  3801. n = t[0]
  3802. arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
  3803. return []
  3804. if intExpr is None:
  3805. intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
  3806. else:
  3807. intExpr = intExpr.copy()
  3808. intExpr.setName("arrayLen")
  3809. intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
  3810. return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
  3811. def _flatten(L):
  3812. ret = []
  3813. for i in L:
  3814. if isinstance(i,list):
  3815. ret.extend(_flatten(i))
  3816. else:
  3817. ret.append(i)
  3818. return ret
  3819. def matchPreviousLiteral(expr):
  3820. """
  3821. Helper to define an expression that is indirectly defined from
  3822. the tokens matched in a previous expression, that is, it looks
  3823. for a 'repeat' of a previous expression. For example::
  3824. first = Word(nums)
  3825. second = matchPreviousLiteral(first)
  3826. matchExpr = first + ":" + second
  3827. will match C{"1:1"}, but not C{"1:2"}. Because this matches a
  3828. previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
  3829. If this is not desired, use C{matchPreviousExpr}.
  3830. Do I{not} use with packrat parsing enabled.
  3831. """
  3832. rep = Forward()
  3833. def copyTokenToRepeater(s,l,t):
  3834. if t:
  3835. if len(t) == 1:
  3836. rep << t[0]
  3837. else:
  3838. # flatten t tokens
  3839. tflat = _flatten(t.asList())
  3840. rep << And(Literal(tt) for tt in tflat)
  3841. else:
  3842. rep << Empty()
  3843. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  3844. rep.setName('(prev) ' + _ustr(expr))
  3845. return rep
  3846. def matchPreviousExpr(expr):
  3847. """
  3848. Helper to define an expression that is indirectly defined from
  3849. the tokens matched in a previous expression, that is, it looks
  3850. for a 'repeat' of a previous expression. For example::
  3851. first = Word(nums)
  3852. second = matchPreviousExpr(first)
  3853. matchExpr = first + ":" + second
  3854. will match C{"1:1"}, but not C{"1:2"}. Because this matches by
  3855. expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
  3856. the expressions are evaluated first, and then compared, so
  3857. C{"1"} is compared with C{"10"}.
  3858. Do I{not} use with packrat parsing enabled.
  3859. """
  3860. rep = Forward()
  3861. e2 = expr.copy()
  3862. rep <<= e2
  3863. def copyTokenToRepeater(s,l,t):
  3864. matchTokens = _flatten(t.asList())
  3865. def mustMatchTheseTokens(s,l,t):
  3866. theseTokens = _flatten(t.asList())
  3867. if theseTokens != matchTokens:
  3868. raise ParseException("",0,"")
  3869. rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
  3870. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  3871. rep.setName('(prev) ' + _ustr(expr))
  3872. return rep
  3873. def _escapeRegexRangeChars(s):
  3874. #~ escape these chars: ^-]
  3875. for c in r"\^-]":
  3876. s = s.replace(c,_bslash+c)
  3877. s = s.replace("\n",r"\n")
  3878. s = s.replace("\t",r"\t")
  3879. return _ustr(s)
  3880. def oneOf( strs, caseless=False, useRegex=True ):
  3881. """
  3882. Helper to quickly define a set of alternative Literals, and makes sure to do
  3883. longest-first testing when there is a conflict, regardless of the input order,
  3884. but returns a C{L{MatchFirst}} for best performance.
  3885. Parameters:
  3886. - strs - a string of space-delimited literals, or a collection of string literals
  3887. - caseless - (default=C{False}) - treat all literals as caseless
  3888. - useRegex - (default=C{True}) - as an optimization, will generate a Regex
  3889. object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
  3890. if creating a C{Regex} raises an exception)
  3891. Example::
  3892. comp_oper = oneOf("< = > <= >= !=")
  3893. var = Word(alphas)
  3894. number = Word(nums)
  3895. term = var | number
  3896. comparison_expr = term + comp_oper + term
  3897. print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))
  3898. prints::
  3899. [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
  3900. """
  3901. if caseless:
  3902. isequal = ( lambda a,b: a.upper() == b.upper() )
  3903. masks = ( lambda a,b: b.upper().startswith(a.upper()) )
  3904. parseElementClass = CaselessLiteral
  3905. else:
  3906. isequal = ( lambda a,b: a == b )
  3907. masks = ( lambda a,b: b.startswith(a) )
  3908. parseElementClass = Literal
  3909. symbols = []
  3910. if isinstance(strs,basestring):
  3911. symbols = strs.split()
  3912. elif isinstance(strs, collections.Iterable):
  3913. symbols = list(strs)
  3914. else:
  3915. warnings.warn("Invalid argument to oneOf, expected string or iterable",
  3916. SyntaxWarning, stacklevel=2)
  3917. if not symbols:
  3918. return NoMatch()
  3919. i = 0
  3920. while i < len(symbols)-1:
  3921. cur = symbols[i]
  3922. for j,other in enumerate(symbols[i+1:]):
  3923. if ( isequal(other, cur) ):
  3924. del symbols[i+j+1]
  3925. break
  3926. elif ( masks(cur, other) ):
  3927. del symbols[i+j+1]
  3928. symbols.insert(i,other)
  3929. cur = other
  3930. break
  3931. else:
  3932. i += 1
  3933. if not caseless and useRegex:
  3934. #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
  3935. try:
  3936. if len(symbols)==len("".join(symbols)):
  3937. return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
  3938. else:
  3939. return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
  3940. except Exception:
  3941. warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
  3942. SyntaxWarning, stacklevel=2)
  3943. # last resort, just use MatchFirst
  3944. return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
  def dictOf( key, value ):
      """
      Helper to define a dictionary-like result from separate patterns for the key
      and for the value.  The C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} wrappers
      are applied here in the required order.  The key pattern may contain delimiters
      or punctuation as long as they are suppressed, so that only the significant key
      text remains.  The value pattern may contain named results, which then show up
      as named token fields of the C{Dict} results.

      Example::
          text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
          attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
          print(OneOrMore(attr_expr).parseString(text).dump())

          attr_label = label
          attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

          # similar to Dict, but simpler call format
          result = dictOf(attr_label, attr_value).parseString(text)
          print(result.dump())
          print(result['shape'])
          print(result.shape)  # object attribute access works too
          print(result.asDict())
      prints::
          [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
          - color: light blue
          - posn: upper left
          - shape: SQUARE
          - texture: burlap
          SQUARE
          SQUARE
          {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
      """
      # one grouped (key, value) entry; zero or more of them make up the dictionary
      entry = Group( key + value )
      return Dict( ZeroOrMore( entry ) )
  def originalTextFor(expr, asString=True):
      """
      Helper to return the original, untokenized text matched by an expression.
      Useful to turn the parsed fields of an HTML start tag back into the raw tag
      text, or to recover the exact input text (including intervening whitespace)
      spanned by several separate tokens.

      By default a string containing the original parsed text is returned.  If the
      optional C{asString} argument is passed as C{False}, the return value is a
      C{L{ParseResults}} holding a single token with the original matched text plus
      any results names that were matched.  So if the expression passed to
      C{L{originalTextFor}} defines results names, C{asString} must be C{False} for
      those results name values to be preserved.

      Example::
          src = "this is test <b> bold <i>text</i> </b> normal text "
          for tag in ("b","i"):
              opener,closer = makeHTMLTags(tag)
              patt = originalTextFor(opener + SkipTo(closer) + closer)
              print(patt.searchString(src)[0])
      prints::
          ['<b> bold <i>text</i> </b>']
          ['<i>text</i>']
      """
      # zero-width markers whose parse action reports the current parse location
      startMarker = Empty().setParseAction(lambda s,loc,t: loc)
      endMarker = startMarker.copy()
      # the end marker must not run the pre-parse step, so it reports the location
      # right where expr stopped
      endMarker.callPreparse = False
      wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

      if asString:
          extractText = lambda s,l,t: s[t._original_start:t._original_end]
      else:
          def extractText(s,l,t):
              # drop the two bookkeeping names, then replace the token list with
              # the single original-text token (other results names are kept)
              begin = t.pop('_original_start')
              end = t.pop('_original_end')
              t[:] = [s[begin:end]]

      wrapped.setParseAction(extractText)
      wrapped.ignoreExprs = expr.ignoreExprs
      return wrapped
  def ungroup(expr):
      """
      Helper to undo pyparsing's default grouping of And expressions, even
      if all but one are non-empty.  The result wraps C{expr} in a
      C{TokenConverter} whose parse action returns only the first token.
      """
      firstToken = lambda toks: toks[0]
      return TokenConverter(expr).setParseAction(firstToken)
  def locatedExpr(expr):
      """
      Helper to decorate a returned token with its starting and ending locations
      in the input string.

      The returned group carries these results names:
       - locn_start = location where matched expression begins
       - locn_end = location where matched expression ends
       - value = the actual parsed results

      Be careful if the input text contains C{<TAB>} characters, you may want to call
      C{L{ParserElement.parseWithTabs}}

      Example::
          wd = Word(alphas)
          for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
              print(match)
      prints::
          [[0, 'ljsdf', 5]]
          [[8, 'lksdjjf', 15]]
          [[18, 'lkkjj', 23]]
      """
      # zero-width expression whose parse action returns the current location
      startLoc = Empty().setParseAction(lambda s,l,t: l)
      # the end marker is a copy set to leave whitespace alone, so it reports the
      # location directly after the match
      endLoc = startLoc.copy().leaveWhitespace()
      return Group(startLoc("locn_start") + expr("value") + endLoc("locn_end"))
  # Ready-made, named instances of the positional expressions, so that grammars
  # can refer to them without constructing their own.
  empty = Empty().setName("empty")
  lineStart = LineStart().setName("lineStart")
  lineEnd = LineEnd().setName("lineEnd")
  stringStart = StringStart().setName("stringStart")
  stringEnd = StringEnd().setName("stringEnd")
  # Building blocks for the bracket expression accepted by srange().
  # a backslash followed by one punctuation character -> that character
  _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
  # \x## or \0x## (marker in either case) -> the character with that hex code.
  # The digits are taken as everything after the x marker; stripping with
  # lstrip(r'\0x') would treat the argument as a character set and also eat
  # leading '0' digits (making "\x00" unparsable) and would not remove an
  # upper-case 'X' marker.
  _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
  # \0## -> the character with that octal code
  _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
  _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
  # "a-z" style range; grouped so srange() can tell it apart from a single char
  _charRange = Group(_singleChar + Suppress("-") + _singleChar)
  _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

  def srange(s):
      r"""
      Helper to easily define string ranges for use in Word construction.  Borrows
      syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
      The input string must be enclosed in []'s, and the returned string is the expanded
      character set joined into a single string.  If the input cannot be parsed,
      an empty string is returned.
      The values enclosed in the []'s may be:
       - a single character
       - an escaped character with a leading backslash (such as C{\-} or C{\]})
       - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
           (C{\0x##} is also supported for backwards compatibility)
       - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
       - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
       - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
      """
      def _expanded(part):
          # single characters arrive as plain strings; ranges arrive as a
          # grouped (first, last) pair and are expanded inclusively
          if not isinstance(part,ParseResults):
              return part
          return ''.join(unichr(c) for c in range(ord(part[0]),ord(part[1])+1))
      try:
          return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
      except Exception:
          # deliberate best effort: any failure yields an empty range
          return ""
  def matchOnlyAtCol(n):
      """
      Helper method for defining parse actions that require matching at a specific
      column in the input text.  Returns a parse action that raises
      C{ParseException} when the match location is not at column C{n}.
      """
      def verifyCol(strg,locn,toks):
          if col(locn,strg) == n:
              return
          raise ParseException(strg,locn,"matched token not at column %d" % n)
      return verifyCol
  def replaceWith(replStr):
      """
      Helper method for common parse actions that simply return a literal value.  Especially
      useful when used with C{L{transformString<ParserElement.transformString>}()}.
      The returned parse action ignores its arguments and returns C{[replStr]}.

      Example::
          num = Word(nums).setParseAction(lambda toks: int(toks[0]))
          na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
          term = na | num

          OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
      """
      constantAction = lambda s,l,t: [replStr]
      return constantAction
  def removeQuotes(s,l,t):
      """
      Helper parse action for removing quotation marks from parsed quoted strings.
      Returns the first token without its first and last characters.

      Example::
          # by default, quotation marks are included in parsed results
          quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

          # use removeQuotes to strip quotation marks from parsed results
          quotedString.setParseAction(removeQuotes)
          quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
      """
      quoted = t[0]
      return quoted[1:-1]
  def tokenMap(func, *args):
      """
      Helper to define a parse action by mapping a function to all elements of a
      ParseResults list.  If any additional args are passed, they are forwarded to
      the given function as additional arguments after the token, as in
      C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will
      convert the parsed data to an integer using base 16.

      The returned parse action takes its C{__name__} from C{func}.

      Example (compare the last example to the one in L{ParserElement.transformString})::
          hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
          hex_ints.runTests('''
              00 11 22 aa FF 0a 0d 1a
              ''')

          upperword = Word(alphas).setParseAction(tokenMap(str.upper))
          OneOrMore(upperword).runTests('''
              my kingdom for a horse
              ''')

          wd = Word(alphas).setParseAction(tokenMap(str.title))
          OneOrMore(wd).setParseAction(' '.join).runTests('''
              now is the winter of our discontent made glorious summer by this sun of york
              ''')
      prints::
          00 11 22 aa FF 0a 0d 1a
          [0, 17, 34, 170, 255, 10, 13, 26]

          my kingdom for a horse
          ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

          now is the winter of our discontent made glorious summer by this sun of york
          ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
      """
      def pa(s,l,t):
          mapped = []
          for tokn in t:
              mapped.append(func(tokn, *args))
          return mapped

      # name the action after func; callables without __name__ fall back to
      # their class name, and anything else to str(func)
      try:
          className = getattr(func, '__class__').__name__
          func_name = getattr(func, '__name__', className)
      except Exception:
          func_name = str(func)
      pa.__name__ = func_name

      return pa

  upcaseTokens = tokenMap(lambda tok: _ustr(tok).upper())
  """(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

  downcaseTokens = tokenMap(lambda tok: _ustr(tok).lower())
  """(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
  def _makeTags(tagStr, xml):
      """Internal helper to construct opening and closing tag expressions, given a tag name"""
      if isinstance(tagStr,basestring):
          resname = tagStr
          # XML tag names match only in the given case, HTML tag names in any case
          tagStr = Keyword(tagStr, caseless=not xml)
      else:
          resname = tagStr.name

      tagAttrName = Word(alphas,alphanums+"_-:")
      if (xml):
          # XML: every attribute is name="value" with a double-quoted value
          tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
          tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
      else:
          # HTML: values may be quoted or unquoted (anything printable up to '>'),
          # the value is optional, and attribute names are lower-cased
          printablesLessRAbrack = "".join(c for c in printables if c not in ">")
          tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
          tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) + \
                           Optional( Suppress("=") + tagAttrValue ) )
      # "empty" is True when the tag is self-closing ("/>"), False otherwise
      emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
      openTag = Suppress("<") + tagStr("tag") + \
              Dict(ZeroOrMore(tagAttr)) + \
              emptyFlag + Suppress(">")
      closeTag = Combine(_L("</") + tagStr + ">")

      # results names are "start"/"end" + the title-cased tag name with any ':' removed
      nameSuffix = "".join(resname.replace(":"," ").title().split())
      openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % resname)
      closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % resname)
      openTag.tag = resname
      closeTag.tag = resname
      return openTag, closeTag
  def makeHTMLTags(tagStr):
      """
      Helper to construct opening and closing tag expressions for HTML, given a tag name.
      Matches tags in either upper or lower case, attributes with namespaces and with
      quoted or unquoted values.  Returns the C{(openTag, closeTag)} pair.

      Example::
          text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
          # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
          a,a_end = makeHTMLTags("A")
          link_expr = a + SkipTo(a_end)("link_text") + a_end

          for link in link_expr.searchString(text):
              # attributes in the <A> tag (like "href" shown here) are also accessible as named results
              print(link.link_text, '->', link.href)
      prints::
          pyparsing -> http://pyparsing.wikispaces.com
      """
      return _makeTags( tagStr, xml=False )
  def makeXMLTags(tagStr):
      """
      Helper to construct opening and closing tag expressions for XML, given a tag name.
      Matches tags only in the given upper/lower case.  Returns the
      C{(openTag, closeTag)} pair.

      Example: similar to L{makeHTMLTags}
      """
      return _makeTags( tagStr, xml=True )
  def withAttribute(*args,**attrDict):
      """
      Helper to create a validating parse action to be used with start tags created
      with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
      with a required attribute value, to avoid false matches on common tags such as
      C{<TD>} or C{<DIV>}.

      Call C{withAttribute} with a series of attribute names and values. Specify the list
      of filter attributes names and values as:
       - keyword arguments, as in C{(align="right")}, or
       - as an explicit dict with C{**} operator, when an attribute name is also a Python
            reserved word, as in C{**{"class":"Customer", "align":"right"}}
       - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
      For attribute names with a namespace prefix, you must use the second form.  Attribute
      names are matched insensitive to upper/lower case.

      When positional name-value tuples are given, keyword arguments are not consulted.

      If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

      To verify that the attribute exists, but without specifying a value, pass
      C{withAttribute.ANY_VALUE} as the value.

      Example::
          html = '''
              <div>
              Some text
              <div type="grid">1 4 0 1 0</div>
              <div type="graph">1,3 2,3 1,1</div>
              <div>this has no type</div>
              </div>

          '''
          div,div_end = makeHTMLTags("div")

          # only match div tag having a type attribute with value "grid"
          div_grid = div().setParseAction(withAttribute(type="grid"))
          grid_expr = div_grid + SkipTo(div | div_end)("body")
          for grid_header in grid_expr.searchString(html):
              print(grid_header.body)

          # construct a match with any div tag having a type attribute, regardless of the value
          div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
          div_expr = div_any_type + SkipTo(div | div_end)("body")
          for div_header in div_expr.searchString(html):
              print(div_header.body)
      prints::
          1 4 0 1 0

          1 4 0 1 0
          1,3 2,3 1,1
      """
      pairs = args[:] if args else attrDict.items()
      # snapshot as a list of (name, value) pairs for repeated use by the action
      attrs = [(k,v) for k,v in pairs]
      def pa(s,l,tokens):
          for attrName,attrValue in attrs:
              if attrName not in tokens:
                  raise ParseException(s,l,"no matching attribute " + attrName)
              if attrValue != withAttribute.ANY_VALUE:
                  actual = tokens[attrName]
                  if actual != attrValue:
                      raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                  (attrName, actual, attrValue))
      return pa
  # sentinel meaning "the attribute must be present, with any value"
  withAttribute.ANY_VALUE = object()
  def withClass(classname, namespace=''):
      """
      Simplified version of C{L{withAttribute}} when matching on a div class - made
      difficult because C{class} is a reserved word in Python.  With a non-empty
      C{namespace} the attribute tested is C{"<namespace>:class"}, otherwise C{"class"}.

      Example::
          html = '''
              <div>
              Some text
              <div class="grid">1 4 0 1 0</div>
              <div class="graph">1,3 2,3 1,1</div>
              <div>this &lt;div&gt; has no class</div>
              </div>

          '''
          div,div_end = makeHTMLTags("div")
          div_grid = div().setParseAction(withClass("grid"))

          grid_expr = div_grid + SkipTo(div | div_end)("body")
          for grid_header in grid_expr.searchString(html):
              print(grid_header.body)

          div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
          div_expr = div_any_type + SkipTo(div | div_end)("body")
          for div_header in div_expr.searchString(html):
              print(div_header.body)
      prints::
          1 4 0 1 0

          1 4 0 1 0
          1,3 2,3 1,1
      """
      if namespace:
          classattr = "%s:class" % namespace
      else:
          classattr = "class"
      return withAttribute(**{classattr : classname})
  # marker constants used in infixNotation's opList to state an operator's associativity
  opAssoc = _Constants()
  opAssoc.LEFT = object()
  opAssoc.RIGHT = object()

  def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
      """
      Helper method for constructing grammars of expressions made up of
      operators working in a precedence hierarchy.  Operators may be unary or
      binary, left- or right-associative.  Parse actions can also be attached
      to operator expressions. The generated parser will also recognize the use
      of parentheses to override operator precedences (see example below).

      Note: if you define a deep operator list, you may see performance issues
      when using infixNotation. See L{ParserElement.enablePackrat} for a
      mechanism to potentially improve your parser performance.

      Parameters:
       - baseExpr - expression representing the most basic operand of the
          nested expression grammar
       - opList - list of tuples, one for each operator precedence level in the
        expression grammar; each tuple is of the form
        (opExpr, numTerms, rightLeftAssoc, parseAction), where:
         - opExpr is the pyparsing expression for the operator;
            may also be a string, which will be converted to a Literal;
            if numTerms is 3, opExpr is a tuple or list of two expressions, for the
            two operators separating the 3 terms
         - numTerms is the number of terms for this operator (must
            be 1, 2, or 3)
         - rightLeftAssoc is the indicator whether the operator is
            right or left associative, using the pyparsing-defined
            constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
         - parseAction is the parse action to be associated with
            expressions matching this operator expression (the
            parse action tuple member may be omitted)
       - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
       - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

      Raises C{ValueError} if numTerms is not 1, 2, or 3, if numTerms is 3 and
      opExpr is not a pair of expressions, or if rightLeftAssoc is neither
      C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

      Example::
          # simple example of four-function arithmetic with ints and variable names
          integer = pyparsing_common.signed_integer
          varname = pyparsing_common.identifier

          arith_expr = infixNotation(integer | varname,
              [
              ('-', 1, opAssoc.RIGHT),
              (oneOf('* /'), 2, opAssoc.LEFT),
              (oneOf('+ -'), 2, opAssoc.LEFT),
              ])

          arith_expr.runTests('''
              5+3*6
              (5+3)*6
              -2--11
              ''', fullDump=False)
      prints::
          5+3*6
          [[5, '+', [3, '*', 6]]]

          (5+3)*6
          [[[5, '+', 3], '*', 6]]

          -2--11
          [[['-', 2], '-', ['-', 11]]]
      """
      ret = Forward()
      # the innermost operand: a base element or a parenthesized full expression
      lastExpr = baseExpr | ( lpar + ret + rpar )
      for operDef in opList:
          # pad with None so the optional parseAction tuple member may be omitted
          opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
          if arity == 3:
              # validate the operator pair before using it to build the term
              # name, so a malformed value raises the ValueError below instead
              # of a string-formatting TypeError; a list pair is accepted too
              if opExpr is None or len(opExpr) != 2:
                  raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
              opExpr1, opExpr2 = opExpr
              termName = "%s%s term" % (opExpr1, opExpr2)
          else:
              termName = "%s term" % opExpr
          thisExpr = Forward().setName(termName)
          if rightLeftAssoc == opAssoc.LEFT:
              if arity == 1:
                  matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
              elif arity == 2:
                  if opExpr is not None:
                      matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                  else:
                      # no operator expression: operands simply follow one another
                      matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
              elif arity == 3:
                  matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                              Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
              else:
                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
          elif rightLeftAssoc == opAssoc.RIGHT:
              if arity == 1:
                  # try to avoid LR with this extra test
                  if not isinstance(opExpr, Optional):
                      opExpr = Optional(opExpr)
                  matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
              elif arity == 2:
                  if opExpr is not None:
                      matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                  else:
                      matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
              elif arity == 3:
                  matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                              Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
              else:
                  raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
          else:
              raise ValueError("operator must indicate right or left associativity")
          if pa:
              matchExpr.setParseAction( pa )
          # this level matches its operator form, or falls through to the previous level
          thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
          lastExpr = thisExpr
      ret <<= lastExpr
      return ret

  operatorPrecedence = infixNotation
  """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
  # Quoted-string expressions.  In each, the body regex accepts ordinary
  # characters (no line breaks), a doubled quote character, or a backslash
  # escape; the closing quote is then matched separately and the pieces combined.
  dblQuotedString = Combine(
      Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
  ).setName("string enclosed in double quotes")
  sglQuotedString = Combine(
      Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
  ).setName("string enclosed in single quotes")
  # either quoting style; the double-quoted alternative is tried first
  quotedString = Combine(
      ( Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' ) |
      ( Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" )
  ).setName("quotedString using single or double quotes")
  # a quoted string with a leading 'u' prefix
  unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
  def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
      """
      Helper method for defining nested lists enclosed in opening and closing
      delimiters ("(" and ")" are the default).

      Parameters:
       - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
       - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
       - content - expression for items within the nested lists (default=C{None})
       - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

      If an expression is not provided for the content argument, the nested
      expression will capture all whitespace-delimited content between delimiters
      as a list of separate values.  In that case C{opener} and C{closer} must
      both be strings.

      Use the C{ignoreExpr} argument to define expressions that may contain
      opening or closing characters that should not be treated as opening
      or closing characters for nesting, such as quotedString or a comment
      expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
      The default is L{quotedString}, but if no expressions are to be ignored,
      then pass C{None} for this argument.

      Raises C{ValueError} if C{opener} equals C{closer}, or if no C{content}
      is given and the delimiters are not both strings.

      Example::
          data_type = oneOf("void int short long char float double")
          decl_data_type = Combine(data_type + Optional(Word('*')))
          ident = Word(alphas+'_', alphanums+'_')
          number = pyparsing_common.number
          arg = Group(decl_data_type + ident)
          LPAR,RPAR = map(Suppress, "()")

          code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

          c_function = (decl_data_type("type")
                        + ident("name")
                        + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                        + code_body("body"))
          c_function.ignore(cStyleComment)

          source_code = '''
              int is_odd(int x) {
                  return (x%2);
              }

              int dec_to_hex(char hchar) {
                  if (hchar >= '0' && hchar <= '9') {
                      return (ord(hchar)-ord('0'));
                  } else {
                      return (10+ord(hchar)-ord('A'));
                  }
              }
          '''
          for func in c_function.searchString(source_code):
              print("%(name)s (%(type)s) args: %(args)s" % func)

      prints::
          is_odd (int) args: [['int', 'x']]
          dec_to_hex (int) args: [['char', 'hchar']]
      """
      if opener == closer:
          raise ValueError("opening and closing strings cannot be the same")

      if content is None:
          # a default content expression can only be derived from string delimiters
          if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
              raise ValueError("opening and closing arguments must be strings if no content expression is given")
          whiteChars = ParserElement.DEFAULT_WHITE_CHARS
          singleCharDelims = len(opener) == 1 and len(closer)==1
          if singleCharDelims:
              # one-character delimiters can simply be excluded from the character set
              if ignoreExpr is not None:
                  content = (Combine(OneOrMore(~ignoreExpr +
                                  CharsNotIn(opener+closer+whiteChars,exact=1))
                              ).setParseAction(lambda t:t[0].strip()))
              else:
                  content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                              ).setParseAction(lambda t:t[0].strip()))
          else:
              # multi-character delimiters need negative lookahead before each character
              if ignoreExpr is not None:
                  content = (Combine(OneOrMore(~ignoreExpr +
                                  ~Literal(opener) + ~Literal(closer) +
                                  CharsNotIn(whiteChars,exact=1))
                              ).setParseAction(lambda t:t[0].strip()))
              else:
                  content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                  CharsNotIn(whiteChars,exact=1))
                              ).setParseAction(lambda t:t[0].strip()))

      ret = Forward()
      # an item is an ignorable expression (if any), a nested list, or plain content
      if ignoreExpr is not None:
          item = ignoreExpr | ret | content
      else:
          item = ret | content
      ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
      ret.setName('nested %s%s expression' % (opener,closer))
      return ret
  def indentedBlock(blockStatementExpr, indentStack, indent=True):
      """
      Helper method for defining space-delimited indentation blocks, such as
      those used to define block statements in Python source code.

      Parameters:
       - blockStatementExpr - expression defining syntax of statement that
              is repeated within the indented block
       - indentStack - list created by caller to manage indentation stack
              (multiple statementWithIndentedBlock expressions within a single grammar
              should share a common indentStack)
       - indent - boolean indicating whether block must be indented beyond the
              current level; set to False for block of left-most statements
              (default=C{True})

      A valid block must contain at least one C{blockStatement}.

      Example::
          data = '''
          def A(z):
            A1
            B = 100
            G = A2
            A2
            A3
          B
          def BB(a,b,c):
            BB1
            def BBA():
              bba1
              bba2
              bba3
          C
          D
          def spam(x,y):
               def eggs(z):
                   pass
          '''


          indentStack = [1]
          stmt = Forward()

          identifier = Word(alphas, alphanums)
          funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
          func_body = indentedBlock(stmt, indentStack)
          funcDef = Group( funcDecl + func_body )

          rvalue = Forward()
          funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
          rvalue << (funcCall | identifier | Word(nums))
          assignment = Group(identifier + "=" + rvalue)
          stmt << ( funcDef | assignment | identifier )

          module_body = OneOrMore(stmt)

          parseTree = module_body.parseString(data)
          parseTree.pprint()
      prints::
          [['def',
            'A',
            ['(', 'z', ')'],
            ':',
            [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
           'B',
           ['def',
            'BB',
            ['(', 'a', 'b', 'c', ')'],
            ':',
            [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
           'C',
           'D',
           ['def',
            'spam',
            ['(', 'x', 'y', ')'],
            ':',
            [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
      """
      def checkPeerIndent(s,l,t):
          # a statement of the current block must sit at the column on top of the stack
          if l >= len(s):
              return
          curCol = col(l,s)
          expectedCol = indentStack[-1]
          if curCol > expectedCol:
              raise ParseFatalException(s,l,"illegal nesting")
          if curCol != expectedCol:
              raise ParseException(s,l,"not a peer entry")

      def checkSubIndent(s,l,t):
          # entering a block: the column must be deeper than the current one and is pushed
          curCol = col(l,s)
          if curCol <= indentStack[-1]:
              raise ParseException(s,l,"not a subentry")
          indentStack.append( curCol )

      def checkUnindent(s,l,t):
          # leaving a block: the column must be back at or before the enclosing
          # level; the block's own level is then popped
          if l >= len(s):
              return
          curCol = col(l,s)
          if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
              raise ParseException(s,l,"not an unindent")
          indentStack.pop()

      NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
      INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
      PEER   = Empty().setParseAction(checkPeerIndent).setName('')
      UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
      # one or more statements, each at the current level and grouped separately
      statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
      if indent:
          smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
      else:
          smExpr = Group( Optional(NL) + statements )
      # a backslash directly before a line end is ignored inside statements
      blockStatementExpr.ignore(_bslash + LineEnd())
      return smExpr.setName('indented block')
  # character sets in the 0xa1-0xff range, expanded with srange: letters
  # (0xc0-0xff without 0xd7 and 0xf7) and the remaining symbols
  alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
  punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

  # opening and closing tag expressions that match any tag name
  anyOpenTag,anyCloseTag = makeHTMLTags(
      Word(alphas,alphanums+"_:").setName('any tag')
  )
  # entity name -> replacement character, paired up positionally
  _htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
  # matches "&name;" for the names above; the name is exposed as the "entity" group
  commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap) +");").setName("common HTML entity")
  def replaceHTMLEntity(t):
      """Helper parser action to replace common HTML entities with their special characters"""
      entityName = t.entity
      return _htmlEntityMap.get(entityName)
  # Comment expressions - these are very common and easy to get wrong, so they
  # are provided ready-made.
  cStyleComment = Combine(
      Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
  ).setName("C style comment")
  "Comment of the form C{/* ... */}"

  htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
  "Comment of the form C{<!-- ... -->}"

  # everything up to the end of the current line, without skipping leading whitespace
  restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
  # the regex lets a backslash-newline pair continue the comment onto the next line
  dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
  "Comment of the form C{// ... (to end of line)}"

  cppStyleComment = Combine(
      ( Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' ) | dblSlashComment
  ).setName("C++ style comment")
  "Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

  javaStyleComment = cppStyleComment
  "Same as C{L{cppStyleComment}}"

  pythonStyleComment = Regex(r"#.*").setName("Python style comment")
  "Comment of the form C{# ... (to end of line)}"
  4588. _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
  4589. Optional( Word(" \t") +
  4590. ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
  4591. commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
  4592. """(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
  4593. This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
  4594. # some other useful expressions - using lower-case class name since we are really using this as a namespace
  4595. class pyparsing_common:
  4596. """
  4597. Here are some common low-level expressions that may be useful in jump-starting parser development:
  4598. - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
  4599. - common L{programming identifiers<identifier>}
  4600. - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
  4601. - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
  4602. - L{UUID<uuid>}
  4603. - L{comma-separated list<comma_separated_list>}
  4604. Parse actions:
  4605. - C{L{convertToInteger}}
  4606. - C{L{convertToFloat}}
  4607. - C{L{convertToDate}}
  4608. - C{L{convertToDatetime}}
  4609. - C{L{stripHTMLTags}}
  4610. - C{L{upcaseTokens}}
  4611. - C{L{downcaseTokens}}
  4612. Example::
  4613. pyparsing_common.number.runTests('''
  4614. # any int or real number, returned as the appropriate type
  4615. 100
  4616. -100
  4617. +100
  4618. 3.14159
  4619. 6.02e23
  4620. 1e-12
  4621. ''')
  4622. pyparsing_common.fnumber.runTests('''
  4623. # any int or real number, returned as float
  4624. 100
  4625. -100
  4626. +100
  4627. 3.14159
  4628. 6.02e23
  4629. 1e-12
  4630. ''')
  4631. pyparsing_common.hex_integer.runTests('''
  4632. # hex numbers
  4633. 100
  4634. FF
  4635. ''')
  4636. pyparsing_common.fraction.runTests('''
  4637. # fractions
  4638. 1/2
  4639. -3/4
  4640. ''')
  4641. pyparsing_common.mixed_integer.runTests('''
  4642. # mixed fractions
  4643. 1
  4644. 1/2
  4645. -3/4
  4646. 1-3/4
  4647. ''')
  4648. import uuid
  4649. pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
  4650. pyparsing_common.uuid.runTests('''
  4651. # uuid
  4652. 12345678-1234-5678-1234-567812345678
  4653. ''')
  4654. prints::
  4655. # any int or real number, returned as the appropriate type
  4656. 100
  4657. [100]
  4658. -100
  4659. [-100]
  4660. +100
  4661. [100]
  4662. 3.14159
  4663. [3.14159]
  4664. 6.02e23
  4665. [6.02e+23]
  4666. 1e-12
  4667. [1e-12]
  4668. # any int or real number, returned as float
  4669. 100
  4670. [100.0]
  4671. -100
  4672. [-100.0]
  4673. +100
  4674. [100.0]
  4675. 3.14159
  4676. [3.14159]
  4677. 6.02e23
  4678. [6.02e+23]
  4679. 1e-12
  4680. [1e-12]
  4681. # hex numbers
  4682. 100
  4683. [256]
  4684. FF
  4685. [255]
  4686. # fractions
  4687. 1/2
  4688. [0.5]
  4689. -3/4
  4690. [-0.75]
  4691. # mixed fractions
  4692. 1
  4693. [1]
  4694. 1/2
  4695. [0.5]
  4696. -3/4
  4697. [-0.75]
  4698. 1-3/4
  4699. [1.75]
  4700. # uuid
  4701. 12345678-1234-5678-1234-567812345678
  4702. [UUID('12345678-1234-5678-1234-567812345678')]
  4703. """
  4704. convertToInteger = tokenMap(int)
  4705. """
  4706. Parse action for converting parsed integers to Python int
  4707. """
  4708. convertToFloat = tokenMap(float)
  4709. """
  4710. Parse action for converting parsed numbers to Python float
  4711. """
  4712. integer = Word(nums).setName("integer").setParseAction(convertToInteger)
  4713. """expression that parses an unsigned integer, returns an int"""
  4714. hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
  4715. """expression that parses a hexadecimal integer, returns an int"""
  4716. signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
  4717. """expression that parses an integer with optional leading sign, returns an int"""
  4718. fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
  4719. """fractional expression of an integer divided by an integer, returns a float"""
  4720. fraction.addParseAction(lambda t: t[0]/t[-1])
  4721. mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
  4722. """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
  4723. mixed_integer.addParseAction(sum)
  4724. real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
  4725. """expression that parses a floating point number and returns a float"""
  4726. sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
  4727. """expression that parses a floating point number with optional scientific notation and returns a float"""
  4728. # streamlining this expression makes the docs nicer-looking
  4729. number = (sci_real | real | signed_integer).streamline()
  4730. """any numeric expression, returns the corresponding Python type"""
  4731. fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
  4732. """any int or real number, returned as float"""
  4733. identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
  4734. """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
  4735. ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
  4736. "IPv4 address (C{0.0.0.0 - 255.255.255.255})"
  4737. _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
  4738. _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
  4739. _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
  4740. _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
  4741. _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
  4742. ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
  4743. "IPv6 address (long, short, or mixed form)"
  4744. mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
  4745. "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
  4746. @staticmethod
  4747. def convertToDate(fmt="%Y-%m-%d"):
  4748. """
  4749. Helper to create a parse action for converting parsed date string to Python datetime.date
  4750. Params -
  4751. - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
  4752. Example::
  4753. date_expr = pyparsing_common.iso8601_date.copy()
  4754. date_expr.setParseAction(pyparsing_common.convertToDate())
  4755. print(date_expr.parseString("1999-12-31"))
  4756. prints::
  4757. [datetime.date(1999, 12, 31)]
  4758. """
  4759. def cvt_fn(s,l,t):
  4760. try:
  4761. return datetime.strptime(t[0], fmt).date()
  4762. except ValueError as ve:
  4763. raise ParseException(s, l, str(ve))
  4764. return cvt_fn
  4765. @staticmethod
  4766. def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
  4767. """
  4768. Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
  4769. Params -
  4770. - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
  4771. Example::
  4772. dt_expr = pyparsing_common.iso8601_datetime.copy()
  4773. dt_expr.setParseAction(pyparsing_common.convertToDatetime())
  4774. print(dt_expr.parseString("1999-12-31T23:59:59.999"))
  4775. prints::
  4776. [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
  4777. """
  4778. def cvt_fn(s,l,t):
  4779. try:
  4780. return datetime.strptime(t[0], fmt)
  4781. except ValueError as ve:
  4782. raise ParseException(s, l, str(ve))
  4783. return cvt_fn
  4784. iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
  4785. "ISO8601 date (C{yyyy-mm-dd})"
  4786. iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
  4787. "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"
  4788. uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
  4789. "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
  4790. _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
  4791. @staticmethod
  4792. def stripHTMLTags(s, l, tokens):
  4793. """
  4794. Parse action to remove HTML tags from web page HTML source
  4795. Example::
  4796. # strip HTML links from normal text
  4797. text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
  4798. td,td_end = makeHTMLTags("TD")
  4799. table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
  4800. print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
  4801. """
  4802. return pyparsing_common._html_stripper.transformString(tokens[0])
  4803. _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
  4804. + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
  4805. comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
  4806. """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
  4807. upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
  4808. """Parse action to convert tokens to upper case."""
  4809. downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
  4810. """Parse action to convert tokens to lower case."""
  4811. if __name__ == "__main__":
  4812. selectToken = CaselessLiteral("select")
  4813. fromToken = CaselessLiteral("from")
  4814. ident = Word(alphas, alphanums + "_$")
  4815. columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
  4816. columnNameList = Group(delimitedList(columnName)).setName("columns")
  4817. columnSpec = ('*' | columnNameList)
  4818. tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
  4819. tableNameList = Group(delimitedList(tableName)).setName("tables")
  4820. simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")
  4821. # demo runTests method, including embedded comments in test string
  4822. simpleSQL.runTests("""
  4823. # '*' as column list and dotted table name
  4824. select * from SYS.XYZZY
  4825. # caseless match on "SELECT", and casts back to "select"
  4826. SELECT * from XYZZY, ABC
  4827. # list of column names, and mixed case SELECT keyword
  4828. Select AA,BB,CC from Sys.dual
  4829. # multiple tables
  4830. Select A, B, C from Sys.dual, Table2
  4831. # invalid SELECT keyword - should fail
  4832. Xelect A, B, C from Sys.dual
  4833. # incomplete command - should fail
  4834. Select
  4835. # invalid column name - should fail
  4836. Select ^^^ frox Sys.dual
  4837. """)
  4838. pyparsing_common.number.runTests("""
  4839. 100
  4840. -100
  4841. +100
  4842. 3.14159
  4843. 6.02e23
  4844. 1e-12
  4845. """)
  4846. # any int or real number, returned as float
  4847. pyparsing_common.fnumber.runTests("""
  4848. 100
  4849. -100
  4850. +100
  4851. 3.14159
  4852. 6.02e23
  4853. 1e-12
  4854. """)
  4855. pyparsing_common.hex_integer.runTests("""
  4856. 100
  4857. FF
  4858. """)
  4859. import uuid
  4860. pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
  4861. pyparsing_common.uuid.runTests("""
  4862. 12345678-1234-5678-1234-567812345678
  4863. """)