Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

autopep8.py 130KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961
  1. #!/usr/bin/env python
  2. # Copyright (C) 2010-2011 Hideo Hattori
  3. # Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
  4. # Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining
  7. # a copy of this software and associated documentation files (the
  8. # "Software"), to deal in the Software without restriction, including
  9. # without limitation the rights to use, copy, modify, merge, publish,
  10. # distribute, sublicense, and/or sell copies of the Software, and to
  11. # permit persons to whom the Software is furnished to do so, subject to
  12. # the following conditions:
  13. #
  14. # The above copyright notice and this permission notice shall be
  15. # included in all copies or substantial portions of the Software.
  16. #
  17. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20. # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  21. # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  22. # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23. # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  24. # SOFTWARE.
  25. """Automatically formats Python code to conform to the PEP 8 style guide.
  26. Fixes that only need be done once can be added by adding a function of the form
  27. "fix_<code>(source)" to this module. They should return the fixed source code.
  28. These fixes are picked up by apply_global_fixes().
  29. Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
  30. class documentation for more information.
  31. """
  32. from __future__ import absolute_import
  33. from __future__ import division
  34. from __future__ import print_function
  35. from __future__ import unicode_literals
  36. import argparse
  37. import codecs
  38. import collections
  39. import copy
  40. import difflib
  41. import fnmatch
  42. import inspect
  43. import io
  44. import keyword
  45. import locale
  46. import os
  47. import re
  48. import signal
  49. import sys
  50. import textwrap
  51. import token
  52. import tokenize
  53. import pycodestyle
  54. try:
  55. unicode
  56. except NameError:
  57. unicode = str
  58. __version__ = '1.3.5'
  59. CR = '\r'
  60. LF = '\n'
  61. CRLF = '\r\n'
  62. PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
  63. LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([\(\)\w,\s.]*):')
  64. COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
  65. COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
  66. BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
  67. STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
  68. # For generating line shortening candidates.
  69. SHORTEN_OPERATOR_GROUPS = frozenset([
  70. frozenset([',']),
  71. frozenset(['%']),
  72. frozenset([',', '(', '[', '{']),
  73. frozenset(['%', '(', '[', '{']),
  74. frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
  75. frozenset(['%', '+', '-', '*', '/', '//']),
  76. ])
  77. DEFAULT_IGNORE = 'E226,E24,W503' # TODO: use pycodestyle.DEFAULT_IGNORE
  78. DEFAULT_INDENT_SIZE = 4
  79. SELECTED_GLOBAL_FIXED_METHOD_CODES = ['W602', ]
  80. # W602 is handled separately due to the need to avoid "with_traceback".
  81. CODE_TO_2TO3 = {
  82. 'E231': ['ws_comma'],
  83. 'E721': ['idioms'],
  84. 'W601': ['has_key'],
  85. 'W603': ['ne'],
  86. 'W604': ['repr'],
  87. 'W690': ['apply',
  88. 'except',
  89. 'exitfunc',
  90. 'numliterals',
  91. 'operator',
  92. 'paren',
  93. 'reduce',
  94. 'renames',
  95. 'standarderror',
  96. 'sys_exc',
  97. 'throw',
  98. 'tuple_params',
  99. 'xreadlines']}
  100. if sys.platform == 'win32': # pragma: no cover
  101. DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
  102. else:
  103. DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
  104. os.path.expanduser('~/.config'), 'pep8')
  105. PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
  106. MAX_PYTHON_FILE_DETECTION_BYTES = 1024
  107. def open_with_encoding(filename,
  108. encoding=None, mode='r', limit_byte_check=-1):
  109. """Return opened file with a specific encoding."""
  110. if not encoding:
  111. encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)
  112. return io.open(filename, mode=mode, encoding=encoding,
  113. newline='') # Preserve line endings
  114. def detect_encoding(filename, limit_byte_check=-1):
  115. """Return file encoding."""
  116. try:
  117. with open(filename, 'rb') as input_file:
  118. from lib2to3.pgen2 import tokenize as lib2to3_tokenize
  119. encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
  120. with open_with_encoding(filename, encoding) as test_file:
  121. test_file.read(limit_byte_check)
  122. return encoding
  123. except (LookupError, SyntaxError, UnicodeDecodeError):
  124. return 'latin-1'
  125. def readlines_from_file(filename):
  126. """Return contents of file."""
  127. with open_with_encoding(filename) as input_file:
  128. return input_file.readlines()
  129. def extended_blank_lines(logical_line,
  130. blank_lines,
  131. blank_before,
  132. indent_level,
  133. previous_logical):
  134. """Check for missing blank lines after class declaration."""
  135. if previous_logical.startswith('def '):
  136. if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
  137. yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
  138. elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
  139. # Missing blank line between class docstring and method declaration.
  140. if (
  141. indent_level and
  142. not blank_lines and
  143. not blank_before and
  144. logical_line.startswith(('def ')) and
  145. '(self' in logical_line
  146. ):
  147. yield (0, 'E301 expected 1 blank line, found 0')
  148. pycodestyle.register_check(extended_blank_lines)
  149. def continued_indentation(logical_line, tokens, indent_level, hang_closing,
  150. indent_char, noqa):
  151. """Override pycodestyle's function to provide indentation information."""
  152. first_row = tokens[0][2][0]
  153. nrows = 1 + tokens[-1][2][0] - first_row
  154. if noqa or nrows == 1:
  155. return
  156. # indent_next tells us whether the next block is indented. Assuming
  157. # that it is indented by 4 spaces, then we should not allow 4-space
  158. # indents on the final continuation line. In turn, some other
  159. # indents are allowed to have an extra 4 spaces.
  160. indent_next = logical_line.endswith(':')
  161. row = depth = 0
  162. valid_hangs = (
  163. (DEFAULT_INDENT_SIZE,)
  164. if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
  165. 2 * DEFAULT_INDENT_SIZE)
  166. )
  167. # Remember how many brackets were opened on each line.
  168. parens = [0] * nrows
  169. # Relative indents of physical lines.
  170. rel_indent = [0] * nrows
  171. # For each depth, collect a list of opening rows.
  172. open_rows = [[0]]
  173. # For each depth, memorize the hanging indentation.
  174. hangs = [None]
  175. # Visual indents.
  176. indent_chances = {}
  177. last_indent = tokens[0][2]
  178. indent = [last_indent[1]]
  179. last_token_multiline = None
  180. line = None
  181. last_line = ''
  182. last_line_begins_with_multiline = False
  183. for token_type, text, start, end, line in tokens:
  184. newline = row < start[0] - first_row
  185. if newline:
  186. row = start[0] - first_row
  187. newline = (not last_token_multiline and
  188. token_type not in (tokenize.NL, tokenize.NEWLINE))
  189. last_line_begins_with_multiline = last_token_multiline
  190. if newline:
  191. # This is the beginning of a continuation line.
  192. last_indent = start
  193. # Record the initial indent.
  194. rel_indent[row] = pycodestyle.expand_indent(line) - indent_level
  195. # Identify closing bracket.
  196. close_bracket = (token_type == tokenize.OP and text in ']})')
  197. # Is the indent relative to an opening bracket line?
  198. for open_row in reversed(open_rows[depth]):
  199. hang = rel_indent[row] - rel_indent[open_row]
  200. hanging_indent = hang in valid_hangs
  201. if hanging_indent:
  202. break
  203. if hangs[depth]:
  204. hanging_indent = (hang == hangs[depth])
  205. visual_indent = (not close_bracket and hang > 0 and
  206. indent_chances.get(start[1]))
  207. if close_bracket and indent[depth]:
  208. # Closing bracket for visual indent.
  209. if start[1] != indent[depth]:
  210. yield (start, 'E124 {}'.format(indent[depth]))
  211. elif close_bracket and not hang:
  212. # closing bracket matches indentation of opening bracket's line
  213. if hang_closing:
  214. yield (start, 'E133 {}'.format(indent[depth]))
  215. elif indent[depth] and start[1] < indent[depth]:
  216. # Visual indent is broken.
  217. yield (start, 'E128 {}'.format(indent[depth]))
  218. elif (hanging_indent or
  219. (indent_next and
  220. rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
  221. # Hanging indent is verified.
  222. if close_bracket and not hang_closing:
  223. yield (start, 'E123 {}'.format(indent_level +
  224. rel_indent[open_row]))
  225. hangs[depth] = hang
  226. elif visual_indent is True:
  227. # Visual indent is verified.
  228. indent[depth] = start[1]
  229. elif visual_indent in (text, unicode):
  230. # Ignore token lined up with matching one from a previous line.
  231. pass
  232. else:
  233. one_indented = (indent_level + rel_indent[open_row] +
  234. DEFAULT_INDENT_SIZE)
  235. # Indent is broken.
  236. if hang <= 0:
  237. error = ('E122', one_indented)
  238. elif indent[depth]:
  239. error = ('E127', indent[depth])
  240. elif not close_bracket and hangs[depth]:
  241. error = ('E131', one_indented)
  242. elif hang > DEFAULT_INDENT_SIZE:
  243. error = ('E126', one_indented)
  244. else:
  245. hangs[depth] = hang
  246. error = ('E121', one_indented)
  247. yield (start, '{} {}'.format(*error))
  248. # Look for visual indenting.
  249. if (
  250. parens[row] and
  251. token_type not in (tokenize.NL, tokenize.COMMENT) and
  252. not indent[depth]
  253. ):
  254. indent[depth] = start[1]
  255. indent_chances[start[1]] = True
  256. # Deal with implicit string concatenation.
  257. elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
  258. text in ('u', 'ur', 'b', 'br')):
  259. indent_chances[start[1]] = unicode
  260. # Special case for the "if" statement because len("if (") is equal to
  261. # 4.
  262. elif not indent_chances and not row and not depth and text == 'if':
  263. indent_chances[end[1] + 1] = True
  264. elif text == ':' and line[end[1]:].isspace():
  265. open_rows[depth].append(row)
  266. # Keep track of bracket depth.
  267. if token_type == tokenize.OP:
  268. if text in '([{':
  269. depth += 1
  270. indent.append(0)
  271. hangs.append(None)
  272. if len(open_rows) == depth:
  273. open_rows.append([])
  274. open_rows[depth].append(row)
  275. parens[row] += 1
  276. elif text in ')]}' and depth > 0:
  277. # Parent indents should not be more than this one.
  278. prev_indent = indent.pop() or last_indent[1]
  279. hangs.pop()
  280. for d in range(depth):
  281. if indent[d] > prev_indent:
  282. indent[d] = 0
  283. for ind in list(indent_chances):
  284. if ind >= prev_indent:
  285. del indent_chances[ind]
  286. del open_rows[depth + 1:]
  287. depth -= 1
  288. if depth:
  289. indent_chances[indent[depth]] = True
  290. for idx in range(row, -1, -1):
  291. if parens[idx]:
  292. parens[idx] -= 1
  293. break
  294. assert len(indent) == depth + 1
  295. if (
  296. start[1] not in indent_chances and
  297. # This is for purposes of speeding up E121 (GitHub #90).
  298. not last_line.rstrip().endswith(',')
  299. ):
  300. # Allow to line up tokens.
  301. indent_chances[start[1]] = text
  302. last_token_multiline = (start[0] != end[0])
  303. if last_token_multiline:
  304. rel_indent[end[0] - first_row] = rel_indent[row]
  305. last_line = line
  306. if (
  307. indent_next and
  308. not last_line_begins_with_multiline and
  309. pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
  310. ):
  311. pos = (start[0], indent[0] + 4)
  312. desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
  313. if visual_indent:
  314. yield (pos, 'E129 {}'.format(desired_indent))
  315. else:
  316. yield (pos, 'E125 {}'.format(desired_indent))
  317. del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
  318. pycodestyle.register_check(continued_indentation)
  319. class FixPEP8(object):
  320. """Fix invalid code.
  321. Fixer methods are prefixed "fix_". The _fix_source() method looks for these
  322. automatically.
  323. The fixer method can take either one or two arguments (in addition to
  324. self). The first argument is "result", which is the error information from
  325. pycodestyle. The second argument, "logical", is required only for
  326. logical-line fixes.
  327. The fixer method can return the list of modified lines or None. An empty
  328. list would mean that no changes were made. None would mean that only the
  329. line reported in the pycodestyle error was modified. Note that the modified
  330. line numbers that are returned are indexed at 1. This typically would
  331. correspond with the line number reported in the pycodestyle error
  332. information.
  333. [fixed method list]
  334. - e111,e114,e115,e116
  335. - e121,e122,e123,e124,e125,e126,e127,e128,e129
  336. - e201,e202,e203
  337. - e211
  338. - e221,e222,e223,e224,e225
  339. - e231
  340. - e251
  341. - e261,e262
  342. - e271,e272,e273,e274
  343. - e301,e302,e303,e304,e306
  344. - e401
  345. - e502
  346. - e701,e702,e703,e704
  347. - e711,e712,e713,e714
  348. - e722
  349. - e731
  350. - w291
  351. - w503
  352. """
  353. def __init__(self, filename,
  354. options,
  355. contents=None,
  356. long_line_ignore_cache=None):
  357. self.filename = filename
  358. if contents is None:
  359. self.source = readlines_from_file(filename)
  360. else:
  361. sio = io.StringIO(contents)
  362. self.source = sio.readlines()
  363. self.options = options
  364. self.indent_word = _get_indentword(''.join(self.source))
  365. self.long_line_ignore_cache = (
  366. set() if long_line_ignore_cache is None
  367. else long_line_ignore_cache)
  368. # Many fixers are the same even though pycodestyle categorizes them
  369. # differently.
  370. self.fix_e115 = self.fix_e112
  371. self.fix_e116 = self.fix_e113
  372. self.fix_e121 = self._fix_reindent
  373. self.fix_e122 = self._fix_reindent
  374. self.fix_e123 = self._fix_reindent
  375. self.fix_e124 = self._fix_reindent
  376. self.fix_e126 = self._fix_reindent
  377. self.fix_e127 = self._fix_reindent
  378. self.fix_e128 = self._fix_reindent
  379. self.fix_e129 = self._fix_reindent
  380. self.fix_e133 = self.fix_e131
  381. self.fix_e202 = self.fix_e201
  382. self.fix_e203 = self.fix_e201
  383. self.fix_e211 = self.fix_e201
  384. self.fix_e221 = self.fix_e271
  385. self.fix_e222 = self.fix_e271
  386. self.fix_e223 = self.fix_e271
  387. self.fix_e226 = self.fix_e225
  388. self.fix_e227 = self.fix_e225
  389. self.fix_e228 = self.fix_e225
  390. self.fix_e241 = self.fix_e271
  391. self.fix_e242 = self.fix_e224
  392. self.fix_e261 = self.fix_e262
  393. self.fix_e272 = self.fix_e271
  394. self.fix_e273 = self.fix_e271
  395. self.fix_e274 = self.fix_e271
  396. self.fix_e306 = self.fix_e301
  397. self.fix_e501 = (
  398. self.fix_long_line_logically if
  399. options and (options.aggressive >= 2 or options.experimental) else
  400. self.fix_long_line_physically)
  401. self.fix_e703 = self.fix_e702
  402. self.fix_w293 = self.fix_w291
  403. def _fix_source(self, results):
  404. try:
  405. (logical_start, logical_end) = _find_logical(self.source)
  406. logical_support = True
  407. except (SyntaxError, tokenize.TokenError): # pragma: no cover
  408. logical_support = False
  409. completed_lines = set()
  410. for result in sorted(results, key=_priority_key):
  411. if result['line'] in completed_lines:
  412. continue
  413. fixed_methodname = 'fix_' + result['id'].lower()
  414. if hasattr(self, fixed_methodname):
  415. fix = getattr(self, fixed_methodname)
  416. line_index = result['line'] - 1
  417. original_line = self.source[line_index]
  418. is_logical_fix = len(_get_parameters(fix)) > 2
  419. if is_logical_fix:
  420. logical = None
  421. if logical_support:
  422. logical = _get_logical(self.source,
  423. result,
  424. logical_start,
  425. logical_end)
  426. if logical and set(range(
  427. logical[0][0] + 1,
  428. logical[1][0] + 1)).intersection(
  429. completed_lines):
  430. continue
  431. modified_lines = fix(result, logical)
  432. else:
  433. modified_lines = fix(result)
  434. if modified_lines is None:
  435. # Force logical fixes to report what they modified.
  436. assert not is_logical_fix
  437. if self.source[line_index] == original_line:
  438. modified_lines = []
  439. if modified_lines:
  440. completed_lines.update(modified_lines)
  441. elif modified_lines == []: # Empty list means no fix
  442. if self.options.verbose >= 2:
  443. print(
  444. '---> Not fixing {error} on line {line}'.format(
  445. error=result['id'], line=result['line']),
  446. file=sys.stderr)
  447. else: # We assume one-line fix when None.
  448. completed_lines.add(result['line'])
  449. else:
  450. if self.options.verbose >= 3:
  451. print(
  452. "---> '{}' is not defined.".format(fixed_methodname),
  453. file=sys.stderr)
  454. info = result['info'].strip()
  455. print('---> {}:{}:{}:{}'.format(self.filename,
  456. result['line'],
  457. result['column'],
  458. info),
  459. file=sys.stderr)
  460. def fix(self):
  461. """Return a version of the source code with PEP 8 violations fixed."""
  462. pep8_options = {
  463. 'ignore': self.options.ignore,
  464. 'select': self.options.select,
  465. 'max_line_length': self.options.max_line_length,
  466. 'hang_closing': self.options.hang_closing,
  467. }
  468. results = _execute_pep8(pep8_options, self.source)
  469. if self.options.verbose:
  470. progress = {}
  471. for r in results:
  472. if r['id'] not in progress:
  473. progress[r['id']] = set()
  474. progress[r['id']].add(r['line'])
  475. print('---> {n} issue(s) to fix {progress}'.format(
  476. n=len(results), progress=progress), file=sys.stderr)
  477. if self.options.line_range:
  478. start, end = self.options.line_range
  479. results = [r for r in results
  480. if start <= r['line'] <= end]
  481. self._fix_source(filter_results(source=''.join(self.source),
  482. results=results,
  483. aggressive=self.options.aggressive))
  484. if self.options.line_range:
  485. # If number of lines has changed then change line_range.
  486. count = sum(sline.count('\n')
  487. for sline in self.source[start - 1:end])
  488. self.options.line_range[1] = start + count - 1
  489. return ''.join(self.source)
  490. def _fix_reindent(self, result):
  491. """Fix a badly indented line.
  492. This is done by adding or removing from its initial indent only.
  493. """
  494. num_indent_spaces = int(result['info'].split()[1])
  495. line_index = result['line'] - 1
  496. target = self.source[line_index]
  497. self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
  498. def fix_e112(self, result):
  499. """Fix under-indented comments."""
  500. line_index = result['line'] - 1
  501. target = self.source[line_index]
  502. if not target.lstrip().startswith('#'):
  503. # Don't screw with invalid syntax.
  504. return []
  505. self.source[line_index] = self.indent_word + target
  506. def fix_e113(self, result):
  507. """Fix over-indented comments."""
  508. line_index = result['line'] - 1
  509. target = self.source[line_index]
  510. indent = _get_indentation(target)
  511. stripped = target.lstrip()
  512. if not stripped.startswith('#'):
  513. # Don't screw with invalid syntax.
  514. return []
  515. self.source[line_index] = indent[1:] + stripped
  516. def fix_e125(self, result):
  517. """Fix indentation undistinguish from the next logical line."""
  518. num_indent_spaces = int(result['info'].split()[1])
  519. line_index = result['line'] - 1
  520. target = self.source[line_index]
  521. spaces_to_add = num_indent_spaces - len(_get_indentation(target))
  522. indent = len(_get_indentation(target))
  523. modified_lines = []
  524. while len(_get_indentation(self.source[line_index])) >= indent:
  525. self.source[line_index] = (' ' * spaces_to_add +
  526. self.source[line_index])
  527. modified_lines.append(1 + line_index) # Line indexed at 1.
  528. line_index -= 1
  529. return modified_lines
  530. def fix_e131(self, result):
  531. """Fix indentation undistinguish from the next logical line."""
  532. num_indent_spaces = int(result['info'].split()[1])
  533. line_index = result['line'] - 1
  534. target = self.source[line_index]
  535. spaces_to_add = num_indent_spaces - len(_get_indentation(target))
  536. if spaces_to_add >= 0:
  537. self.source[line_index] = (' ' * spaces_to_add +
  538. self.source[line_index])
  539. else:
  540. offset = abs(spaces_to_add)
  541. self.source[line_index] = self.source[line_index][offset:]
  542. def fix_e201(self, result):
  543. """Remove extraneous whitespace."""
  544. line_index = result['line'] - 1
  545. target = self.source[line_index]
  546. offset = result['column'] - 1
  547. fixed = fix_whitespace(target,
  548. offset=offset,
  549. replacement='')
  550. self.source[line_index] = fixed
  551. def fix_e224(self, result):
  552. """Remove extraneous whitespace around operator."""
  553. target = self.source[result['line'] - 1]
  554. offset = result['column'] - 1
  555. fixed = target[:offset] + target[offset:].replace('\t', ' ')
  556. self.source[result['line'] - 1] = fixed
  557. def fix_e225(self, result):
  558. """Fix missing whitespace around operator."""
  559. target = self.source[result['line'] - 1]
  560. offset = result['column'] - 1
  561. fixed = target[:offset] + ' ' + target[offset:]
  562. # Only proceed if non-whitespace characters match.
  563. # And make sure we don't break the indentation.
  564. if (
  565. fixed.replace(' ', '') == target.replace(' ', '') and
  566. _get_indentation(fixed) == _get_indentation(target)
  567. ):
  568. self.source[result['line'] - 1] = fixed
  569. error_code = result.get('id', 0)
  570. try:
  571. ts = generate_tokens(fixed)
  572. except (SyntaxError, tokenize.TokenError):
  573. return
  574. if not check_syntax(fixed.lstrip()):
  575. return
  576. errors = list(
  577. pycodestyle.missing_whitespace_around_operator(fixed, ts))
  578. for e in reversed(errors):
  579. if error_code != e[1].split()[0]:
  580. continue
  581. offset = e[0][1]
  582. fixed = fixed[:offset] + ' ' + fixed[offset:]
  583. self.source[result['line'] - 1] = fixed
  584. else:
  585. return []
  586. def fix_e231(self, result):
  587. """Add missing whitespace."""
  588. line_index = result['line'] - 1
  589. target = self.source[line_index]
  590. offset = result['column']
  591. fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
  592. self.source[line_index] = fixed
  593. def fix_e251(self, result):
  594. """Remove whitespace around parameter '=' sign."""
  595. line_index = result['line'] - 1
  596. target = self.source[line_index]
  597. # This is necessary since pycodestyle sometimes reports columns that
  598. # goes past the end of the physical line. This happens in cases like,
  599. # foo(bar\n=None)
  600. c = min(result['column'] - 1,
  601. len(target) - 1)
  602. if target[c].strip():
  603. fixed = target
  604. else:
  605. fixed = target[:c].rstrip() + target[c:].lstrip()
  606. # There could be an escaped newline
  607. #
  608. # def foo(a=\
  609. # 1)
  610. if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
  611. self.source[line_index] = fixed.rstrip('\n\r \t\\')
  612. self.source[line_index + 1] = self.source[line_index + 1].lstrip()
  613. return [line_index + 1, line_index + 2] # Line indexed at 1
  614. self.source[result['line'] - 1] = fixed
  615. def fix_e262(self, result):
  616. """Fix spacing after comment hash."""
  617. target = self.source[result['line'] - 1]
  618. offset = result['column']
  619. code = target[:offset].rstrip(' \t#')
  620. comment = target[offset:].lstrip(' \t#')
  621. fixed = code + (' # ' + comment if comment.strip() else '\n')
  622. self.source[result['line'] - 1] = fixed
  623. def fix_e271(self, result):
  624. """Fix extraneous whitespace around keywords."""
  625. line_index = result['line'] - 1
  626. target = self.source[line_index]
  627. offset = result['column'] - 1
  628. fixed = fix_whitespace(target,
  629. offset=offset,
  630. replacement=' ')
  631. if fixed == target:
  632. return []
  633. else:
  634. self.source[line_index] = fixed
  635. def fix_e301(self, result):
  636. """Add missing blank line."""
  637. cr = '\n'
  638. self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
  639. def fix_e302(self, result):
  640. """Add missing 2 blank lines."""
  641. add_linenum = 2 - int(result['info'].split()[-1])
  642. cr = '\n' * add_linenum
  643. self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
  644. def fix_e303(self, result):
  645. """Remove extra blank lines."""
  646. delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
  647. delete_linenum = max(1, delete_linenum)
  648. # We need to count because pycodestyle reports an offset line number if
  649. # there are comments.
  650. cnt = 0
  651. line = result['line'] - 2
  652. modified_lines = []
  653. while cnt < delete_linenum and line >= 0:
  654. if not self.source[line].strip():
  655. self.source[line] = ''
  656. modified_lines.append(1 + line) # Line indexed at 1
  657. cnt += 1
  658. line -= 1
  659. return modified_lines
  660. def fix_e304(self, result):
  661. """Remove blank line following function decorator."""
  662. line = result['line'] - 2
  663. if not self.source[line].strip():
  664. self.source[line] = ''
  665. def fix_e305(self, result):
  666. """Add missing 2 blank lines after end of function or class."""
  667. cr = '\n'
  668. # check comment line
  669. offset = result['line'] - 2
  670. while True:
  671. if offset < 0:
  672. break
  673. line = self.source[offset].lstrip()
  674. if not line:
  675. break
  676. if line[0] != '#':
  677. break
  678. offset -= 1
  679. offset += 1
  680. self.source[offset] = cr + self.source[offset]
  681. return [1 + offset] # Line indexed at 1.
  682. def fix_e401(self, result):
  683. """Put imports on separate lines."""
  684. line_index = result['line'] - 1
  685. target = self.source[line_index]
  686. offset = result['column'] - 1
  687. if not target.lstrip().startswith('import'):
  688. return []
  689. indentation = re.split(pattern=r'\bimport\b',
  690. string=target, maxsplit=1)[0]
  691. fixed = (target[:offset].rstrip('\t ,') + '\n' +
  692. indentation + 'import ' + target[offset:].lstrip('\t ,'))
  693. self.source[line_index] = fixed
  694. def fix_long_line_logically(self, result, logical):
  695. """Try to make lines fit within --max-line-length characters."""
  696. if (
  697. not logical or
  698. len(logical[2]) == 1 or
  699. self.source[result['line'] - 1].lstrip().startswith('#')
  700. ):
  701. return self.fix_long_line_physically(result)
  702. start_line_index = logical[0][0]
  703. end_line_index = logical[1][0]
  704. logical_lines = logical[2]
  705. previous_line = get_item(self.source, start_line_index - 1, default='')
  706. next_line = get_item(self.source, end_line_index + 1, default='')
  707. single_line = join_logical_line(''.join(logical_lines))
  708. try:
  709. fixed = self.fix_long_line(
  710. target=single_line,
  711. previous_line=previous_line,
  712. next_line=next_line,
  713. original=''.join(logical_lines))
  714. except (SyntaxError, tokenize.TokenError):
  715. return self.fix_long_line_physically(result)
  716. if fixed:
  717. for line_index in range(start_line_index, end_line_index + 1):
  718. self.source[line_index] = ''
  719. self.source[start_line_index] = fixed
  720. return range(start_line_index + 1, end_line_index + 1)
  721. return []
  722. def fix_long_line_physically(self, result):
  723. """Try to make lines fit within --max-line-length characters."""
  724. line_index = result['line'] - 1
  725. target = self.source[line_index]
  726. previous_line = get_item(self.source, line_index - 1, default='')
  727. next_line = get_item(self.source, line_index + 1, default='')
  728. try:
  729. fixed = self.fix_long_line(
  730. target=target,
  731. previous_line=previous_line,
  732. next_line=next_line,
  733. original=target)
  734. except (SyntaxError, tokenize.TokenError):
  735. return []
  736. if fixed:
  737. self.source[line_index] = fixed
  738. return [line_index + 1]
  739. return []
  740. def fix_long_line(self, target, previous_line,
  741. next_line, original):
  742. cache_entry = (target, previous_line, next_line)
  743. if cache_entry in self.long_line_ignore_cache:
  744. return []
  745. if target.lstrip().startswith('#'):
  746. if self.options.aggressive:
  747. # Wrap commented lines.
  748. return shorten_comment(
  749. line=target,
  750. max_line_length=self.options.max_line_length,
  751. last_comment=not next_line.lstrip().startswith('#'))
  752. return []
  753. fixed = get_fixed_long_line(
  754. target=target,
  755. previous_line=previous_line,
  756. original=original,
  757. indent_word=self.indent_word,
  758. max_line_length=self.options.max_line_length,
  759. aggressive=self.options.aggressive,
  760. experimental=self.options.experimental,
  761. verbose=self.options.verbose)
  762. if fixed and not code_almost_equal(original, fixed):
  763. return fixed
  764. self.long_line_ignore_cache.add(cache_entry)
  765. return None
  766. def fix_e502(self, result):
  767. """Remove extraneous escape of newline."""
  768. (line_index, _, target) = get_index_offset_contents(result,
  769. self.source)
  770. self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
  771. def fix_e701(self, result):
  772. """Put colon-separated compound statement on separate lines."""
  773. line_index = result['line'] - 1
  774. target = self.source[line_index]
  775. c = result['column']
  776. fixed_source = (target[:c] + '\n' +
  777. _get_indentation(target) + self.indent_word +
  778. target[c:].lstrip('\n\r \t\\'))
  779. self.source[result['line'] - 1] = fixed_source
  780. return [result['line'], result['line'] + 1]
  781. def fix_e702(self, result, logical):
  782. """Put semicolon-separated compound statement on separate lines."""
  783. if not logical:
  784. return [] # pragma: no cover
  785. logical_lines = logical[2]
  786. # Avoid applying this when indented.
  787. # https://docs.python.org/reference/compound_stmts.html
  788. for line in logical_lines:
  789. if ':' in line and STARTSWITH_DEF_REGEX.match(line):
  790. return []
  791. line_index = result['line'] - 1
  792. target = self.source[line_index]
  793. if target.rstrip().endswith('\\'):
  794. # Normalize '1; \\\n2' into '1; 2'.
  795. self.source[line_index] = target.rstrip('\n \r\t\\')
  796. self.source[line_index + 1] = self.source[line_index + 1].lstrip()
  797. return [line_index + 1, line_index + 2]
  798. if target.rstrip().endswith(';'):
  799. self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
  800. return [line_index + 1]
  801. offset = result['column'] - 1
  802. first = target[:offset].rstrip(';').rstrip()
  803. second = (_get_indentation(logical_lines[0]) +
  804. target[offset:].lstrip(';').lstrip())
  805. # Find inline comment.
  806. inline_comment = None
  807. if target[offset:].lstrip(';').lstrip()[:2] == '# ':
  808. inline_comment = target[offset:].lstrip(';')
  809. if inline_comment:
  810. self.source[line_index] = first + inline_comment
  811. else:
  812. self.source[line_index] = first + '\n' + second
  813. return [line_index + 1]
  814. def fix_e704(self, result):
  815. """Fix multiple statements on one line def"""
  816. (line_index, _, target) = get_index_offset_contents(result,
  817. self.source)
  818. match = STARTSWITH_DEF_REGEX.match(target)
  819. if match:
  820. self.source[line_index] = '{}\n{}{}'.format(
  821. match.group(0),
  822. _get_indentation(target) + self.indent_word,
  823. target[match.end(0):].lstrip())
  824. def fix_e711(self, result):
  825. """Fix comparison with None."""
  826. (line_index, offset, target) = get_index_offset_contents(result,
  827. self.source)
  828. right_offset = offset + 2
  829. if right_offset >= len(target):
  830. return []
  831. left = target[:offset].rstrip()
  832. center = target[offset:right_offset]
  833. right = target[right_offset:].lstrip()
  834. if not right.startswith('None'):
  835. return []
  836. if center.strip() == '==':
  837. new_center = 'is'
  838. elif center.strip() == '!=':
  839. new_center = 'is not'
  840. else:
  841. return []
  842. self.source[line_index] = ' '.join([left, new_center, right])
  843. def fix_e712(self, result):
  844. """Fix (trivial case of) comparison with boolean."""
  845. (line_index, offset, target) = get_index_offset_contents(result,
  846. self.source)
  847. # Handle very easy "not" special cases.
  848. if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
  849. self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
  850. r'if not \1:', target, count=1)
  851. elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
  852. self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
  853. r'if not \1:', target, count=1)
  854. else:
  855. right_offset = offset + 2
  856. if right_offset >= len(target):
  857. return []
  858. left = target[:offset].rstrip()
  859. center = target[offset:right_offset]
  860. right = target[right_offset:].lstrip()
  861. # Handle simple cases only.
  862. new_right = None
  863. if center.strip() == '==':
  864. if re.match(r'\bTrue\b', right):
  865. new_right = re.sub(r'\bTrue\b *', '', right, count=1)
  866. elif center.strip() == '!=':
  867. if re.match(r'\bFalse\b', right):
  868. new_right = re.sub(r'\bFalse\b *', '', right, count=1)
  869. if new_right is None:
  870. return []
  871. if new_right[0].isalnum():
  872. new_right = ' ' + new_right
  873. self.source[line_index] = left + new_right
  874. def fix_e713(self, result):
  875. """Fix (trivial case of) non-membership check."""
  876. (line_index, offset, target) = get_index_offset_contents(result,
  877. self.source)
  878. # to convert once 'not in' -> 'in'
  879. before_target = target[:offset]
  880. target = target[offset:]
  881. match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
  882. notin_pos_start, notin_pos_end = 0, 0
  883. if match_notin:
  884. notin_pos_start = match_notin.start(1)
  885. notin_pos_end = match_notin.end()
  886. target = '{}{} {}'.format(
  887. target[:notin_pos_start], 'in', target[notin_pos_end:])
  888. # fix 'not in'
  889. match = COMPARE_NEGATIVE_REGEX.search(target)
  890. if match:
  891. if match.group(3) == 'in':
  892. pos_start = match.start(1)
  893. new_target = '{5}{0}{1} {2} {3} {4}'.format(
  894. target[:pos_start], match.group(2), match.group(1),
  895. match.group(3), target[match.end():], before_target)
  896. if match_notin:
  897. # revert 'in' -> 'not in'
  898. pos_start = notin_pos_start + offset
  899. pos_end = notin_pos_end + offset - 4 # len('not ')
  900. new_target = '{}{} {}'.format(
  901. new_target[:pos_start], 'not in', new_target[pos_end:])
  902. self.source[line_index] = new_target
  903. def fix_e714(self, result):
  904. """Fix object identity should be 'is not' case."""
  905. (line_index, offset, target) = get_index_offset_contents(result,
  906. self.source)
  907. # to convert once 'is not' -> 'is'
  908. before_target = target[:offset]
  909. target = target[offset:]
  910. match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
  911. isnot_pos_start, isnot_pos_end = 0, 0
  912. if match_isnot:
  913. isnot_pos_start = match_isnot.start(1)
  914. isnot_pos_end = match_isnot.end()
  915. target = '{}{} {}'.format(
  916. target[:isnot_pos_start], 'in', target[isnot_pos_end:])
  917. match = COMPARE_NEGATIVE_REGEX.search(target)
  918. if match:
  919. if match.group(3).startswith('is'):
  920. pos_start = match.start(1)
  921. new_target = '{5}{0}{1} {2} {3} {4}'.format(
  922. target[:pos_start], match.group(2), match.group(3),
  923. match.group(1), target[match.end():], before_target)
  924. if match_isnot:
  925. # revert 'is' -> 'is not'
  926. pos_start = isnot_pos_start + offset
  927. pos_end = isnot_pos_end + offset - 4 # len('not ')
  928. new_target = '{}{} {}'.format(
  929. new_target[:pos_start], 'is not', new_target[pos_end:])
  930. self.source[line_index] = new_target
  931. def fix_e722(self, result):
  932. """fix bare except"""
  933. (line_index, _, target) = get_index_offset_contents(result,
  934. self.source)
  935. match = BARE_EXCEPT_REGEX.search(target)
  936. if match:
  937. self.source[line_index] = '{}{}{}'.format(
  938. target[:result['column'] - 1], "except BaseException:",
  939. target[match.end():])
  940. def fix_e731(self, result):
  941. """Fix do not assign a lambda expression check."""
  942. (line_index, _, target) = get_index_offset_contents(result,
  943. self.source)
  944. match = LAMBDA_REGEX.search(target)
  945. if match:
  946. end = match.end()
  947. self.source[line_index] = '{}def {}({}): return {}'.format(
  948. target[:match.start(0)], match.group(1), match.group(2),
  949. target[end:].lstrip())
  950. def fix_w291(self, result):
  951. """Remove trailing whitespace."""
  952. fixed_line = self.source[result['line'] - 1].rstrip()
  953. self.source[result['line'] - 1] = fixed_line + '\n'
  954. def fix_w391(self, _):
  955. """Remove trailing blank lines."""
  956. blank_count = 0
  957. for line in reversed(self.source):
  958. line = line.rstrip()
  959. if line:
  960. break
  961. else:
  962. blank_count += 1
  963. original_length = len(self.source)
  964. self.source = self.source[:original_length - blank_count]
  965. return range(1, 1 + original_length)
  966. def fix_w503(self, result):
  967. (line_index, _, target) = get_index_offset_contents(result,
  968. self.source)
  969. one_string_token = target.split()[0]
  970. try:
  971. ts = generate_tokens(one_string_token)
  972. except (SyntaxError, tokenize.TokenError):
  973. return
  974. if not _is_binary_operator(ts[0][0], one_string_token):
  975. return
  976. # find comment
  977. comment_index = 0
  978. for i in range(5):
  979. # NOTE: try to parse code in 5 times
  980. if (line_index - i) < 0:
  981. break
  982. from_index = line_index - i - 1
  983. to_index = line_index + 1
  984. try:
  985. ts = generate_tokens("".join(self.source[from_index:to_index]))
  986. except (SyntaxError, tokenize.TokenError):
  987. continue
  988. newline_count = 0
  989. newline_index = []
  990. for index, t in enumerate(ts):
  991. if t[0] in (tokenize.NEWLINE, tokenize.NL):
  992. newline_index.append(index)
  993. newline_count += 1
  994. if newline_count > 2:
  995. tts = ts[newline_index[-3]:]
  996. else:
  997. tts = ts
  998. old = []
  999. for t in tts:
  1000. if tokenize.COMMENT == t[0] and old:
  1001. comment_index = old[3][1]
  1002. break
  1003. old = t
  1004. break
  1005. i = target.index(one_string_token)
  1006. self.source[line_index] = '{}{}'.format(
  1007. target[:i], target[i + len(one_string_token):])
  1008. nl = find_newline(self.source[line_index - 1:line_index])
  1009. before_line = self.source[line_index - 1]
  1010. bl = before_line.index(nl)
  1011. if comment_index:
  1012. self.source[line_index - 1] = '{} {} {}'.format(
  1013. before_line[:comment_index], one_string_token,
  1014. before_line[comment_index + 1:])
  1015. else:
  1016. self.source[line_index - 1] = '{} {}{}'.format(
  1017. before_line[:bl], one_string_token, before_line[bl:])
  1018. def get_index_offset_contents(result, source):
  1019. """Return (line_index, column_offset, line_contents)."""
  1020. line_index = result['line'] - 1
  1021. return (line_index,
  1022. result['column'] - 1,
  1023. source[line_index])
  1024. def get_fixed_long_line(target, previous_line, original,
  1025. indent_word=' ', max_line_length=79,
  1026. aggressive=False, experimental=False, verbose=False):
  1027. """Break up long line and return result.
  1028. Do this by generating multiple reformatted candidates and then
  1029. ranking the candidates to heuristically select the best option.
  1030. """
  1031. indent = _get_indentation(target)
  1032. source = target[len(indent):]
  1033. assert source.lstrip() == source
  1034. assert not target.lstrip().startswith('#')
  1035. # Check for partial multiline.
  1036. tokens = list(generate_tokens(source))
  1037. candidates = shorten_line(
  1038. tokens, source, indent,
  1039. indent_word,
  1040. max_line_length,
  1041. aggressive=aggressive,
  1042. experimental=experimental,
  1043. previous_line=previous_line)
  1044. # Also sort alphabetically as a tie breaker (for determinism).
  1045. candidates = sorted(
  1046. sorted(set(candidates).union([target, original])),
  1047. key=lambda x: line_shortening_rank(
  1048. x,
  1049. indent_word,
  1050. max_line_length,
  1051. experimental=experimental))
  1052. if verbose >= 4:
  1053. print(('-' * 79 + '\n').join([''] + candidates + ['']),
  1054. file=wrap_output(sys.stderr, 'utf-8'))
  1055. if candidates:
  1056. best_candidate = candidates[0]
  1057. # Don't allow things to get longer.
  1058. if longest_line_length(best_candidate) > longest_line_length(original):
  1059. return None
  1060. return best_candidate
  1061. def longest_line_length(code):
  1062. """Return length of longest line."""
  1063. return max(len(line) for line in code.splitlines())
  1064. def join_logical_line(logical_line):
  1065. """Return single line based on logical line input."""
  1066. indentation = _get_indentation(logical_line)
  1067. return indentation + untokenize_without_newlines(
  1068. generate_tokens(logical_line.lstrip())) + '\n'
  1069. def untokenize_without_newlines(tokens):
  1070. """Return source code based on tokens."""
  1071. text = ''
  1072. last_row = 0
  1073. last_column = -1
  1074. for t in tokens:
  1075. token_string = t[1]
  1076. (start_row, start_column) = t[2]
  1077. (end_row, end_column) = t[3]
  1078. if start_row > last_row:
  1079. last_column = 0
  1080. if (
  1081. (start_column > last_column or token_string == '\n') and
  1082. not text.endswith(' ')
  1083. ):
  1084. text += ' '
  1085. if token_string != '\n':
  1086. text += token_string
  1087. last_row = end_row
  1088. last_column = end_column
  1089. return text.rstrip()
  1090. def _find_logical(source_lines):
  1091. # Make a variable which is the index of all the starts of lines.
  1092. logical_start = []
  1093. logical_end = []
  1094. last_newline = True
  1095. parens = 0
  1096. for t in generate_tokens(''.join(source_lines)):
  1097. if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
  1098. tokenize.INDENT, tokenize.NL,
  1099. tokenize.ENDMARKER]:
  1100. continue
  1101. if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
  1102. last_newline = True
  1103. logical_end.append((t[3][0] - 1, t[2][1]))
  1104. continue
  1105. if last_newline and not parens:
  1106. logical_start.append((t[2][0] - 1, t[2][1]))
  1107. last_newline = False
  1108. if t[0] == tokenize.OP:
  1109. if t[1] in '([{':
  1110. parens += 1
  1111. elif t[1] in '}])':
  1112. parens -= 1
  1113. return (logical_start, logical_end)
  1114. def _get_logical(source_lines, result, logical_start, logical_end):
  1115. """Return the logical line corresponding to the result.
  1116. Assumes input is already E702-clean.
  1117. """
  1118. row = result['line'] - 1
  1119. col = result['column'] - 1
  1120. ls = None
  1121. le = None
  1122. for i in range(0, len(logical_start), 1):
  1123. assert logical_end
  1124. x = logical_end[i]
  1125. if x[0] > row or (x[0] == row and x[1] > col):
  1126. le = x
  1127. ls = logical_start[i]
  1128. break
  1129. if ls is None:
  1130. return None
  1131. original = source_lines[ls[0]:le[0] + 1]
  1132. return ls, le, original
  1133. def get_item(items, index, default=None):
  1134. if 0 <= index < len(items):
  1135. return items[index]
  1136. return default
  1137. def reindent(source, indent_size):
  1138. """Reindent all lines."""
  1139. reindenter = Reindenter(source)
  1140. return reindenter.run(indent_size)
  1141. def code_almost_equal(a, b):
  1142. """Return True if code is similar.
  1143. Ignore whitespace when comparing specific line.
  1144. """
  1145. split_a = split_and_strip_non_empty_lines(a)
  1146. split_b = split_and_strip_non_empty_lines(b)
  1147. if len(split_a) != len(split_b):
  1148. return False
  1149. for (index, _) in enumerate(split_a):
  1150. if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
  1151. return False
  1152. return True
  1153. def split_and_strip_non_empty_lines(text):
  1154. """Return lines split by newline.
  1155. Ignore empty lines.
  1156. """
  1157. return [line.strip() for line in text.splitlines() if line.strip()]
  1158. def fix_e265(source, aggressive=False): # pylint: disable=unused-argument
  1159. """Format block comments."""
  1160. if '#' not in source:
  1161. # Optimization.
  1162. return source
  1163. ignored_line_numbers = multiline_string_lines(
  1164. source,
  1165. include_docstrings=True) | set(commented_out_code_lines(source))
  1166. fixed_lines = []
  1167. sio = io.StringIO(source)
  1168. for (line_number, line) in enumerate(sio.readlines(), start=1):
  1169. if (
  1170. line.lstrip().startswith('#') and
  1171. line_number not in ignored_line_numbers and
  1172. not pycodestyle.noqa(line)
  1173. ):
  1174. indentation = _get_indentation(line)
  1175. line = line.lstrip()
  1176. # Normalize beginning if not a shebang.
  1177. if len(line) > 1:
  1178. pos = next((index for index, c in enumerate(line)
  1179. if c != '#'))
  1180. if (
  1181. # Leave multiple spaces like '# ' alone.
  1182. (line[:pos].count('#') > 1 or line[1].isalnum() or
  1183. not line[1].isspace()) and
  1184. line[1] not in ':!' and
  1185. # Leave stylistic outlined blocks alone.
  1186. not line.rstrip().endswith('#')
  1187. ):
  1188. line = '# ' + line.lstrip('# \t')
  1189. fixed_lines.append(indentation + line)
  1190. else:
  1191. fixed_lines.append(line)
  1192. return ''.join(fixed_lines)
  1193. def refactor(source, fixer_names, ignore=None, filename=''):
  1194. """Return refactored code using lib2to3.
  1195. Skip if ignore string is produced in the refactored code.
  1196. """
  1197. from lib2to3 import pgen2
  1198. try:
  1199. new_text = refactor_with_2to3(source,
  1200. fixer_names=fixer_names,
  1201. filename=filename)
  1202. except (pgen2.parse.ParseError,
  1203. SyntaxError,
  1204. UnicodeDecodeError,
  1205. UnicodeEncodeError):
  1206. return source
  1207. if ignore:
  1208. if ignore in new_text and ignore not in source:
  1209. return source
  1210. return new_text
  1211. def code_to_2to3(select, ignore, where='', verbose=False):
  1212. fixes = set()
  1213. for code, fix in CODE_TO_2TO3.items():
  1214. if code_match(code, select=select, ignore=ignore):
  1215. if verbose:
  1216. print('---> Applying {} fix for {}'.format(where,
  1217. code.upper()),
  1218. file=sys.stderr)
  1219. fixes |= set(fix)
  1220. return fixes
  1221. def fix_2to3(source,
  1222. aggressive=True, select=None, ignore=None, filename='',
  1223. where='global', verbose=False):
  1224. """Fix various deprecated code (via lib2to3)."""
  1225. if not aggressive:
  1226. return source
  1227. select = select or []
  1228. ignore = ignore or []
  1229. return refactor(source,
  1230. code_to_2to3(select=select,
  1231. ignore=ignore,
  1232. where=where,
  1233. verbose=verbose),
  1234. filename=filename)
  1235. def fix_w602(source, aggressive=True):
  1236. """Fix deprecated form of raising exception."""
  1237. if not aggressive:
  1238. return source
  1239. return refactor(source, ['raise'], ignore='with_traceback')
  1240. def find_newline(source):
  1241. """Return type of newline used in source.
  1242. Input is a list of lines.
  1243. """
  1244. assert not isinstance(source, unicode)
  1245. counter = collections.defaultdict(int)
  1246. for line in source:
  1247. if line.endswith(CRLF):
  1248. counter[CRLF] += 1
  1249. elif line.endswith(CR):
  1250. counter[CR] += 1
  1251. elif line.endswith(LF):
  1252. counter[LF] += 1
  1253. return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
  1254. def _get_indentword(source):
  1255. """Return indentation type."""
  1256. indent_word = ' ' # Default in case source has no indentation
  1257. try:
  1258. for t in generate_tokens(source):
  1259. if t[0] == token.INDENT:
  1260. indent_word = t[1]
  1261. break
  1262. except (SyntaxError, tokenize.TokenError):
  1263. pass
  1264. return indent_word
  1265. def _get_indentation(line):
  1266. """Return leading whitespace."""
  1267. if line.strip():
  1268. non_whitespace_index = len(line) - len(line.lstrip())
  1269. return line[:non_whitespace_index]
  1270. return ''
  1271. def get_diff_text(old, new, filename):
  1272. """Return text of unified diff between old and new."""
  1273. newline = '\n'
  1274. diff = difflib.unified_diff(
  1275. old, new,
  1276. 'original/' + filename,
  1277. 'fixed/' + filename,
  1278. lineterm=newline)
  1279. text = ''
  1280. for line in diff:
  1281. text += line
  1282. # Work around missing newline (http://bugs.python.org/issue2142).
  1283. if text and not line.endswith(newline):
  1284. text += newline + r'\ No newline at end of file' + newline
  1285. return text
  1286. def _priority_key(pep8_result):
  1287. """Key for sorting PEP8 results.
  1288. Global fixes should be done first. This is important for things like
  1289. indentation.
  1290. """
  1291. priority = [
  1292. # Fix multiline colon-based before semicolon based.
  1293. 'e701',
  1294. # Break multiline statements early.
  1295. 'e702',
  1296. # Things that make lines longer.
  1297. 'e225', 'e231',
  1298. # Remove extraneous whitespace before breaking lines.
  1299. 'e201',
  1300. # Shorten whitespace in comment before resorting to wrapping.
  1301. 'e262'
  1302. ]
  1303. middle_index = 10000
  1304. lowest_priority = [
  1305. # We need to shorten lines last since the logical fixer can get in a
  1306. # loop, which causes us to exit early.
  1307. 'e501',
  1308. 'w503'
  1309. ]
  1310. key = pep8_result['id'].lower()
  1311. try:
  1312. return priority.index(key)
  1313. except ValueError:
  1314. try:
  1315. return middle_index + lowest_priority.index(key) + 1
  1316. except ValueError:
  1317. return middle_index
  1318. def shorten_line(tokens, source, indentation, indent_word, max_line_length,
  1319. aggressive=False, experimental=False, previous_line=''):
  1320. """Separate line at OPERATOR.
  1321. Multiple candidates will be yielded.
  1322. """
  1323. for candidate in _shorten_line(tokens=tokens,
  1324. source=source,
  1325. indentation=indentation,
  1326. indent_word=indent_word,
  1327. aggressive=aggressive,
  1328. previous_line=previous_line):
  1329. yield candidate
  1330. if aggressive:
  1331. for key_token_strings in SHORTEN_OPERATOR_GROUPS:
  1332. shortened = _shorten_line_at_tokens(
  1333. tokens=tokens,
  1334. source=source,
  1335. indentation=indentation,
  1336. indent_word=indent_word,
  1337. key_token_strings=key_token_strings,
  1338. aggressive=aggressive)
  1339. if shortened is not None and shortened != source:
  1340. yield shortened
  1341. if experimental:
  1342. for shortened in _shorten_line_at_tokens_new(
  1343. tokens=tokens,
  1344. source=source,
  1345. indentation=indentation,
  1346. max_line_length=max_line_length):
  1347. yield shortened
def _shorten_line(tokens, source, indentation, indent_word,
                  aggressive=False, previous_line=''):
    """Separate line at OPERATOR.

    The input is expected to be free of newlines except for inside multiline
    strings and at the end.

    Multiple candidates will be yielded: one for each inline comment that can
    be moved onto its own line, and one for each operator token (other than
    '=') after which the line can be split while keeping valid syntax.

    """
    for (token_type,
         token_string,
         start_offset,
         end_offset) in token_offsets(tokens):

        if (
            token_type == tokenize.COMMENT and
            not is_probably_part_of_multiline(previous_line) and
            not is_probably_part_of_multiline(source) and
            not source[start_offset + 1:].strip().lower().startswith(
                ('noqa', 'pragma:', 'pylint:'))
        ):
            # Move inline comments to previous line.
            first = source[:start_offset]
            second = source[start_offset:]
            yield (indentation + second.strip() + '\n' +
                   indentation + first.strip() + '\n')
        elif token_type == token.OP and token_string != '=':
            # Don't break on '=' after keyword as this violates PEP 8.

            assert token_type != token.INDENT

            first = source[:end_offset]

            # Choose the indentation of the continuation line based on where
            # (if anywhere) the first part opens a parenthesis.
            second_indent = indentation
            if (first.rstrip().endswith('(') and
                    source[end_offset:].lstrip().startswith(')')):
                # Empty parentheses: keep the base indentation.
                pass
            elif first.rstrip().endswith('('):
                second_indent += indent_word
            elif '(' in first:
                # Align with the character after the first '('.
                second_indent += ' ' * (1 + first.find('('))
            else:
                second_indent += indent_word

            second = (second_indent + source[end_offset:].lstrip())
            # Skip splits that would leave nothing, or only a comment, on the
            # second line.
            if (
                not second.strip() or
                second.lstrip().startswith('#')
            ):
                continue

            # Do not begin a line with a comma
            if second.lstrip().startswith(','):
                continue
            # Do not end a line with a dot
            if first.rstrip().endswith('.'):
                continue
            if token_string in '+-*/':
                # These splits get an explicit backslash continuation.
                fixed = first + ' \\' + '\n' + second
            else:
                fixed = first + '\n' + second

            # Only fix if syntax is okay.
            if check_syntax(normalize_multiline(fixed)
                            if aggressive else fixed):
                yield indentation + fixed
  1405. def _is_binary_operator(token_type, text):
  1406. return ((token_type == tokenize.OP or text in ['and', 'or']) and
  1407. text not in '()[]{},:.;@=%~')
  1408. # A convenient way to handle tokens.
  1409. Token = collections.namedtuple('Token', ['token_type', 'token_string',
  1410. 'spos', 'epos', 'line'])
  1411. class ReformattedLines(object):
  1412. """The reflowed lines of atoms.
  1413. Each part of the line is represented as an "atom." They can be moved
  1414. around when need be to get the optimal formatting.
  1415. """
  1416. ###########################################################################
  1417. # Private Classes
  1418. class _Indent(object):
  1419. """Represent an indentation in the atom stream."""
  1420. def __init__(self, indent_amt):
  1421. self._indent_amt = indent_amt
  1422. def emit(self):
  1423. return ' ' * self._indent_amt
  1424. @property
  1425. def size(self):
  1426. return self._indent_amt
  1427. class _Space(object):
  1428. """Represent a space in the atom stream."""
  1429. def emit(self):
  1430. return ' '
  1431. @property
  1432. def size(self):
  1433. return 1
  1434. class _LineBreak(object):
  1435. """Represent a line break in the atom stream."""
  1436. def emit(self):
  1437. return '\n'
  1438. @property
  1439. def size(self):
  1440. return 0
  1441. def __init__(self, max_line_length):
  1442. self._max_line_length = max_line_length
  1443. self._lines = []
  1444. self._bracket_depth = 0
  1445. self._prev_item = None
  1446. self._prev_prev_item = None
  1447. def __repr__(self):
  1448. return self.emit()
  1449. ###########################################################################
  1450. # Public Methods
  1451. def add(self, obj, indent_amt, break_after_open_bracket):
  1452. if isinstance(obj, Atom):
  1453. self._add_item(obj, indent_amt)
  1454. return
  1455. self._add_container(obj, indent_amt, break_after_open_bracket)
  1456. def add_comment(self, item):
  1457. num_spaces = 2
  1458. if len(self._lines) > 1:
  1459. if isinstance(self._lines[-1], self._Space):
  1460. num_spaces -= 1
  1461. if len(self._lines) > 2:
  1462. if isinstance(self._lines[-2], self._Space):
  1463. num_spaces -= 1
  1464. while num_spaces > 0:
  1465. self._lines.append(self._Space())
  1466. num_spaces -= 1
  1467. self._lines.append(item)
  1468. def add_indent(self, indent_amt):
  1469. self._lines.append(self._Indent(indent_amt))
  1470. def add_line_break(self, indent):
  1471. self._lines.append(self._LineBreak())
  1472. self.add_indent(len(indent))
  1473. def add_line_break_at(self, index, indent_amt):
  1474. self._lines.insert(index, self._LineBreak())
  1475. self._lines.insert(index + 1, self._Indent(indent_amt))
  1476. def add_space_if_needed(self, curr_text, equal=False):
  1477. if (
  1478. not self._lines or isinstance(
  1479. self._lines[-1], (self._LineBreak, self._Indent, self._Space))
  1480. ):
  1481. return
  1482. prev_text = unicode(self._prev_item)
  1483. prev_prev_text = (
  1484. unicode(self._prev_prev_item) if self._prev_prev_item else '')
  1485. if (
  1486. # The previous item was a keyword or identifier and the current
  1487. # item isn't an operator that doesn't require a space.
  1488. ((self._prev_item.is_keyword or self._prev_item.is_string or
  1489. self._prev_item.is_name or self._prev_item.is_number) and
  1490. (curr_text[0] not in '([{.,:}])' or
  1491. (curr_text[0] == '=' and equal))) or
  1492. # Don't place spaces around a '.', unless it's in an 'import'
  1493. # statement.
  1494. ((prev_prev_text != 'from' and prev_text[-1] != '.' and
  1495. curr_text != 'import') and
  1496. # Don't place a space before a colon.
  1497. curr_text[0] != ':' and
  1498. # Don't split up ending brackets by spaces.
  1499. ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
  1500. # Put a space after a colon or comma.
  1501. prev_text[-1] in ':,' or
  1502. # Put space around '=' if asked to.
  1503. (equal and prev_text == '=') or
  1504. # Put spaces around non-unary arithmetic operators.
  1505. ((self._prev_prev_item and
  1506. (prev_text not in '+-' and
  1507. (self._prev_prev_item.is_name or
  1508. self._prev_prev_item.is_number or
  1509. self._prev_prev_item.is_string)) and
  1510. prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in')))))
  1511. ):
  1512. self._lines.append(self._Space())
  1513. def previous_item(self):
  1514. """Return the previous non-whitespace item."""
  1515. return self._prev_item
  1516. def fits_on_current_line(self, item_extent):
  1517. return self.current_size() + item_extent <= self._max_line_length
  1518. def current_size(self):
  1519. """The size of the current line minus the indentation."""
  1520. size = 0
  1521. for item in reversed(self._lines):
  1522. size += item.size
  1523. if isinstance(item, self._LineBreak):
  1524. break
  1525. return size
  1526. def line_empty(self):
  1527. return (self._lines and
  1528. isinstance(self._lines[-1],
  1529. (self._LineBreak, self._Indent)))
  1530. def emit(self):
  1531. string = ''
  1532. for item in self._lines:
  1533. if isinstance(item, self._LineBreak):
  1534. string = string.rstrip()
  1535. string += item.emit()
  1536. return string.rstrip() + '\n'
  1537. ###########################################################################
  1538. # Private Methods
  1539. def _add_item(self, item, indent_amt):
  1540. """Add an item to the line.
  1541. Reflow the line to get the best formatting after the item is
  1542. inserted. The bracket depth indicates if the item is being
  1543. inserted inside of a container or not.
  1544. """
  1545. if self._prev_item and self._prev_item.is_string and item.is_string:
  1546. # Place consecutive string literals on separate lines.
  1547. self._lines.append(self._LineBreak())
  1548. self._lines.append(self._Indent(indent_amt))
  1549. item_text = unicode(item)
  1550. if self._lines and self._bracket_depth:
  1551. # Adding the item into a container.
  1552. self._prevent_default_initializer_splitting(item, indent_amt)
  1553. if item_text in '.,)]}':
  1554. self._split_after_delimiter(item, indent_amt)
  1555. elif self._lines and not self.line_empty():
  1556. # Adding the item outside of a container.
  1557. if self.fits_on_current_line(len(item_text)):
  1558. self._enforce_space(item)
  1559. else:
  1560. # Line break for the new item.
  1561. self._lines.append(self._LineBreak())
  1562. self._lines.append(self._Indent(indent_amt))
  1563. self._lines.append(item)
  1564. self._prev_item, self._prev_prev_item = item, self._prev_item
  1565. if item_text in '([{':
  1566. self._bracket_depth += 1
  1567. elif item_text in '}])':
  1568. self._bracket_depth -= 1
  1569. assert self._bracket_depth >= 0
  1570. def _add_container(self, container, indent_amt, break_after_open_bracket):
  1571. actual_indent = indent_amt + 1
  1572. if (
  1573. unicode(self._prev_item) != '=' and
  1574. not self.line_empty() and
  1575. not self.fits_on_current_line(
  1576. container.size + self._bracket_depth + 2)
  1577. ):
  1578. if unicode(container)[0] == '(' and self._prev_item.is_name:
  1579. # Don't split before the opening bracket of a call.
  1580. break_after_open_bracket = True
  1581. actual_indent = indent_amt + 4
  1582. elif (
  1583. break_after_open_bracket or
  1584. unicode(self._prev_item) not in '([{'
  1585. ):
  1586. # If the container doesn't fit on the current line and the
  1587. # current line isn't empty, place the container on the next
  1588. # line.
  1589. self._lines.append(self._LineBreak())
  1590. self._lines.append(self._Indent(indent_amt))
  1591. break_after_open_bracket = False
  1592. else:
  1593. actual_indent = self.current_size() + 1
  1594. break_after_open_bracket = False
  1595. if isinstance(container, (ListComprehension, IfExpression)):
  1596. actual_indent = indent_amt
  1597. # Increase the continued indentation only if recursing on a
  1598. # container.
  1599. container.reflow(self, ' ' * actual_indent,
  1600. break_after_open_bracket=break_after_open_bracket)
  1601. def _prevent_default_initializer_splitting(self, item, indent_amt):
  1602. """Prevent splitting between a default initializer.
  1603. When there is a default initializer, it's best to keep it all on
  1604. the same line. It's nicer and more readable, even if it goes
  1605. over the maximum allowable line length. This goes back along the
  1606. current line to determine if we have a default initializer, and,
  1607. if so, to remove extraneous whitespaces and add a line
  1608. break/indent before it if needed.
  1609. """
  1610. if unicode(item) == '=':
  1611. # This is the assignment in the initializer. Just remove spaces for
  1612. # now.
  1613. self._delete_whitespace()
  1614. return
  1615. if (not self._prev_item or not self._prev_prev_item or
  1616. unicode(self._prev_item) != '='):
  1617. return
  1618. self._delete_whitespace()
  1619. prev_prev_index = self._lines.index(self._prev_prev_item)
  1620. if (
  1621. isinstance(self._lines[prev_prev_index - 1], self._Indent) or
  1622. self.fits_on_current_line(item.size + 1)
  1623. ):
  1624. # The default initializer is already the only item on this line.
  1625. # Don't insert a newline here.
  1626. return
  1627. # Replace the space with a newline/indent combo.
  1628. if isinstance(self._lines[prev_prev_index - 1], self._Space):
  1629. del self._lines[prev_prev_index - 1]
  1630. self.add_line_break_at(self._lines.index(self._prev_prev_item),
  1631. indent_amt)
  1632. def _split_after_delimiter(self, item, indent_amt):
  1633. """Split the line only after a delimiter."""
  1634. self._delete_whitespace()
  1635. if self.fits_on_current_line(item.size):
  1636. return
  1637. last_space = None
  1638. for current_item in reversed(self._lines):
  1639. if (
  1640. last_space and
  1641. (not isinstance(current_item, Atom) or
  1642. not current_item.is_colon)
  1643. ):
  1644. break
  1645. else:
  1646. last_space = None
  1647. if isinstance(current_item, self._Space):
  1648. last_space = current_item
  1649. if isinstance(current_item, (self._LineBreak, self._Indent)):
  1650. return
  1651. if not last_space:
  1652. return
  1653. self.add_line_break_at(self._lines.index(last_space), indent_amt)
  1654. def _enforce_space(self, item):
  1655. """Enforce a space in certain situations.
  1656. There are cases where we will want a space where normally we
  1657. wouldn't put one. This just enforces the addition of a space.
  1658. """
  1659. if isinstance(self._lines[-1],
  1660. (self._Space, self._LineBreak, self._Indent)):
  1661. return
  1662. if not self._prev_item:
  1663. return
  1664. item_text = unicode(item)
  1665. prev_text = unicode(self._prev_item)
  1666. # Prefer a space around a '.' in an import statement, and between the
  1667. # 'import' and '('.
  1668. if (
  1669. (item_text == '.' and prev_text == 'from') or
  1670. (item_text == 'import' and prev_text == '.') or
  1671. (item_text == '(' and prev_text == 'import')
  1672. ):
  1673. self._lines.append(self._Space())
  1674. def _delete_whitespace(self):
  1675. """Delete all whitespace from the end of the line."""
  1676. while isinstance(self._lines[-1], (self._Space, self._LineBreak,
  1677. self._Indent)):
  1678. del self._lines[-1]
  1679. class Atom(object):
  1680. """The smallest unbreakable unit that can be reflowed."""
  1681. def __init__(self, atom):
  1682. self._atom = atom
  1683. def __repr__(self):
  1684. return self._atom.token_string
  1685. def __len__(self):
  1686. return self.size
  1687. def reflow(
  1688. self, reflowed_lines, continued_indent, extent,
  1689. break_after_open_bracket=False,
  1690. is_list_comp_or_if_expr=False,
  1691. next_is_dot=False
  1692. ):
  1693. if self._atom.token_type == tokenize.COMMENT:
  1694. reflowed_lines.add_comment(self)
  1695. return
  1696. total_size = extent if extent else self.size
  1697. if self._atom.token_string not in ',:([{}])':
  1698. # Some atoms will need an extra 1-sized space token after them.
  1699. total_size += 1
  1700. prev_item = reflowed_lines.previous_item()
  1701. if (
  1702. not is_list_comp_or_if_expr and
  1703. not reflowed_lines.fits_on_current_line(total_size) and
  1704. not (next_is_dot and
  1705. reflowed_lines.fits_on_current_line(self.size + 1)) and
  1706. not reflowed_lines.line_empty() and
  1707. not self.is_colon and
  1708. not (prev_item and prev_item.is_name and
  1709. unicode(self) == '(')
  1710. ):
  1711. # Start a new line if there is already something on the line and
  1712. # adding this atom would make it go over the max line length.
  1713. reflowed_lines.add_line_break(continued_indent)
  1714. else:
  1715. reflowed_lines.add_space_if_needed(unicode(self))
  1716. reflowed_lines.add(self, len(continued_indent),
  1717. break_after_open_bracket)
  1718. def emit(self):
  1719. return self.__repr__()
  1720. @property
  1721. def is_keyword(self):
  1722. return keyword.iskeyword(self._atom.token_string)
  1723. @property
  1724. def is_string(self):
  1725. return self._atom.token_type == tokenize.STRING
  1726. @property
  1727. def is_name(self):
  1728. return self._atom.token_type == tokenize.NAME
  1729. @property
  1730. def is_number(self):
  1731. return self._atom.token_type == tokenize.NUMBER
  1732. @property
  1733. def is_comma(self):
  1734. return self._atom.token_string == ','
  1735. @property
  1736. def is_colon(self):
  1737. return self._atom.token_string == ':'
  1738. @property
  1739. def size(self):
  1740. return len(self._atom.token_string)
class Container(object):

    """Base class for all container types.

    A container holds an ordered list of items (atoms and nested
    containers) and answers the same classification properties as an atom,
    all of them False here.

    """

    def __init__(self, items):
        self._items = items

    def __repr__(self):
        """Return the items joined onto one line with basic spacing."""
        string = ''
        last_was_keyword = False

        for item in self._items:
            if item.is_comma:
                string += ', '
            elif item.is_colon:
                string += ': '
            else:
                item_string = unicode(item)
                # Separate with a space after a keyword, or when neither side
                # of the join is bracket/punctuation (or existing space).
                if (
                    string and
                    (last_was_keyword or
                     (not string.endswith(tuple('([{,.:}]) ')) and
                      not item_string.startswith(tuple('([{,.:}])'))))
                ):
                    string += ' '
                string += item_string

            last_was_keyword = item.is_keyword
        return string

    def __iter__(self):
        for element in self._items:
            yield element

    def __getitem__(self, idx):
        return self._items[idx]

    def reflow(self, reflowed_lines, continued_indent,
               break_after_open_bracket=False):
        """Add every item of this container to reflowed_lines in order.

        Line breaks are inserted after a nested container followed by a
        comma, after the opening bracket when ``break_after_open_bracket``
        is set, and before an upcoming element that would not fit.

        """
        last_was_container = False
        for (index, item) in enumerate(self._items):
            next_item = get_item(self._items, index + 1)

            if isinstance(item, Atom):
                is_list_comp_or_if_expr = (
                    isinstance(self, (ListComprehension, IfExpression)))
                item.reflow(reflowed_lines, continued_indent,
                            self._get_extent(index),
                            is_list_comp_or_if_expr=is_list_comp_or_if_expr,
                            next_is_dot=(next_item and
                                         unicode(next_item) == '.'))
                # Break after the comma that follows a nested container.
                if last_was_container and item.is_comma:
                    reflowed_lines.add_line_break(continued_indent)
                last_was_container = False
            else:  # isinstance(item, Container)
                reflowed_lines.add(item, len(continued_indent),
                                   break_after_open_bracket)
                last_was_container = not isinstance(item, (ListComprehension,
                                                           IfExpression))

            if (
                break_after_open_bracket and index == 0 and
                # Prefer to keep empty containers together instead of
                # separating them.
                unicode(item) == self.open_bracket and
                (not next_item or unicode(next_item) != self.close_bracket) and
                (len(self._items) != 3 or not isinstance(next_item, Atom))
            ):
                reflowed_lines.add_line_break(continued_indent)
                # Only the first opening bracket gets this treatment.
                break_after_open_bracket = False
            else:
                next_next_item = get_item(self._items, index + 2)
                # Break ahead of the next element when it would overflow the
                # current line.
                if (
                    unicode(item) not in ['.', '%', 'in'] and
                    next_item and not isinstance(next_item, Container) and
                    unicode(next_item) != ':' and
                    next_next_item and (not isinstance(next_next_item, Atom) or
                                        unicode(next_item) == 'not') and
                    not reflowed_lines.line_empty() and
                    not reflowed_lines.fits_on_current_line(
                        self._get_extent(index + 1) + 2)
                ):
                    reflowed_lines.add_line_break(continued_indent)

    def _get_extent(self, index):
        """The extent of the full element.

        E.g., the length of a function call or keyword.

        Sizes are summed starting at ``index`` until an item ends the
        element (see the break conditions in the loop).

        """
        extent = 0
        prev_item = get_item(self._items, index - 1)
        seen_dot = prev_item and unicode(prev_item) == '.'
        while index < len(self._items):
            item = get_item(self._items, index)
            index += 1

            # A comprehension or if-expression always ends the element.
            if isinstance(item, (ListComprehension, IfExpression)):
                break

            if isinstance(item, Container):
                if prev_item and prev_item.is_name:
                    # A container right after a name: counted as a single
                    # column once a dot has been seen, else at full size.
                    if seen_dot:
                        extent += 1
                    else:
                        extent += item.size

                    prev_item = item
                    continue
            elif (unicode(item) not in ['.', '=', ':', 'not'] and
                  not item.is_name and not item.is_string):
                break

            if unicode(item) == '.':
                seen_dot = True

            extent += item.size
            prev_item = item

        return extent

    @property
    def is_string(self):
        return False

    @property
    def size(self):
        """Length of the one-line representation of this container."""
        return len(self.__repr__())

    @property
    def is_keyword(self):
        return False

    @property
    def is_name(self):
        return False

    @property
    def is_comma(self):
        return False

    @property
    def is_colon(self):
        return False

    @property
    def open_bracket(self):
        """Opening bracket text; None in the base class."""
        return None

    @property
    def close_bracket(self):
        """Closing bracket text; None in the base class."""
        return None
class Tuple(Container):

    """A high-level representation of a tuple."""

    @property
    def open_bracket(self):
        """Opening bracket text for this container type."""
        return '('

    @property
    def close_bracket(self):
        """Closing bracket text for this container type."""
        return ')'
class List(Container):

    """A high-level representation of a list."""

    @property
    def open_bracket(self):
        """Opening bracket text for this container type."""
        return '['

    @property
    def close_bracket(self):
        """Closing bracket text for this container type."""
        return ']'
class DictOrSet(Container):

    """A high-level representation of a dictionary or set."""

    @property
    def open_bracket(self):
        """Opening bracket text for this container type."""
        return '{'

    @property
    def close_bracket(self):
        """Closing bracket text for this container type."""
        return '}'
  1890. class ListComprehension(Container):
  1891. """A high-level representation of a list comprehension."""
  1892. @property
  1893. def size(self):
  1894. length = 0
  1895. for item in self._items:
  1896. if isinstance(item, IfExpression):
  1897. break
  1898. length += item.size
  1899. return length
class IfExpression(Container):

    """A high-level representation of an if-expression.

    Adds nothing to Container; it exists so that other code can recognise
    this kind of container with isinstance checks.

    """
  1902. def _parse_container(tokens, index, for_or_if=None):
  1903. """Parse a high-level container, such as a list, tuple, etc."""
  1904. # Store the opening bracket.
  1905. items = [Atom(Token(*tokens[index]))]
  1906. index += 1
  1907. num_tokens = len(tokens)
  1908. while index < num_tokens:
  1909. tok = Token(*tokens[index])
  1910. if tok.token_string in ',)]}':
  1911. # First check if we're at the end of a list comprehension or
  1912. # if-expression. Don't add the ending token as part of the list
  1913. # comprehension or if-expression, because they aren't part of those
  1914. # constructs.
  1915. if for_or_if == 'for':
  1916. return (ListComprehension(items), index - 1)
  1917. elif for_or_if == 'if':
  1918. return (IfExpression(items), index - 1)
  1919. # We've reached the end of a container.
  1920. items.append(Atom(tok))
  1921. # If not, then we are at the end of a container.
  1922. if tok.token_string == ')':
  1923. # The end of a tuple.
  1924. return (Tuple(items), index)
  1925. elif tok.token_string == ']':
  1926. # The end of a list.
  1927. return (List(items), index)
  1928. elif tok.token_string == '}':
  1929. # The end of a dictionary or set.
  1930. return (DictOrSet(items), index)
  1931. elif tok.token_string in '([{':
  1932. # A sub-container is being defined.
  1933. (container, index) = _parse_container(tokens, index)
  1934. items.append(container)
  1935. elif tok.token_string == 'for':
  1936. (container, index) = _parse_container(tokens, index, 'for')
  1937. items.append(container)
  1938. elif tok.token_string == 'if':
  1939. (container, index) = _parse_container(tokens, index, 'if')
  1940. items.append(container)
  1941. else:
  1942. items.append(Atom(tok))
  1943. index += 1
  1944. return (None, None)
  1945. def _parse_tokens(tokens):
  1946. """Parse the tokens.
  1947. This converts the tokens into a form where we can manipulate them
  1948. more easily.
  1949. """
  1950. index = 0
  1951. parsed_tokens = []
  1952. num_tokens = len(tokens)
  1953. while index < num_tokens:
  1954. tok = Token(*tokens[index])
  1955. assert tok.token_type != token.INDENT
  1956. if tok.token_type == tokenize.NEWLINE:
  1957. # There's only one newline and it's at the end.
  1958. break
  1959. if tok.token_string in '([{':
  1960. (container, index) = _parse_container(tokens, index)
  1961. if not container:
  1962. return None
  1963. parsed_tokens.append(container)
  1964. else:
  1965. parsed_tokens.append(Atom(tok))
  1966. index += 1
  1967. return parsed_tokens
  1968. def _reflow_lines(parsed_tokens, indentation, max_line_length,
  1969. start_on_prefix_line):
  1970. """Reflow the lines so that it looks nice."""
  1971. if unicode(parsed_tokens[0]) == 'def':
  1972. # A function definition gets indented a bit more.
  1973. continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
  1974. else:
  1975. continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
  1976. break_after_open_bracket = not start_on_prefix_line
  1977. lines = ReformattedLines(max_line_length)
  1978. lines.add_indent(len(indentation.lstrip('\r\n')))
  1979. if not start_on_prefix_line:
  1980. # If splitting after the opening bracket will cause the first element
  1981. # to be aligned weirdly, don't try it.
  1982. first_token = get_item(parsed_tokens, 0)
  1983. second_token = get_item(parsed_tokens, 1)
  1984. if (
  1985. first_token and second_token and
  1986. unicode(second_token)[0] == '(' and
  1987. len(indentation) + len(first_token) + 1 == len(continued_indent)
  1988. ):
  1989. return None
  1990. for item in parsed_tokens:
  1991. lines.add_space_if_needed(unicode(item), equal=True)
  1992. save_continued_indent = continued_indent
  1993. if start_on_prefix_line and isinstance(item, Container):
  1994. start_on_prefix_line = False
  1995. continued_indent = ' ' * (lines.current_size() + 1)
  1996. item.reflow(lines, continued_indent, break_after_open_bracket)
  1997. continued_indent = save_continued_indent
  1998. return lines.emit()
  1999. def _shorten_line_at_tokens_new(tokens, source, indentation,
  2000. max_line_length):
  2001. """Shorten the line taking its length into account.
  2002. The input is expected to be free of newlines except for inside
  2003. multiline strings and at the end.
  2004. """
  2005. # Yield the original source so to see if it's a better choice than the
  2006. # shortened candidate lines we generate here.
  2007. yield indentation + source
  2008. parsed_tokens = _parse_tokens(tokens)
  2009. if parsed_tokens:
  2010. # Perform two reflows. The first one starts on the same line as the
  2011. # prefix. The second starts on the line after the prefix.
  2012. fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
  2013. start_on_prefix_line=True)
  2014. if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
  2015. yield fixed
  2016. fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
  2017. start_on_prefix_line=False)
  2018. if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
  2019. yield fixed
def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
                            key_token_strings, aggressive):
    """Separate line by breaking at tokens in key_token_strings.

    The input is expected to be free of newlines except for inside
    multiline strings and at the end.

    Returns the indented, split source if it passes the syntax check,
    otherwise None.

    """
    # Offsets into source at which the line will be split.
    offsets = []
    for (index, _t) in enumerate(token_offsets(tokens)):
        (token_type,
         token_string,
         start_offset,
         end_offset) = _t

        assert token_type != token.INDENT

        if token_string in key_token_strings:
            # Do not break in containers with zero or one items.
            unwanted_next_token = {
                '(': ')',
                '[': ']',
                '{': '}'}.get(token_string)
            if unwanted_next_token:
                if (
                    get_item(tokens,
                             index + 1,
                             default=[None, None])[1] == unwanted_next_token or
                    get_item(tokens,
                             index + 2,
                             default=[None, None])[1] == unwanted_next_token
                ):
                    continue

            if (
                index > 2 and token_string == '(' and
                tokens[index - 1][1] in ',(%['
            ):
                # Don't split after a tuple start, or before a tuple start if
                # the tuple is in a list.
                continue

            if end_offset < len(source) - 1:
                # Don't split right before newline.
                offsets.append(end_offset)
        else:
            # Break at adjacent strings. These were probably meant to be on
            # separate lines in the first place.
            previous_token = get_item(tokens, index - 1)
            if (
                token_type == tokenize.STRING and
                previous_token and previous_token[0] == tokenize.STRING
            ):
                offsets.append(start_offset)

    # Reassemble the pieces, indenting every piece after the first one.
    current_indent = None
    fixed = None
    for line in split_at_offsets(source, offsets):
        if fixed:
            fixed += '\n' + current_indent + line

            # Pieces that follow an opening bracket are indented one level
            # deeper.
            for symbol in '([{':
                if line.endswith(symbol):
                    current_indent += indent_word
        else:
            # First line.
            fixed = line
            assert not current_indent
            current_indent = indent_word

    assert fixed is not None

    # The syntax check only goes through normalize_multiline when
    # aggressive is greater than 1.
    if check_syntax(normalize_multiline(fixed)
                    if aggressive > 1 else fixed):
        return indentation + fixed

    return None
  2086. def token_offsets(tokens):
  2087. """Yield tokens and offsets."""
  2088. end_offset = 0
  2089. previous_end_row = 0
  2090. previous_end_column = 0
  2091. for t in tokens:
  2092. token_type = t[0]
  2093. token_string = t[1]
  2094. (start_row, start_column) = t[2]
  2095. (end_row, end_column) = t[3]
  2096. # Account for the whitespace between tokens.
  2097. end_offset += start_column
  2098. if previous_end_row == start_row:
  2099. end_offset -= previous_end_column
  2100. # Record the start offset of the token.
  2101. start_offset = end_offset
  2102. # Account for the length of the token itself.
  2103. end_offset += len(token_string)
  2104. yield (token_type,
  2105. token_string,
  2106. start_offset,
  2107. end_offset)
  2108. previous_end_row = end_row
  2109. previous_end_column = end_column
  2110. def normalize_multiline(line):
  2111. """Normalize multiline-related code that will cause syntax error.
  2112. This is for purposes of checking syntax.
  2113. """
  2114. if line.startswith('def ') and line.rstrip().endswith(':'):
  2115. return line + ' pass'
  2116. elif line.startswith('return '):
  2117. return 'def _(): ' + line
  2118. elif line.startswith('@'):
  2119. return line + 'def _(): pass'
  2120. elif line.startswith('class '):
  2121. return line + ' pass'
  2122. elif line.startswith(('if ', 'elif ', 'for ', 'while ')):
  2123. return line + ' pass'
  2124. return line
  2125. def fix_whitespace(line, offset, replacement):
  2126. """Replace whitespace at offset and return fixed line."""
  2127. # Replace escaped newlines too
  2128. left = line[:offset].rstrip('\n\r \t\\')
  2129. right = line[offset:].lstrip('\n\r \t\\')
  2130. if right.startswith('#'):
  2131. return line
  2132. return left + replacement + right
  2133. def _execute_pep8(pep8_options, source):
  2134. """Execute pycodestyle via python method calls."""
  2135. class QuietReport(pycodestyle.BaseReport):
  2136. """Version of checker that does not print."""
  2137. def __init__(self, options):
  2138. super(QuietReport, self).__init__(options)
  2139. self.__full_error_results = []
  2140. def error(self, line_number, offset, text, check):
  2141. """Collect errors."""
  2142. code = super(QuietReport, self).error(line_number,
  2143. offset,
  2144. text,
  2145. check)
  2146. if code:
  2147. self.__full_error_results.append(
  2148. {'id': code,
  2149. 'line': line_number,
  2150. 'column': offset + 1,
  2151. 'info': text})
  2152. def full_error_results(self):
  2153. """Return error results in detail.
  2154. Results are in the form of a list of dictionaries. Each
  2155. dictionary contains 'id', 'line', 'column', and 'info'.
  2156. """
  2157. return self.__full_error_results
  2158. checker = pycodestyle.Checker('', lines=source, reporter=QuietReport,
  2159. **pep8_options)
  2160. checker.check_all()
  2161. return checker.report.full_error_results()
  2162. def _remove_leading_and_normalize(line):
  2163. # ignore FF in first lstrip()
  2164. return line.lstrip(' \t\v').rstrip(CR + LF) + '\n'
class Reindenter(object):

    """Reindents badly-indented code to uniformly use four-space indentation.

    Released to the public domain, by Tim Peters, 03 October 2000.

    """

    def __init__(self, input_text):
        """Split input_text into lines and prepare them for tokenizing."""
        sio = io.StringIO(input_text)
        source_lines = sio.readlines()

        # Lines recorded here are copied verbatim, both below and in run().
        self.string_content_line_numbers = multiline_string_lines(input_text)

        # File lines, rstripped & tab-expanded. Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it is a newline.
        self.lines = []
        for line_number, line in enumerate(source_lines, start=1):
            # Do not modify if inside a multiline string.
            if line_number in self.string_content_line_numbers:
                self.lines.append(line)
            else:
                # Only expand leading tabs.
                self.lines.append(_get_indentation(line).expandtabs() +
                                  _remove_leading_and_normalize(line))

        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line
        self.input_text = input_text

    def run(self, indent_size=DEFAULT_INDENT_SIZE):
        """Fix indentation and return the reindented source text.

        The original input text is returned unchanged when indent_size is
        less than 1 or when the source cannot be tokenized.

        """
        if indent_size < 1:
            return self.input_text

        try:
            stats = _reindent_stats(tokenize.generate_tokens(self.getline))
        except (SyntaxError, tokenize.TokenError):
            return self.input_text
        # Work on the prepared lines (index 0 is the dummy entry).
        lines = self.lines
        # Sentinel.
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = _leading_space_count(lines[thisstmt])
            # Comment lines carry level -1, which makes "want" negative.
            want = thislevel * indent_size
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line. If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == _leading_space_count(lines[jline]):
                                    want = jlevel * indent_size
                                break
                    if want < 0:            # Maybe it's a hanging
                        # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + _leading_space_count(
                                        after[jline - 1]) -
                                        _leading_space_count(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Nothing to shift: copy the whole statement as is.
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line_number, line in enumerate(lines[thisstmt:nextstmt],
                                                   start=thisstmt):
                    if line_number in self.string_content_line_numbers:
                        # Contents of multiline strings are never shifted.
                        after.append(line)
                    elif diff > 0:
                        if line == '\n':
                            after.append(line)
                        else:
                            after.append(' ' * diff + line)
                    else:
                        # Never remove more than the leading spaces present.
                        remove = min(_leading_space_count(line), -diff)
                        after.append(line[remove:])

        return ''.join(after)

    def getline(self):
        """Line-getter for tokenize.

        Returns the next prepared line, or '' once all lines are consumed.

        """
        if self.index >= len(self.lines):
            line = ''
        else:
            line = self.lines[self.index]
            self.index += 1
        return line
  2273. def _reindent_stats(tokens):
  2274. """Return list of (lineno, indentlevel) pairs.
  2275. One for each stmt and comment line. indentlevel is -1 for comment
  2276. lines, as a signal that tokenize doesn't know what to do about them;
  2277. indeed, they're our headache!
  2278. """
  2279. find_stmt = 1 # Next token begins a fresh stmt?
  2280. level = 0 # Current indent level.
  2281. stats = []
  2282. for t in tokens:
  2283. token_type = t[0]
  2284. sline = t[2][0]
  2285. line = t[4]
  2286. if token_type == tokenize.NEWLINE:
  2287. # A program statement, or ENDMARKER, will eventually follow,
  2288. # after some (possibly empty) run of tokens of the form
  2289. # (NL | COMMENT)* (INDENT | DEDENT+)?
  2290. find_stmt = 1
  2291. elif token_type == tokenize.INDENT:
  2292. find_stmt = 1
  2293. level += 1
  2294. elif token_type == tokenize.DEDENT:
  2295. find_stmt = 1
  2296. level -= 1
  2297. elif token_type == tokenize.COMMENT:
  2298. if find_stmt:
  2299. stats.append((sline, -1))
  2300. # But we're still looking for a new stmt, so leave
  2301. # find_stmt alone.
  2302. elif token_type == tokenize.NL:
  2303. pass
  2304. elif find_stmt:
  2305. # This is the first "real token" following a NEWLINE, so it
  2306. # must be the first token of the next program statement, or an
  2307. # ENDMARKER.
  2308. find_stmt = 0
  2309. if line: # Not endmarker.
  2310. stats.append((sline, level))
  2311. return stats
  2312. def _leading_space_count(line):
  2313. """Return number of leading spaces in line."""
  2314. i = 0
  2315. while i < len(line) and line[i] == ' ':
  2316. i += 1
  2317. return i
  2318. def refactor_with_2to3(source_text, fixer_names, filename=''):
  2319. """Use lib2to3 to refactor the source.
  2320. Return the refactored source code.
  2321. """
  2322. from lib2to3.refactor import RefactoringTool
  2323. fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
  2324. tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
  2325. from lib2to3.pgen2 import tokenize as lib2to3_tokenize
  2326. try:
  2327. # The name parameter is necessary particularly for the "import" fixer.
  2328. return unicode(tool.refactor_string(source_text, name=filename))
  2329. except lib2to3_tokenize.TokenError:
  2330. return source_text
  2331. def check_syntax(code):
  2332. """Return True if syntax is okay."""
  2333. try:
  2334. return compile(code, '<string>', 'exec', dont_inherit=True)
  2335. except (SyntaxError, TypeError, UnicodeDecodeError):
  2336. return False
  2337. def filter_results(source, results, aggressive):
  2338. """Filter out spurious reports from pycodestyle.
  2339. If aggressive is True, we allow possibly unsafe fixes (E711, E712).
  2340. """
  2341. non_docstring_string_line_numbers = multiline_string_lines(
  2342. source, include_docstrings=False)
  2343. all_string_line_numbers = multiline_string_lines(
  2344. source, include_docstrings=True)
  2345. commented_out_code_line_numbers = commented_out_code_lines(source)
  2346. has_e901 = any(result['id'].lower() == 'e901' for result in results)
  2347. for r in results:
  2348. issue_id = r['id'].lower()
  2349. if r['line'] in non_docstring_string_line_numbers:
  2350. if issue_id.startswith(('e1', 'e501', 'w191')):
  2351. continue
  2352. if r['line'] in all_string_line_numbers:
  2353. if issue_id in ['e501']:
  2354. continue
  2355. # We must offset by 1 for lines that contain the trailing contents of
  2356. # multiline strings.
  2357. if not aggressive and (r['line'] + 1) in all_string_line_numbers:
  2358. # Do not modify multiline strings in non-aggressive mode. Remove
  2359. # trailing whitespace could break doctests.
  2360. if issue_id.startswith(('w29', 'w39')):
  2361. continue
  2362. if aggressive <= 0:
  2363. if issue_id.startswith(('e711', 'e72', 'w6')):
  2364. continue
  2365. if aggressive <= 1:
  2366. if issue_id.startswith(('e712', 'e713', 'e714', 'w5')):
  2367. continue
  2368. if aggressive <= 2:
  2369. if issue_id.startswith(('e704', 'w5')):
  2370. continue
  2371. if r['line'] in commented_out_code_line_numbers:
  2372. if issue_id.startswith(('e26', 'e501')):
  2373. continue
  2374. # Do not touch indentation if there is a token error caused by
  2375. # incomplete multi-line statement. Otherwise, we risk screwing up the
  2376. # indentation.
  2377. if has_e901:
  2378. if issue_id.startswith(('e1', 'e7')):
  2379. continue
  2380. yield r
  2381. def multiline_string_lines(source, include_docstrings=False):
  2382. """Return line numbers that are within multiline strings.
  2383. The line numbers are indexed at 1.
  2384. Docstrings are ignored.
  2385. """
  2386. line_numbers = set()
  2387. previous_token_type = ''
  2388. try:
  2389. for t in generate_tokens(source):
  2390. token_type = t[0]
  2391. start_row = t[2][0]
  2392. end_row = t[3][0]
  2393. if token_type == tokenize.STRING and start_row != end_row:
  2394. if (
  2395. include_docstrings or
  2396. previous_token_type != tokenize.INDENT
  2397. ):
  2398. # We increment by one since we want the contents of the
  2399. # string.
  2400. line_numbers |= set(range(1 + start_row, 1 + end_row))
  2401. previous_token_type = token_type
  2402. except (SyntaxError, tokenize.TokenError):
  2403. pass
  2404. return line_numbers
  2405. def commented_out_code_lines(source):
  2406. """Return line numbers of comments that are likely code.
  2407. Commented-out code is bad practice, but modifying it just adds even
  2408. more clutter.
  2409. """
  2410. line_numbers = []
  2411. try:
  2412. for t in generate_tokens(source):
  2413. token_type = t[0]
  2414. token_string = t[1]
  2415. start_row = t[2][0]
  2416. line = t[4]
  2417. # Ignore inline comments.
  2418. if not line.lstrip().startswith('#'):
  2419. continue
  2420. if token_type == tokenize.COMMENT:
  2421. stripped_line = token_string.lstrip('#').strip()
  2422. if (
  2423. ' ' in stripped_line and
  2424. '#' not in stripped_line and
  2425. check_syntax(stripped_line)
  2426. ):
  2427. line_numbers.append(start_row)
  2428. except (SyntaxError, tokenize.TokenError):
  2429. pass
  2430. return line_numbers
  2431. def shorten_comment(line, max_line_length, last_comment=False):
  2432. """Return trimmed or split long comment line.
  2433. If there are no comments immediately following it, do a text wrap.
  2434. Doing this wrapping on all comments in general would lead to jagged
  2435. comment text.
  2436. """
  2437. assert len(line) > max_line_length
  2438. line = line.rstrip()
  2439. # PEP 8 recommends 72 characters for comment text.
  2440. indentation = _get_indentation(line) + '# '
  2441. max_line_length = min(max_line_length,
  2442. len(indentation) + 72)
  2443. MIN_CHARACTER_REPEAT = 5
  2444. if (
  2445. len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
  2446. not line[-1].isalnum()
  2447. ):
  2448. # Trim comments that end with things like ---------
  2449. return line[:max_line_length] + '\n'
  2450. elif last_comment and re.match(r'\s*#+\s*\w+', line):
  2451. split_lines = textwrap.wrap(line.lstrip(' \t#'),
  2452. initial_indent=indentation,
  2453. subsequent_indent=indentation,
  2454. width=max_line_length,
  2455. break_long_words=False,
  2456. break_on_hyphens=False)
  2457. return '\n'.join(split_lines) + '\n'
  2458. return line + '\n'
  2459. def normalize_line_endings(lines, newline):
  2460. """Return fixed line endings.
  2461. All lines will be modified to use the most common line ending.
  2462. """
  2463. return [line.rstrip('\n\r') + newline for line in lines]
  2464. def mutual_startswith(a, b):
  2465. return b.startswith(a) or a.startswith(b)
  2466. def code_match(code, select, ignore):
  2467. if ignore:
  2468. assert not isinstance(ignore, unicode)
  2469. for ignored_code in [c.strip() for c in ignore]:
  2470. if mutual_startswith(code.lower(), ignored_code.lower()):
  2471. return False
  2472. if select:
  2473. assert not isinstance(select, unicode)
  2474. for selected_code in [c.strip() for c in select]:
  2475. if mutual_startswith(code.lower(), selected_code.lower()):
  2476. return True
  2477. return False
  2478. return True
  2479. def fix_code(source, options=None, encoding=None, apply_config=False):
  2480. """Return fixed source code.
  2481. "encoding" will be used to decode "source" if it is a byte string.
  2482. """
  2483. options = _get_options(options, apply_config)
  2484. if not isinstance(source, unicode):
  2485. source = source.decode(encoding or get_encoding())
  2486. sio = io.StringIO(source)
  2487. return fix_lines(sio.readlines(), options=options)
  2488. def _get_options(raw_options, apply_config):
  2489. """Return parsed options."""
  2490. if not raw_options:
  2491. return parse_args([''], apply_config=apply_config)
  2492. if isinstance(raw_options, dict):
  2493. options = parse_args([''], apply_config=apply_config)
  2494. for name, value in raw_options.items():
  2495. if not hasattr(options, name):
  2496. raise ValueError("No such option '{}'".format(name))
  2497. # Check for very basic type errors.
  2498. expected_type = type(getattr(options, name))
  2499. if not isinstance(expected_type, (str, unicode)):
  2500. if isinstance(value, (str, unicode)):
  2501. raise ValueError(
  2502. "Option '{}' should not be a string".format(name))
  2503. setattr(options, name, value)
  2504. else:
  2505. options = raw_options
  2506. return options
  2507. def fix_lines(source_lines, options, filename=''):
  2508. """Return fixed source code."""
  2509. # Transform everything to line feed. Then change them back to original
  2510. # before returning fixed source code.
  2511. original_newline = find_newline(source_lines)
  2512. tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
  2513. # Keep a history to break out of cycles.
  2514. previous_hashes = set()
  2515. if options.line_range:
  2516. # Disable "apply_local_fixes()" for now due to issue #175.
  2517. fixed_source = tmp_source
  2518. else:
  2519. pep8_options = {
  2520. 'ignore': options.ignore,
  2521. 'select': options.select,
  2522. 'max_line_length': options.max_line_length,
  2523. 'hang_closing': options.hang_closing,
  2524. }
  2525. sio = io.StringIO(tmp_source)
  2526. contents = sio.readlines()
  2527. results = _execute_pep8(pep8_options, contents)
  2528. codes = {result['id'] for result in results
  2529. if result['id'] in SELECTED_GLOBAL_FIXED_METHOD_CODES}
  2530. # Apply global fixes only once (for efficiency).
  2531. fixed_source = apply_global_fixes(tmp_source,
  2532. options,
  2533. filename=filename,
  2534. codes=codes)
  2535. passes = 0
  2536. long_line_ignore_cache = set()
  2537. while hash(fixed_source) not in previous_hashes:
  2538. if options.pep8_passes >= 0 and passes > options.pep8_passes:
  2539. break
  2540. passes += 1
  2541. previous_hashes.add(hash(fixed_source))
  2542. tmp_source = copy.copy(fixed_source)
  2543. fix = FixPEP8(
  2544. filename,
  2545. options,
  2546. contents=tmp_source,
  2547. long_line_ignore_cache=long_line_ignore_cache)
  2548. fixed_source = fix.fix()
  2549. sio = io.StringIO(fixed_source)
  2550. return ''.join(normalize_line_endings(sio.readlines(), original_newline))
  2551. def fix_file(filename, options=None, output=None, apply_config=False):
  2552. if not options:
  2553. options = parse_args([filename], apply_config=apply_config)
  2554. original_source = readlines_from_file(filename)
  2555. fixed_source = original_source
  2556. if options.in_place or output:
  2557. encoding = detect_encoding(filename)
  2558. if output:
  2559. output = LineEndingWrapper(wrap_output(output, encoding=encoding))
  2560. fixed_source = fix_lines(fixed_source, options, filename=filename)
  2561. if options.diff:
  2562. new = io.StringIO(fixed_source)
  2563. new = new.readlines()
  2564. diff = get_diff_text(original_source, new, filename)
  2565. if output:
  2566. output.write(diff)
  2567. output.flush()
  2568. else:
  2569. return diff
  2570. elif options.in_place:
  2571. fp = open_with_encoding(filename, encoding=encoding, mode='w')
  2572. fp.write(fixed_source)
  2573. fp.close()
  2574. else:
  2575. if output:
  2576. output.write(fixed_source)
  2577. output.flush()
  2578. else:
  2579. return fixed_source
  2580. def global_fixes():
  2581. """Yield multiple (code, function) tuples."""
  2582. for function in list(globals().values()):
  2583. if inspect.isfunction(function):
  2584. arguments = _get_parameters(function)
  2585. if arguments[:1] != ['source']:
  2586. continue
  2587. code = extract_code_from_function(function)
  2588. if code:
  2589. yield (code, function)
  2590. def _get_parameters(function):
  2591. # pylint: disable=deprecated-method
  2592. if sys.version_info.major >= 3:
  2593. # We need to match "getargspec()", which includes "self" as the first
  2594. # value for methods.
  2595. # https://bugs.python.org/issue17481#msg209469
  2596. if inspect.ismethod(function):
  2597. function = function.__func__
  2598. return list(inspect.signature(function).parameters)
  2599. else:
  2600. return inspect.getargspec(function)[0]
  2601. def apply_global_fixes(source, options, where='global', filename='',
  2602. codes=None):
  2603. """Run global fixes on source code.
  2604. These are fixes that only need be done once (unlike those in
  2605. FixPEP8, which are dependent on pycodestyle).
  2606. """
  2607. if codes is None:
  2608. codes = []
  2609. if any(code_match(code, select=options.select, ignore=options.ignore)
  2610. for code in ['E101', 'E111']):
  2611. source = reindent(source,
  2612. indent_size=options.indent_size)
  2613. for (code, function) in global_fixes():
  2614. if code.upper() in SELECTED_GLOBAL_FIXED_METHOD_CODES \
  2615. and code.upper() not in codes:
  2616. continue
  2617. if code_match(code, select=options.select, ignore=options.ignore):
  2618. if options.verbose:
  2619. print('---> Applying {} fix for {}'.format(where,
  2620. code.upper()),
  2621. file=sys.stderr)
  2622. source = function(source,
  2623. aggressive=options.aggressive)
  2624. source = fix_2to3(source,
  2625. aggressive=options.aggressive,
  2626. select=options.select,
  2627. ignore=options.ignore,
  2628. filename=filename,
  2629. where=where,
  2630. verbose=options.verbose)
  2631. return source
  2632. def extract_code_from_function(function):
  2633. """Return code handled by function."""
  2634. if not function.__name__.startswith('fix_'):
  2635. return None
  2636. code = re.sub('^fix_', '', function.__name__)
  2637. if not code:
  2638. return None
  2639. try:
  2640. int(code[1:])
  2641. except ValueError:
  2642. return None
  2643. return code
  2644. def _get_package_version():
  2645. packages = ["pycodestyle: {}".format(pycodestyle.__version__)]
  2646. return ", ".join(packages)
  2647. def create_parser():
  2648. """Return command-line parser."""
  2649. parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
  2650. prog='autopep8')
  2651. parser.add_argument('--version', action='version',
  2652. version='%(prog)s {} ({})'.format(
  2653. __version__, _get_package_version()))
  2654. parser.add_argument('-v', '--verbose', action='count',
  2655. default=0,
  2656. help='print verbose messages; '
  2657. 'multiple -v result in more verbose messages')
  2658. parser.add_argument('-d', '--diff', action='store_true',
  2659. help='print the diff for the fixed source')
  2660. parser.add_argument('-i', '--in-place', action='store_true',
  2661. help='make changes to files in place')
  2662. parser.add_argument('--global-config', metavar='filename',
  2663. default=DEFAULT_CONFIG,
  2664. help='path to a global pep8 config file; if this file '
  2665. 'does not exist then this is ignored '
  2666. '(default: {})'.format(DEFAULT_CONFIG))
  2667. parser.add_argument('--ignore-local-config', action='store_true',
  2668. help="don't look for and apply local config files; "
  2669. 'if not passed, defaults are updated with any '
  2670. "config files in the project's root directory")
  2671. parser.add_argument('-r', '--recursive', action='store_true',
  2672. help='run recursively over directories; '
  2673. 'must be used with --in-place or --diff')
  2674. parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
  2675. help='number of parallel jobs; '
  2676. 'match CPU count if value is less than 1')
  2677. parser.add_argument('-p', '--pep8-passes', metavar='n',
  2678. default=-1, type=int,
  2679. help='maximum number of additional pep8 passes '
  2680. '(default: infinite)')
  2681. parser.add_argument('-a', '--aggressive', action='count', default=0,
  2682. help='enable non-whitespace changes; '
  2683. 'multiple -a result in more aggressive changes')
  2684. parser.add_argument('--experimental', action='store_true',
  2685. help='enable experimental fixes')
  2686. parser.add_argument('--exclude', metavar='globs',
  2687. help='exclude file/directory names that match these '
  2688. 'comma-separated globs')
  2689. parser.add_argument('--list-fixes', action='store_true',
  2690. help='list codes for fixes; '
  2691. 'used by --ignore and --select')
  2692. parser.add_argument('--ignore', metavar='errors', default='',
  2693. help='do not fix these errors/warnings '
  2694. '(default: {})'.format(DEFAULT_IGNORE))
  2695. parser.add_argument('--select', metavar='errors', default='',
  2696. help='fix only these errors/warnings (e.g. E4,W)')
  2697. parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
  2698. help='set maximum allowed line length '
  2699. '(default: %(default)s)')
  2700. parser.add_argument('--line-range', '--range', metavar='line',
  2701. default=None, type=int, nargs=2,
  2702. help='only fix errors found within this inclusive '
  2703. 'range of line numbers (e.g. 1 99); '
  2704. 'line numbers are indexed at 1')
  2705. parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
  2706. type=int, help=argparse.SUPPRESS)
  2707. parser.add_argument('--hang-closing', action='store_true',
  2708. help='hang-closing option passed to pycodestyle')
  2709. parser.add_argument('files', nargs='*',
  2710. help="files to format or '-' for standard in")
  2711. return parser
  2712. def parse_args(arguments, apply_config=False):
  2713. """Parse command-line options."""
  2714. parser = create_parser()
  2715. args = parser.parse_args(arguments)
  2716. if not args.files and not args.list_fixes:
  2717. parser.error('incorrect number of arguments')
  2718. args.files = [decode_filename(name) for name in args.files]
  2719. if apply_config:
  2720. parser = read_config(args, parser)
  2721. args = parser.parse_args(arguments)
  2722. args.files = [decode_filename(name) for name in args.files]
  2723. if '-' in args.files:
  2724. if len(args.files) > 1:
  2725. parser.error('cannot mix stdin and regular files')
  2726. if args.diff:
  2727. parser.error('--diff cannot be used with standard input')
  2728. if args.in_place:
  2729. parser.error('--in-place cannot be used with standard input')
  2730. if args.recursive:
  2731. parser.error('--recursive cannot be used with standard input')
  2732. if len(args.files) > 1 and not (args.in_place or args.diff):
  2733. parser.error('autopep8 only takes one filename as argument '
  2734. 'unless the "--in-place" or "--diff" args are '
  2735. 'used')
  2736. if args.recursive and not (args.in_place or args.diff):
  2737. parser.error('--recursive must be used with --in-place or --diff')
  2738. if args.in_place and args.diff:
  2739. parser.error('--in-place and --diff are mutually exclusive')
  2740. if args.max_line_length <= 0:
  2741. parser.error('--max-line-length must be greater than 0')
  2742. if args.select:
  2743. args.select = _split_comma_separated(args.select)
  2744. if args.ignore:
  2745. args.ignore = _split_comma_separated(args.ignore)
  2746. elif not args.select:
  2747. if args.aggressive:
  2748. # Enable everything by default if aggressive.
  2749. args.select = {'E', 'W'}
  2750. else:
  2751. args.ignore = _split_comma_separated(DEFAULT_IGNORE)
  2752. if args.exclude:
  2753. args.exclude = _split_comma_separated(args.exclude)
  2754. else:
  2755. args.exclude = {}
  2756. if args.jobs < 1:
  2757. # Do not import multiprocessing globally in case it is not supported
  2758. # on the platform.
  2759. import multiprocessing
  2760. args.jobs = multiprocessing.cpu_count()
  2761. if args.jobs > 1 and not args.in_place:
  2762. parser.error('parallel jobs requires --in-place')
  2763. if args.line_range:
  2764. if args.line_range[0] <= 0:
  2765. parser.error('--range must be positive numbers')
  2766. if args.line_range[0] > args.line_range[1]:
  2767. parser.error('First value of --range should be less than or equal '
  2768. 'to the second')
  2769. return args
  2770. def read_config(args, parser):
  2771. """Read both user configuration and local configuration."""
  2772. try:
  2773. from configparser import ConfigParser as SafeConfigParser
  2774. from configparser import Error
  2775. except ImportError:
  2776. from ConfigParser import SafeConfigParser
  2777. from ConfigParser import Error
  2778. config = SafeConfigParser()
  2779. try:
  2780. config.read(args.global_config)
  2781. if not args.ignore_local_config:
  2782. parent = tail = args.files and os.path.abspath(
  2783. os.path.commonprefix(args.files))
  2784. while tail:
  2785. if config.read([os.path.join(parent, fn)
  2786. for fn in PROJECT_CONFIG]):
  2787. break
  2788. (parent, tail) = os.path.split(parent)
  2789. defaults = {}
  2790. option_list = {o.dest: o.type or type(o.default)
  2791. for o in parser._actions}
  2792. for section in ['pep8', 'pycodestyle', 'flake8']:
  2793. if not config.has_section(section):
  2794. continue
  2795. for (k, _) in config.items(section):
  2796. norm_opt = k.lstrip('-').replace('-', '_')
  2797. if not option_list.get(norm_opt):
  2798. continue
  2799. opt_type = option_list[norm_opt]
  2800. if opt_type is int:
  2801. value = config.getint(section, k)
  2802. elif opt_type is bool:
  2803. value = config.getboolean(section, k)
  2804. else:
  2805. value = config.get(section, k)
  2806. if args.verbose:
  2807. print("enable config: section={}, key={}, value={}".format(
  2808. section, k, value))
  2809. defaults[norm_opt] = value
  2810. parser.set_defaults(**defaults)
  2811. except Error:
  2812. # Ignore for now.
  2813. pass
  2814. return parser
  2815. def _split_comma_separated(string):
  2816. """Return a set of strings."""
  2817. return {text.strip() for text in string.split(',') if text.strip()}
  2818. def decode_filename(filename):
  2819. """Return Unicode filename."""
  2820. if isinstance(filename, unicode):
  2821. return filename
  2822. return filename.decode(sys.getfilesystemencoding())
  2823. def supported_fixes():
  2824. """Yield pep8 error codes that autopep8 fixes.
  2825. Each item we yield is a tuple of the code followed by its
  2826. description.
  2827. """
  2828. yield ('E101', docstring_summary(reindent.__doc__))
  2829. instance = FixPEP8(filename=None, options=None, contents='')
  2830. for attribute in dir(instance):
  2831. code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
  2832. if code:
  2833. yield (
  2834. code.group(1).upper(),
  2835. re.sub(r'\s+', ' ',
  2836. docstring_summary(getattr(instance, attribute).__doc__))
  2837. )
  2838. for (code, function) in sorted(global_fixes()):
  2839. yield (code.upper() + (4 - len(code)) * ' ',
  2840. re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
  2841. for code in sorted(CODE_TO_2TO3):
  2842. yield (code.upper() + (4 - len(code)) * ' ',
  2843. re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
  2844. def docstring_summary(docstring):
  2845. """Return summary of docstring."""
  2846. return docstring.split('\n')[0] if docstring else ''
def line_shortening_rank(candidate, indent_word, max_line_length,
                         experimental=False):
    """Return rank of candidate.

    This is for sorting candidates.

    candidate is the text of one proposed reformatting, possibly spanning
    several lines. Starting from zero, the rank grows with the amount by
    which the longest line exceeds max_line_length, with the number of
    lines, and with the variation in line length. Each line is then checked
    for a series of awkward break positions, which add penalties, and for a
    few preferred ones, which subtract a little.

    indent_word is used only for its length, to spot lines that consist of
    little more than an opening bracket or dot. experimental changes the
    penalty for a backslash continuation in the long/commented case from 1
    to 100.

    A blank candidate ranks 0 and the result is never negative. Since
    unwanted layouts increase the rank, lower values presumably mark better
    candidates -- confirm against the code that sorts by this rank.

    """
    # Nothing to judge in an all-whitespace candidate.
    if not candidate.strip():
        return 0

    rank = 0
    lines = candidate.rstrip().split('\n')

    # When the first line is not a comment and does not itself end with an
    # opening bracket, offset becomes the column just past the first opening
    # bracket found on it (largest over the three bracket kinds).
    offset = 0
    if (
        not lines[0].lstrip().startswith('#') and
        lines[0].rstrip()[-1] not in '([{'
    ):
        for (opening, closing) in ('()', '[]', '{}'):
            # Don't penalize empty containers that aren't split up. Things like
            # this "foo(\n    )" aren't particularly good.
            opening_loc = lines[0].find(opening)
            closing_loc = lines[0].find(closing)
            if opening_loc >= 0:
                if closing_loc < 0 or closing_loc != opening_loc + 1:
                    offset = max(offset, 1 + opening_loc)

    # Longest stripped line, with offset added to every line.
    current_longest = max(offset + len(x.strip()) for x in lines)

    # Each character beyond the limit costs 4.
    rank += 4 * max(0, current_longest - max_line_length)

    # Every line costs 1.
    rank += len(lines)

    # Too much variation in line length is ugly.
    rank += 2 * standard_deviation(len(line) for line in lines)

    # Closing bracket that pairs with the last character of the first line,
    # or None when that character is not an opening bracket.
    bad_staring_symbol = {
        '(': ')',
        '[': ']',
        '{': '}'}.get(lines[0][-1])

    if len(lines) > 1:
        # Penalize a second line that immediately closes the bracket opened
        # at the end of the first line.
        if (
            bad_staring_symbol and
            lines[1].lstrip().startswith(bad_staring_symbol)
        ):
            rank += 20

    for lineno, current_line in enumerate(lines):
        current_line = current_line.strip()

        # Comment lines are not scored.
        if current_line.startswith('#'):
            continue

        # Lines that begin with an operator or a dot.
        for bad_start in ['.', '%', '+', '-', '/']:
            if current_line.startswith(bad_start):
                rank += 100

            # Do not tolerate operators on their own line.
            if current_line == bad_start:
                rank += 1000

        # A line that contains "': " and ends with an operator or a dot.
        if (
            current_line.endswith(('.', '%', '+', '-', '/')) and
            "': " in current_line
        ):
            rank += 1000

        if current_line.endswith(('(', '[', '{', '.')):
            # Avoid lonely opening. They result in longer lines.
            if len(current_line) <= len(indent_word):
                rank += 100

            # Avoid the ugliness of ", (\n".
            if (
                current_line.endswith('(') and
                current_line[:-1].rstrip().endswith(',')
            ):
                rank += 100

            # Avoid the ugliness of "something[\n" and something[index][\n.
            if (
                current_line.endswith('[') and
                len(current_line) > 1 and
                (current_line[-2].isalnum() or current_line[-2] in ']')
            ):
                rank += 300

            # Also avoid the ugliness of "foo.\nbar"
            if current_line.endswith('.'):
                rank += 100

            # An arithmetic operator anywhere on a line ending with an
            # opening bracket or dot.
            if has_arithmetic_operator(current_line):
                rank += 100

        # Avoid breaking at unary operators.
        if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')):
            rank += 1000

        # Avoid breaking right after the "*" that follows "lambda".
        if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')):
            rank += 1000

        # Small bonus for ending on "%" or an opening bracket.
        if current_line.endswith(('%', '(', '[', '{')):
            rank -= 20

        # Try to break list comprehensions at the "for".
        if current_line.startswith('for '):
            rank -= 50

        if current_line.endswith('\\'):
            # If a line ends in \-newline, it may be part of a
            # multiline string. In that case, we would like to know
            # how long that line is without the \-newline. If it's
            # longer than the maximum, or has comments, then we assume
            # that the \-newline is an okay candidate and only
            # penalize it a bit.
            total_len = len(current_line)
            # Rebinding lineno only drives this look-ahead scan; enumerate()
            # assigns it afresh on the next iteration of the outer loop.
            lineno += 1
            while lineno < len(lines):
                total_len += len(lines[lineno])

                if lines[lineno].lstrip().startswith('#'):
                    total_len = max_line_length
                    break

                if not lines[lineno].endswith('\\'):
                    break

                lineno += 1

            if total_len < max_line_length:
                rank += 10
            else:
                rank += 100 if experimental else 1

        # Prefer breaking at commas rather than colon.
        if ',' in current_line and current_line.endswith(':'):
            rank += 10

        # Avoid splitting dictionaries between key and value.
        if current_line.endswith(':'):
            rank += 100

        # Each bracket left unmatched on this line costs 10.
        rank += 10 * count_unbalanced_brackets(current_line)

    # The bonuses above may push the total below zero; clamp it.
    return max(0, rank)
  2960. def standard_deviation(numbers):
  2961. """Return standard devation."""
  2962. numbers = list(numbers)
  2963. if not numbers:
  2964. return 0
  2965. mean = sum(numbers) / len(numbers)
  2966. return (sum((n - mean) ** 2 for n in numbers) /
  2967. len(numbers)) ** .5
  2968. def has_arithmetic_operator(line):
  2969. """Return True if line contains any arithmetic operators."""
  2970. for operator in pycodestyle.ARITHMETIC_OP:
  2971. if operator in line:
  2972. return True
  2973. return False
  2974. def count_unbalanced_brackets(line):
  2975. """Return number of unmatched open/close brackets."""
  2976. count = 0
  2977. for opening, closing in ['()', '[]', '{}']:
  2978. count += abs(line.count(opening) - line.count(closing))
  2979. return count
  2980. def split_at_offsets(line, offsets):
  2981. """Split line at offsets.
  2982. Return list of strings.
  2983. """
  2984. result = []
  2985. previous_offset = 0
  2986. current_offset = 0
  2987. for current_offset in sorted(offsets):
  2988. if current_offset < len(line) and previous_offset != current_offset:
  2989. result.append(line[previous_offset:current_offset].strip())
  2990. previous_offset = current_offset
  2991. result.append(line[current_offset:])
  2992. return result
  2993. class LineEndingWrapper(object):
  2994. r"""Replace line endings to work with sys.stdout.
  2995. It seems that sys.stdout expects only '\n' as the line ending, no matter
  2996. the platform. Otherwise, we get repeated line endings.
  2997. """
  2998. def __init__(self, output):
  2999. self.__output = output
  3000. def write(self, s):
  3001. self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
  3002. def flush(self):
  3003. self.__output.flush()
  3004. def match_file(filename, exclude):
  3005. """Return True if file is okay for modifying/recursing."""
  3006. base_name = os.path.basename(filename)
  3007. if base_name.startswith('.'):
  3008. return False
  3009. for pattern in exclude:
  3010. if fnmatch.fnmatch(base_name, pattern):
  3011. return False
  3012. if fnmatch.fnmatch(filename, pattern):
  3013. return False
  3014. if not os.path.isdir(filename) and not is_python_file(filename):
  3015. return False
  3016. return True
  3017. def find_files(filenames, recursive, exclude):
  3018. """Yield filenames."""
  3019. while filenames:
  3020. name = filenames.pop(0)
  3021. if recursive and os.path.isdir(name):
  3022. for root, directories, children in os.walk(name):
  3023. filenames += [os.path.join(root, f) for f in children
  3024. if match_file(os.path.join(root, f),
  3025. exclude)]
  3026. directories[:] = [d for d in directories
  3027. if match_file(os.path.join(root, d),
  3028. exclude)]
  3029. else:
  3030. yield name
  3031. def _fix_file(parameters):
  3032. """Helper function for optionally running fix_file() in parallel."""
  3033. if parameters[1].verbose:
  3034. print('[file:{}]'.format(parameters[0]), file=sys.stderr)
  3035. try:
  3036. fix_file(*parameters)
  3037. except IOError as error:
  3038. print(unicode(error), file=sys.stderr)
  3039. def fix_multiple_files(filenames, options, output=None):
  3040. """Fix list of files.
  3041. Optionally fix files recursively.
  3042. """
  3043. filenames = find_files(filenames, options.recursive, options.exclude)
  3044. if options.jobs > 1:
  3045. import multiprocessing
  3046. pool = multiprocessing.Pool(options.jobs)
  3047. pool.map(_fix_file,
  3048. [(name, options) for name in filenames])
  3049. else:
  3050. for name in filenames:
  3051. _fix_file((name, options, output))
  3052. def is_python_file(filename):
  3053. """Return True if filename is Python file."""
  3054. if filename.endswith('.py'):
  3055. return True
  3056. try:
  3057. with open_with_encoding(
  3058. filename,
  3059. limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f:
  3060. text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES)
  3061. if not text:
  3062. return False
  3063. first_line = text.splitlines()[0]
  3064. except (IOError, IndexError):
  3065. return False
  3066. if not PYTHON_SHEBANG_REGEX.match(first_line):
  3067. return False
  3068. return True
  3069. def is_probably_part_of_multiline(line):
  3070. """Return True if line is likely part of a multiline string.
  3071. When multiline strings are involved, pep8 reports the error as being
  3072. at the start of the multiline string, which doesn't work for us.
  3073. """
  3074. return (
  3075. '"""' in line or
  3076. "'''" in line or
  3077. line.rstrip().endswith('\\')
  3078. )
  3079. def wrap_output(output, encoding):
  3080. """Return output with specified encoding."""
  3081. return codecs.getwriter(encoding)(output.buffer
  3082. if hasattr(output, 'buffer')
  3083. else output)
  3084. def get_encoding():
  3085. """Return preferred encoding."""
  3086. return locale.getpreferredencoding() or sys.getdefaultencoding()
  3087. def main(argv=None, apply_config=True):
  3088. """Command-line entry."""
  3089. if argv is None:
  3090. argv = sys.argv
  3091. try:
  3092. # Exit on broken pipe.
  3093. signal.signal(signal.SIGPIPE, signal.SIG_DFL)
  3094. except AttributeError: # pragma: no cover
  3095. # SIGPIPE is not available on Windows.
  3096. pass
  3097. try:
  3098. args = parse_args(argv[1:], apply_config=apply_config)
  3099. if args.list_fixes:
  3100. for code, description in sorted(supported_fixes()):
  3101. print('{code} - {description}'.format(
  3102. code=code, description=description))
  3103. return 0
  3104. if args.files == ['-']:
  3105. assert not args.in_place
  3106. encoding = sys.stdin.encoding or get_encoding()
  3107. # LineEndingWrapper is unnecessary here due to the symmetry between
  3108. # standard in and standard out.
  3109. wrap_output(sys.stdout, encoding=encoding).write(
  3110. fix_code(sys.stdin.read(), args, encoding=encoding))
  3111. else:
  3112. if args.in_place or args.diff:
  3113. args.files = list(set(args.files))
  3114. else:
  3115. assert len(args.files) == 1
  3116. assert not args.recursive
  3117. fix_multiple_files(args.files, args, sys.stdout)
  3118. except KeyboardInterrupt:
  3119. return 1 # pragma: no cover
  3120. class CachedTokenizer(object):
  3121. """A one-element cache around tokenize.generate_tokens().
  3122. Original code written by Ned Batchelder, in coverage.py.
  3123. """
  3124. def __init__(self):
  3125. self.last_text = None
  3126. self.last_tokens = None
  3127. def generate_tokens(self, text):
  3128. """A stand-in for tokenize.generate_tokens()."""
  3129. if text != self.last_text:
  3130. string_io = io.StringIO(text)
  3131. self.last_tokens = list(
  3132. tokenize.generate_tokens(string_io.readline)
  3133. )
  3134. self.last_text = text
  3135. return self.last_tokens
# Module-wide tokenizer instance. Its bound method is exposed under the name
# generate_tokens, so the token list is reused when the same text is
# tokenized twice in a row.
_cached_tokenizer = CachedTokenizer()
generate_tokens = _cached_tokenizer.generate_tokens


# When run as a script, call main() and use its return value as the process
# exit status.
if __name__ == '__main__':
    sys.exit(main())