You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

aligned_indent.py 5.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2018 the sqlparse authors and contributors
  4. # <see AUTHORS file>
  5. #
  6. # This module is part of python-sqlparse and is released under
  7. # the BSD License: https://opensource.org/licenses/BSD-3-Clause
  8. from sqlparse import sql, tokens as T
  9. from sqlparse.compat import text_type
  10. from sqlparse.utils import offset, indent
  11. class AlignedIndentFilter(object):
  12. join_words = (r'((LEFT\s+|RIGHT\s+|FULL\s+)?'
  13. r'(INNER\s+|OUTER\s+|STRAIGHT\s+)?|'
  14. r'(CROSS\s+|NATURAL\s+)?)?JOIN\b')
  15. by_words = r'(GROUP|ORDER)\s+BY\b'
  16. split_words = ('FROM',
  17. join_words, 'ON', by_words,
  18. 'WHERE', 'AND', 'OR',
  19. 'HAVING', 'LIMIT',
  20. 'UNION', 'VALUES',
  21. 'SET', 'BETWEEN', 'EXCEPT')
  22. def __init__(self, char=' ', n='\n'):
  23. self.n = n
  24. self.offset = 0
  25. self.indent = 0
  26. self.char = char
  27. self._max_kwd_len = len('select')
  28. def nl(self, offset=1):
  29. # offset = 1 represent a single space after SELECT
  30. offset = -len(offset) if not isinstance(offset, int) else offset
  31. # add two for the space and parenthesis
  32. indent = self.indent * (2 + self._max_kwd_len)
  33. return sql.Token(T.Whitespace, self.n + self.char * (
  34. self._max_kwd_len + offset + indent + self.offset))
  35. def _process_statement(self, tlist):
  36. if len(tlist.tokens) > 0 and tlist.tokens[0].is_whitespace \
  37. and self.indent == 0:
  38. tlist.tokens.pop(0)
  39. # process the main query body
  40. self._process(sql.TokenList(tlist.tokens))
  41. def _process_parenthesis(self, tlist):
  42. # if this isn't a subquery, don't re-indent
  43. _, token = tlist.token_next_by(m=(T.DML, 'SELECT'))
  44. if token is not None:
  45. with indent(self):
  46. tlist.insert_after(tlist[0], self.nl('SELECT'))
  47. # process the inside of the parenthesis
  48. self._process_default(tlist)
  49. # de-indent last parenthesis
  50. tlist.insert_before(tlist[-1], self.nl())
  51. def _process_identifierlist(self, tlist):
  52. # columns being selected
  53. identifiers = list(tlist.get_identifiers())
  54. identifiers.pop(0)
  55. [tlist.insert_before(token, self.nl()) for token in identifiers]
  56. self._process_default(tlist)
  57. def _process_case(self, tlist):
  58. offset_ = len('case ') + len('when ')
  59. cases = tlist.get_cases(skip_ws=True)
  60. # align the end as well
  61. end_token = tlist.token_next_by(m=(T.Keyword, 'END'))[1]
  62. cases.append((None, [end_token]))
  63. condition_width = [len(' '.join(map(text_type, cond))) if cond else 0
  64. for cond, _ in cases]
  65. max_cond_width = max(condition_width)
  66. for i, (cond, value) in enumerate(cases):
  67. # cond is None when 'else or end'
  68. stmt = cond[0] if cond else value[0]
  69. if i > 0:
  70. tlist.insert_before(stmt, self.nl(
  71. offset_ - len(text_type(stmt))))
  72. if cond:
  73. ws = sql.Token(T.Whitespace, self.char * (
  74. max_cond_width - condition_width[i]))
  75. tlist.insert_after(cond[-1], ws)
  76. def _next_token(self, tlist, idx=-1):
  77. split_words = T.Keyword, self.split_words, True
  78. tidx, token = tlist.token_next_by(m=split_words, idx=idx)
  79. # treat "BETWEEN x and y" as a single statement
  80. if token and token.normalized == 'BETWEEN':
  81. tidx, token = self._next_token(tlist, tidx)
  82. if token and token.normalized == 'AND':
  83. tidx, token = self._next_token(tlist, tidx)
  84. return tidx, token
  85. def _split_kwds(self, tlist):
  86. tidx, token = self._next_token(tlist)
  87. while token:
  88. # joins, group/order by are special case. only consider the first
  89. # word as aligner
  90. if (
  91. token.match(T.Keyword, self.join_words, regex=True) or
  92. token.match(T.Keyword, self.by_words, regex=True)
  93. ):
  94. token_indent = token.value.split()[0]
  95. else:
  96. token_indent = text_type(token)
  97. tlist.insert_before(token, self.nl(token_indent))
  98. tidx += 1
  99. tidx, token = self._next_token(tlist, tidx)
  100. def _process_default(self, tlist):
  101. self._split_kwds(tlist)
  102. # process any sub-sub statements
  103. for sgroup in tlist.get_sublists():
  104. idx = tlist.token_index(sgroup)
  105. pidx, prev_ = tlist.token_prev(idx)
  106. # HACK: make "group/order by" work. Longer than max_len.
  107. offset_ = 3 if (
  108. prev_ and prev_.match(T.Keyword, self.by_words, regex=True)
  109. ) else 0
  110. with offset(self, offset_):
  111. self._process(sgroup)
  112. def _process(self, tlist):
  113. func_name = '_process_{cls}'.format(cls=type(tlist).__name__)
  114. func = getattr(self, func_name.lower(), self._process_default)
  115. func(tlist)
  116. def process(self, stmt):
  117. self._process(stmt)
  118. return stmt