Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

grouping.py 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
  4. #
  5. # This module is part of python-sqlparse and is released under
  6. # the BSD License: https://opensource.org/licenses/BSD-3-Clause
  7. from sqlparse import sql
  8. from sqlparse import tokens as T
  9. from sqlparse.utils import recurse, imt
# Token-type tuples shared by the grouping helpers below to recognise
# literal operands (numbers, strings) and name-like tokens.
T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
T_STRING = (T.String, T.String.Single, T.String.Symbol)
T_NAME = (T.Name, T.Name.Placeholder)
def _group_matching(tlist, cls):
    """Groups Tokens that have beginning and end.

    Scans ``tlist`` for balanced ``cls.M_OPEN``/``cls.M_CLOSE`` pairs and
    wraps each pair (inclusive) into a ``cls`` group.  Nesting is handled
    with a stack of open indices; already-grouped sublists of another type
    are descended into recursively.  Modifies ``tlist`` in place.
    """
    opens = []
    # Grouping collapses a span of tokens into one, so indices taken from
    # the original enumeration must be shifted left by the consumed count.
    tidx_offset = 0
    for idx, token in enumerate(list(tlist)):
        tidx = idx - tidx_offset
        if token.is_whitespace:
            # ~50% of tokens will be whitespace. Will checking early
            # for them avoid 3 comparisons, but then add 1 more comparison
            # for the other ~50% of tokens...
            continue
        if token.is_group and not isinstance(token, cls):
            # Check inside previously grouped (i.e. parenthesis) if group
            # of different type is inside (i.e. case). Though ideally it
            # should check for all open/close tokens at once to avoid recursion
            _group_matching(token, cls)
            continue
        if token.match(*cls.M_OPEN):
            opens.append(tidx)
        elif token.match(*cls.M_CLOSE):
            try:
                open_idx = opens.pop()
            except IndexError:
                # this indicates invalid sql and unbalanced tokens.
                # instead of break, continue in case other "valid" groups exist
                continue
            close_idx = tidx
            tlist.group_tokens(cls, open_idx, close_idx)
            # The grouped span became a single token; shift later indices.
            tidx_offset += close_idx - open_idx
def group_brackets(tlist):
    """Group matching ``[``/``]`` pairs into ``sql.SquareBrackets``."""
    _group_matching(tlist, sql.SquareBrackets)
def group_parenthesis(tlist):
    """Group matching ``(``/``)`` pairs into ``sql.Parenthesis``."""
    _group_matching(tlist, sql.Parenthesis)
def group_case(tlist):
    """Group ``CASE ... END`` spans into ``sql.Case``."""
    _group_matching(tlist, sql.Case)
def group_if(tlist):
    """Group ``IF ... END IF`` spans into ``sql.If``."""
    _group_matching(tlist, sql.If)
def group_for(tlist):
    """Group ``FOR ... END LOOP`` spans into ``sql.For``."""
    _group_matching(tlist, sql.For)
def group_begin(tlist):
    """Group ``BEGIN ... END`` spans into ``sql.Begin``."""
    _group_matching(tlist, sql.Begin)
  54. def group_typecasts(tlist):
  55. def match(token):
  56. return token.match(T.Punctuation, '::')
  57. def valid(token):
  58. return token is not None
  59. def post(tlist, pidx, tidx, nidx):
  60. return pidx, nidx
  61. valid_prev = valid_next = valid
  62. _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
  63. def group_period(tlist):
  64. def match(token):
  65. return token.match(T.Punctuation, '.')
  66. def valid_prev(token):
  67. sqlcls = sql.SquareBrackets, sql.Identifier
  68. ttypes = T.Name, T.String.Symbol
  69. return imt(token, i=sqlcls, t=ttypes)
  70. def valid_next(token):
  71. # issue261, allow invalid next token
  72. return True
  73. def post(tlist, pidx, tidx, nidx):
  74. # next_ validation is being performed here. issue261
  75. sqlcls = sql.SquareBrackets, sql.Function
  76. ttypes = T.Name, T.String.Symbol, T.Wildcard
  77. next_ = tlist[nidx] if nidx is not None else None
  78. valid_next = imt(next_, i=sqlcls, t=ttypes)
  79. return (pidx, nidx) if valid_next else (pidx, tidx)
  80. _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
  81. def group_as(tlist):
  82. def match(token):
  83. return token.is_keyword and token.normalized == 'AS'
  84. def valid_prev(token):
  85. return token.normalized == 'NULL' or not token.is_keyword
  86. def valid_next(token):
  87. ttypes = T.DML, T.DDL
  88. return not imt(token, t=ttypes) and token is not None
  89. def post(tlist, pidx, tidx, nidx):
  90. return pidx, nidx
  91. _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
def group_assignment(tlist):
    """Group ``x := y`` assignments into ``sql.Assignment`` tokens."""
    def match(token):
        return token.match(T.Assignment, ':=')

    def valid(token):
        # NOTE(review): ``(T.Keyword)`` is NOT a tuple -- the ``not in``
        # here resolves via TokenType.__contains__, i.e. it excludes any
        # Keyword subtype as well. Rewriting it as ``(T.Keyword,)`` would
        # narrow the check to exact equality; confirm intent before "fixing".
        return token is not None and token.ttype not in (T.Keyword)

    def post(tlist, pidx, tidx, nidx):
        # Extend the group up to a terminating semicolon, if one follows.
        m_semicolon = T.Punctuation, ';'
        snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx)
        nidx = snidx or nidx
        return pidx, nidx

    valid_prev = valid_next = valid
    _group(tlist, sql.Assignment, match, valid_prev, valid_next, post)
  104. def group_comparison(tlist):
  105. sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier,
  106. sql.Operation)
  107. ttypes = T_NUMERICAL + T_STRING + T_NAME
  108. def match(token):
  109. return token.ttype == T.Operator.Comparison
  110. def valid(token):
  111. if imt(token, t=ttypes, i=sqlcls):
  112. return True
  113. elif token and token.is_keyword and token.normalized == 'NULL':
  114. return True
  115. else:
  116. return False
  117. def post(tlist, pidx, tidx, nidx):
  118. return pidx, nidx
  119. valid_prev = valid_next = valid
  120. _group(tlist, sql.Comparison, match,
  121. valid_prev, valid_next, post, extend=False)
  122. @recurse(sql.Identifier)
  123. def group_identifier(tlist):
  124. ttypes = (T.String.Symbol, T.Name)
  125. tidx, token = tlist.token_next_by(t=ttypes)
  126. while token:
  127. tlist.group_tokens(sql.Identifier, tidx, tidx)
  128. tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
  129. def group_arrays(tlist):
  130. sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function
  131. ttypes = T.Name, T.String.Symbol
  132. def match(token):
  133. return isinstance(token, sql.SquareBrackets)
  134. def valid_prev(token):
  135. return imt(token, i=sqlcls, t=ttypes)
  136. def valid_next(token):
  137. return True
  138. def post(tlist, pidx, tidx, nidx):
  139. return pidx, tidx
  140. _group(tlist, sql.Identifier, match,
  141. valid_prev, valid_next, post, extend=True, recurse=False)
  142. def group_operator(tlist):
  143. ttypes = T_NUMERICAL + T_STRING + T_NAME
  144. sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
  145. sql.Identifier, sql.Operation)
  146. def match(token):
  147. return imt(token, t=(T.Operator, T.Wildcard))
  148. def valid(token):
  149. return imt(token, i=sqlcls, t=ttypes)
  150. def post(tlist, pidx, tidx, nidx):
  151. tlist[tidx].ttype = T.Operator
  152. return pidx, nidx
  153. valid_prev = valid_next = valid
  154. _group(tlist, sql.Operation, match,
  155. valid_prev, valid_next, post, extend=False)
  156. def group_identifier_list(tlist):
  157. m_role = T.Keyword, ('null', 'role')
  158. sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
  159. sql.IdentifierList, sql.Operation)
  160. ttypes = (T_NUMERICAL + T_STRING + T_NAME +
  161. (T.Keyword, T.Comment, T.Wildcard))
  162. def match(token):
  163. return token.match(T.Punctuation, ',')
  164. def valid(token):
  165. return imt(token, i=sqlcls, m=m_role, t=ttypes)
  166. def post(tlist, pidx, tidx, nidx):
  167. return pidx, nidx
  168. valid_prev = valid_next = valid
  169. _group(tlist, sql.IdentifierList, match,
  170. valid_prev, valid_next, post, extend=True)
  171. @recurse(sql.Comment)
  172. def group_comments(tlist):
  173. tidx, token = tlist.token_next_by(t=T.Comment)
  174. while token:
  175. eidx, end = tlist.token_not_matching(
  176. lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace, idx=tidx)
  177. if end is not None:
  178. eidx, end = tlist.token_prev(eidx, skip_ws=False)
  179. tlist.group_tokens(sql.Comment, tidx, eidx)
  180. tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx)
@recurse(sql.Where)
def group_where(tlist):
    """Group a WHERE clause and everything up to its closing keyword
    (or end of statement) into a ``sql.Where`` token."""
    tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN)
    while token:
        eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx)
        if end is None:
            # No closing keyword: the clause runs to the last groupable token.
            end = tlist._groupable_tokens[-1]
        else:
            # Stop just before the closing keyword.
            end = tlist.tokens[eidx - 1]
        # TODO: convert this to eidx instead of end token.
        # i think above values are len(tlist) and eidx-1
        eidx = tlist.token_index(end)
        tlist.group_tokens(sql.Where, tidx, eidx)
        tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx)
  195. @recurse()
  196. def group_aliased(tlist):
  197. I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
  198. sql.Operation, sql.Comparison)
  199. tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
  200. while token:
  201. nidx, next_ = tlist.token_next(tidx)
  202. if isinstance(next_, sql.Identifier):
  203. tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True)
  204. tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx)
  205. @recurse(sql.Function)
  206. def group_functions(tlist):
  207. has_create = False
  208. has_table = False
  209. for tmp_token in tlist.tokens:
  210. if tmp_token.value == 'CREATE':
  211. has_create = True
  212. if tmp_token.value == 'TABLE':
  213. has_table = True
  214. if has_create and has_table:
  215. return
  216. tidx, token = tlist.token_next_by(t=T.Name)
  217. while token:
  218. nidx, next_ = tlist.token_next(tidx)
  219. if isinstance(next_, sql.Parenthesis):
  220. tlist.group_tokens(sql.Function, tidx, nidx)
  221. tidx, token = tlist.token_next_by(t=T.Name, idx=tidx)
  222. def group_order(tlist):
  223. """Group together Identifier and Asc/Desc token"""
  224. tidx, token = tlist.token_next_by(t=T.Keyword.Order)
  225. while token:
  226. pidx, prev_ = tlist.token_prev(tidx)
  227. if imt(prev_, i=sql.Identifier, t=T.Number):
  228. tlist.group_tokens(sql.Identifier, pidx, tidx)
  229. tidx = pidx
  230. tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx)
  231. @recurse()
  232. def align_comments(tlist):
  233. tidx, token = tlist.token_next_by(i=sql.Comment)
  234. while token:
  235. pidx, prev_ = tlist.token_prev(tidx)
  236. if isinstance(prev_, sql.TokenList):
  237. tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True)
  238. tidx = pidx
  239. tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx)
  240. def group(stmt):
  241. for func in [
  242. group_comments,
  243. # _group_matching
  244. group_brackets,
  245. group_parenthesis,
  246. group_case,
  247. group_if,
  248. group_for,
  249. group_begin,
  250. group_functions,
  251. group_where,
  252. group_period,
  253. group_arrays,
  254. group_identifier,
  255. group_order,
  256. group_typecasts,
  257. group_operator,
  258. group_comparison,
  259. group_as,
  260. group_aliased,
  261. group_assignment,
  262. align_comments,
  263. group_identifier_list,
  264. ]:
  265. func(stmt)
  266. return stmt
def _group(tlist, cls, match,
           valid_prev=lambda t: True,
           valid_next=lambda t: True,
           post=None,
           extend=True,
           recurse=True
           ):
    """Groups together tokens that are joined by a middle token. ie. x < y

    :param tlist: token list to modify in place.
    :param cls: group class to create (e.g. ``sql.Comparison``).
    :param match: predicate identifying the middle token.
    :param valid_prev: predicate validating the token before the match.
    :param valid_next: predicate validating the token after the match.
    :param post: callable ``(tlist, pidx, tidx, nidx) -> (from_idx, to_idx)``
        choosing the final span to group.
    :param extend: passed through to ``group_tokens``.
    :param recurse: whether to descend into nested groups.
    """
    # Grouping collapses tokens, so indices from the original enumeration
    # must be shifted left by the number of consumed tokens.
    tidx_offset = 0
    pidx, prev_ = None, None
    for idx, token in enumerate(list(tlist)):
        tidx = idx - tidx_offset
        if token.is_whitespace:
            continue

        if recurse and token.is_group and not isinstance(token, cls):
            # NOTE: the recursive call does not forward ``recurse``;
            # nested levels always recurse (default True).
            _group(token, cls, match, valid_prev, valid_next, post, extend)

        if match(token):
            nidx, next_ = tlist.token_next(tidx)
            if prev_ and valid_prev(prev_) and valid_next(next_):
                from_idx, to_idx = post(tlist, pidx, tidx, nidx)
                grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend)

                tidx_offset += to_idx - from_idx
                # The new group becomes the "previous" token for chaining.
                pidx, prev_ = from_idx, grp
                continue

        pidx, prev_ = tidx, token