Funktionierender Prototyp des Serious Games zur Vermittlung von Wissen zu Software-Engineering-Arbeitsmodellen.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

grouping.py 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. #
  2. # Copyright (C) 2009-2020 the sqlparse authors and contributors
  3. # <see AUTHORS file>
  4. #
  5. # This module is part of python-sqlparse and is released under
  6. # the BSD License: https://opensource.org/licenses/BSD-3-Clause
  7. from sqlparse import sql
  8. from sqlparse import tokens as T
  9. from sqlparse.utils import recurse, imt
# Token-type shorthands shared by the grouping predicates below.
T_NUMERICAL = (T.Number, T.Number.Integer, T.Number.Float)
T_STRING = (T.String, T.String.Single, T.String.Symbol)
T_NAME = (T.Name, T.Name.Placeholder)
def _group_matching(tlist, cls):
    """Groups Tokens that have beginning and end."""
    opens = []  # stack of open-token indices awaiting a matching close
    tidx_offset = 0  # number of tokens already collapsed into groups
    for idx, token in enumerate(list(tlist)):
        # index of this token within the (mutated) tlist
        tidx = idx - tidx_offset

        if token.is_whitespace:
            # ~50% of tokens will be whitespace. Will checking early
            # for them avoid 3 comparisons, but then add 1 more comparison
            # for the other ~50% of tokens...
            continue

        if token.is_group and not isinstance(token, cls):
            # Check inside previously grouped (i.e. parenthesis) if group
            # of different type is inside (i.e., case). though ideally should
            # should check for all open/close tokens at once to avoid recursion
            _group_matching(token, cls)
            continue

        if token.match(*cls.M_OPEN):
            opens.append(tidx)
        elif token.match(*cls.M_CLOSE):
            try:
                open_idx = opens.pop()
            except IndexError:
                # this indicates invalid sql and unbalanced tokens.
                # instead of break, continue in case other "valid" groups exist
                continue
            close_idx = tidx
            tlist.group_tokens(cls, open_idx, close_idx)
            # grouping folded close_idx - open_idx tokens into one node;
            # shift all subsequent indices back accordingly
            tidx_offset += close_idx - open_idx
  42. def group_brackets(tlist):
  43. _group_matching(tlist, sql.SquareBrackets)
  44. def group_parenthesis(tlist):
  45. _group_matching(tlist, sql.Parenthesis)
  46. def group_case(tlist):
  47. _group_matching(tlist, sql.Case)
  48. def group_if(tlist):
  49. _group_matching(tlist, sql.If)
  50. def group_for(tlist):
  51. _group_matching(tlist, sql.For)
  52. def group_begin(tlist):
  53. _group_matching(tlist, sql.Begin)
  54. def group_typecasts(tlist):
  55. def match(token):
  56. return token.match(T.Punctuation, '::')
  57. def valid(token):
  58. return token is not None
  59. def post(tlist, pidx, tidx, nidx):
  60. return pidx, nidx
  61. valid_prev = valid_next = valid
  62. _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
  63. def group_tzcasts(tlist):
  64. def match(token):
  65. return token.ttype == T.Keyword.TZCast
  66. def valid_prev(token):
  67. return token is not None
  68. def valid_next(token):
  69. return token is not None and (
  70. token.is_whitespace
  71. or token.match(T.Keyword, 'AS')
  72. or token.match(*sql.TypedLiteral.M_CLOSE)
  73. )
  74. def post(tlist, pidx, tidx, nidx):
  75. return pidx, nidx
  76. _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
def group_typed_literal(tlist):
    """Group typed literals (e.g. date/interval literals) into ``sql.TypedLiteral``."""
    # definitely not complete, see e.g.:
    # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literal-syntax
    # https://docs.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literals
    # https://www.postgresql.org/docs/9.1/datatype-datetime.html
    # https://www.postgresql.org/docs/9.1/functions-datetime.html
    def match(token):
        # an opening token of a typed literal (per TypedLiteral.M_OPEN)
        return imt(token, m=sql.TypedLiteral.M_OPEN)

    def match_to_extend(token):
        # an already-built TypedLiteral that may absorb trailing tokens
        return isinstance(token, sql.TypedLiteral)

    def valid_prev(token):
        return token is not None

    def valid_next(token):
        return token is not None and token.match(*sql.TypedLiteral.M_CLOSE)

    def valid_final(token):
        return token is not None and token.match(*sql.TypedLiteral.M_EXTEND)

    def post(tlist, pidx, tidx, nidx):
        # group from the matched token itself through the next token
        return tidx, nidx

    # Two passes: first build the TypedLiteral groups, then extend
    # existing groups with trailing M_EXTEND tokens.
    _group(tlist, sql.TypedLiteral, match, valid_prev, valid_next,
           post, extend=False)
    _group(tlist, sql.TypedLiteral, match_to_extend, valid_prev, valid_final,
           post, extend=True)
  99. def group_period(tlist):
  100. def match(token):
  101. return token.match(T.Punctuation, '.')
  102. def valid_prev(token):
  103. sqlcls = sql.SquareBrackets, sql.Identifier
  104. ttypes = T.Name, T.String.Symbol
  105. return imt(token, i=sqlcls, t=ttypes)
  106. def valid_next(token):
  107. # issue261, allow invalid next token
  108. return True
  109. def post(tlist, pidx, tidx, nidx):
  110. # next_ validation is being performed here. issue261
  111. sqlcls = sql.SquareBrackets, sql.Function
  112. ttypes = T.Name, T.String.Symbol, T.Wildcard
  113. next_ = tlist[nidx] if nidx is not None else None
  114. valid_next = imt(next_, i=sqlcls, t=ttypes)
  115. return (pidx, nidx) if valid_next else (pidx, tidx)
  116. _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
  117. def group_as(tlist):
  118. def match(token):
  119. return token.is_keyword and token.normalized == 'AS'
  120. def valid_prev(token):
  121. return token.normalized == 'NULL' or not token.is_keyword
  122. def valid_next(token):
  123. ttypes = T.DML, T.DDL, T.CTE
  124. return not imt(token, t=ttypes) and token is not None
  125. def post(tlist, pidx, tidx, nidx):
  126. return pidx, nidx
  127. _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
def group_assignment(tlist):
    """Group ``var := value`` assignments into ``sql.Assignment``."""
    def match(token):
        return token.match(T.Assignment, ':=')

    def valid(token):
        # NOTE(review): ``(T.Keyword)`` is NOT a tuple — this tests
        # membership against the token type itself, which presumably also
        # excludes Keyword sub-types; confirm before "fixing" to a tuple.
        return token is not None and token.ttype not in (T.Keyword)

    def post(tlist, pidx, tidx, nidx):
        # extend the grouped span up to a following semicolon, if any
        m_semicolon = T.Punctuation, ';'
        snidx, _ = tlist.token_next_by(m=m_semicolon, idx=nidx)
        nidx = snidx or nidx
        return pidx, nidx

    valid_prev = valid_next = valid
    _group(tlist, sql.Assignment, match, valid_prev, valid_next, post)
  140. def group_comparison(tlist):
  141. sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier,
  142. sql.Operation, sql.TypedLiteral)
  143. ttypes = T_NUMERICAL + T_STRING + T_NAME
  144. def match(token):
  145. return token.ttype == T.Operator.Comparison
  146. def valid(token):
  147. if imt(token, t=ttypes, i=sqlcls):
  148. return True
  149. elif token and token.is_keyword and token.normalized == 'NULL':
  150. return True
  151. else:
  152. return False
  153. def post(tlist, pidx, tidx, nidx):
  154. return pidx, nidx
  155. valid_prev = valid_next = valid
  156. _group(tlist, sql.Comparison, match,
  157. valid_prev, valid_next, post, extend=False)
  158. @recurse(sql.Identifier)
  159. def group_identifier(tlist):
  160. ttypes = (T.String.Symbol, T.Name)
  161. tidx, token = tlist.token_next_by(t=ttypes)
  162. while token:
  163. tlist.group_tokens(sql.Identifier, tidx, tidx)
  164. tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)
  165. def group_arrays(tlist):
  166. sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function
  167. ttypes = T.Name, T.String.Symbol
  168. def match(token):
  169. return isinstance(token, sql.SquareBrackets)
  170. def valid_prev(token):
  171. return imt(token, i=sqlcls, t=ttypes)
  172. def valid_next(token):
  173. return True
  174. def post(tlist, pidx, tidx, nidx):
  175. return pidx, tidx
  176. _group(tlist, sql.Identifier, match,
  177. valid_prev, valid_next, post, extend=True, recurse=False)
  178. def group_operator(tlist):
  179. ttypes = T_NUMERICAL + T_STRING + T_NAME
  180. sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
  181. sql.Identifier, sql.Operation, sql.TypedLiteral)
  182. def match(token):
  183. return imt(token, t=(T.Operator, T.Wildcard))
  184. def valid(token):
  185. return imt(token, i=sqlcls, t=ttypes) \
  186. or (token and token.match(
  187. T.Keyword,
  188. ('CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP')))
  189. def post(tlist, pidx, tidx, nidx):
  190. tlist[tidx].ttype = T.Operator
  191. return pidx, nidx
  192. valid_prev = valid_next = valid
  193. _group(tlist, sql.Operation, match,
  194. valid_prev, valid_next, post, extend=False)
  195. def group_identifier_list(tlist):
  196. m_role = T.Keyword, ('null', 'role')
  197. sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
  198. sql.IdentifierList, sql.Operation)
  199. ttypes = (T_NUMERICAL + T_STRING + T_NAME
  200. + (T.Keyword, T.Comment, T.Wildcard))
  201. def match(token):
  202. return token.match(T.Punctuation, ',')
  203. def valid(token):
  204. return imt(token, i=sqlcls, m=m_role, t=ttypes)
  205. def post(tlist, pidx, tidx, nidx):
  206. return pidx, nidx
  207. valid_prev = valid_next = valid
  208. _group(tlist, sql.IdentifierList, match,
  209. valid_prev, valid_next, post, extend=True)
@recurse(sql.Comment)
def group_comments(tlist):
    """Merge runs of adjacent comment tokens into ``sql.Comment`` groups."""
    tidx, token = tlist.token_next_by(t=T.Comment)
    while token:
        # find the first token after tidx that is neither a comment nor
        # whitespace; the run to be grouped ends just before it
        eidx, end = tlist.token_not_matching(
            lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace, idx=tidx)
        if end is not None:
            # step back to the last token of the run (keeping whitespace)
            eidx, end = tlist.token_prev(eidx, skip_ws=False)
            tlist.group_tokens(sql.Comment, tidx, eidx)

        tidx, token = tlist.token_next_by(t=T.Comment, idx=tidx)
@recurse(sql.Where)
def group_where(tlist):
    """Group a WHERE keyword and its clause body into ``sql.Where``."""
    tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN)
    while token:
        # the clause runs until the next closing keyword, or to the end
        eidx, end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tidx)

        if end is None:
            end = tlist._groupable_tokens[-1]
        else:
            end = tlist.tokens[eidx - 1]
        # TODO: convert this to eidx instead of end token.
        # i think above values are len(tlist) and eidx-1
        eidx = tlist.token_index(end)
        tlist.group_tokens(sql.Where, tidx, eidx)
        tidx, token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tidx)
  234. @recurse()
  235. def group_aliased(tlist):
  236. I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
  237. sql.Operation, sql.Comparison)
  238. tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
  239. while token:
  240. nidx, next_ = tlist.token_next(tidx)
  241. if isinstance(next_, sql.Identifier):
  242. tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True)
  243. tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx)
  244. @recurse(sql.Function)
  245. def group_functions(tlist):
  246. has_create = False
  247. has_table = False
  248. has_as = False
  249. for tmp_token in tlist.tokens:
  250. if tmp_token.value.upper() == 'CREATE':
  251. has_create = True
  252. if tmp_token.value.upper() == 'TABLE':
  253. has_table = True
  254. if tmp_token.value == 'AS':
  255. has_as = True
  256. if has_create and has_table and not has_as:
  257. return
  258. tidx, token = tlist.token_next_by(t=T.Name)
  259. while token:
  260. nidx, next_ = tlist.token_next(tidx)
  261. if isinstance(next_, sql.Parenthesis):
  262. tlist.group_tokens(sql.Function, tidx, nidx)
  263. tidx, token = tlist.token_next_by(t=T.Name, idx=tidx)
  264. def group_order(tlist):
  265. """Group together Identifier and Asc/Desc token"""
  266. tidx, token = tlist.token_next_by(t=T.Keyword.Order)
  267. while token:
  268. pidx, prev_ = tlist.token_prev(tidx)
  269. if imt(prev_, i=sql.Identifier, t=T.Number):
  270. tlist.group_tokens(sql.Identifier, pidx, tidx)
  271. tidx = pidx
  272. tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx)
  273. @recurse()
  274. def align_comments(tlist):
  275. tidx, token = tlist.token_next_by(i=sql.Comment)
  276. while token:
  277. pidx, prev_ = tlist.token_prev(tidx)
  278. if isinstance(prev_, sql.TokenList):
  279. tlist.group_tokens(sql.TokenList, pidx, tidx, extend=True)
  280. tidx = pidx
  281. tidx, token = tlist.token_next_by(i=sql.Comment, idx=tidx)
  282. def group_values(tlist):
  283. tidx, token = tlist.token_next_by(m=(T.Keyword, 'VALUES'))
  284. start_idx = tidx
  285. end_idx = -1
  286. while token:
  287. if isinstance(token, sql.Parenthesis):
  288. end_idx = tidx
  289. tidx, token = tlist.token_next(tidx)
  290. if end_idx != -1:
  291. tlist.group_tokens(sql.Values, start_idx, end_idx, extend=True)
  292. def group(stmt):
  293. for func in [
  294. group_comments,
  295. # _group_matching
  296. group_brackets,
  297. group_parenthesis,
  298. group_case,
  299. group_if,
  300. group_for,
  301. group_begin,
  302. group_functions,
  303. group_where,
  304. group_period,
  305. group_arrays,
  306. group_identifier,
  307. group_order,
  308. group_typecasts,
  309. group_tzcasts,
  310. group_typed_literal,
  311. group_operator,
  312. group_comparison,
  313. group_as,
  314. group_aliased,
  315. group_assignment,
  316. align_comments,
  317. group_identifier_list,
  318. group_values,
  319. ]:
  320. func(stmt)
  321. return stmt
def _group(tlist, cls, match,
           valid_prev=lambda t: True,
           valid_next=lambda t: True,
           post=None,
           extend=True,
           recurse=True
           ):
    """Groups together tokens that are joined by a middle token. i.e. x < y"""
    tidx_offset = 0  # number of tokens collapsed into groups so far
    pidx, prev_ = None, None  # last non-whitespace token (or group) seen
    for idx, token in enumerate(list(tlist)):
        # index of this token within the (mutated) tlist
        tidx = idx - tidx_offset
        if tidx < 0:  # tidx shouldn't get negative
            continue

        if token.is_whitespace:
            continue

        if recurse and token.is_group and not isinstance(token, cls):
            _group(token, cls, match, valid_prev, valid_next, post, extend)

        if match(token):
            nidx, next_ = tlist.token_next(tidx)
            if prev_ and valid_prev(prev_) and valid_next(next_):
                # post() decides the exact span to group (and may perform
                # extra validation or mutation)
                from_idx, to_idx = post(tlist, pidx, tidx, nidx)
                grp = tlist.group_tokens(cls, from_idx, to_idx, extend=extend)

                # adjust indices for the tokens folded into the new group
                tidx_offset += to_idx - from_idx
                pidx, prev_ = from_idx, grp
                continue

        pidx, prev_ = tidx, token