You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.py 2.5KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2009-2018 the sqlparse authors and contributors
  4. # <see AUTHORS file>
  5. #
  6. # This module is part of python-sqlparse and is released under
  7. # the BSD License: https://opensource.org/licenses/BSD-3-Clause
  8. """SQL Lexer"""
  9. # This code is based on the SqlLexer in pygments.
  10. # http://pygments.org/
  11. # It's separated from the rest of pygments to increase performance
  12. # and to allow some customizations.
  13. from sqlparse import tokens
  14. from sqlparse.keywords import SQL_REGEX
  15. from sqlparse.compat import text_type, file_types
  16. from sqlparse.utils import consume
  17. class Lexer(object):
  18. """Lexer
  19. Empty class. Leaving for backwards-compatibility
  20. """
  21. @staticmethod
  22. def get_tokens(text, encoding=None):
  23. """
  24. Return an iterable of (tokentype, value) pairs generated from
  25. `text`. If `unfiltered` is set to `True`, the filtering mechanism
  26. is bypassed even if filters are defined.
  27. Also preprocess the text, i.e. expand tabs and strip it if
  28. wanted and applies registered filters.
  29. Split ``text`` into (tokentype, text) pairs.
  30. ``stack`` is the initial stack (default: ``['root']``)
  31. """
  32. if isinstance(text, file_types):
  33. text = text.read()
  34. if isinstance(text, text_type):
  35. pass
  36. elif isinstance(text, bytes):
  37. if encoding:
  38. text = text.decode(encoding)
  39. else:
  40. try:
  41. text = text.decode('utf-8')
  42. except UnicodeDecodeError:
  43. text = text.decode('unicode-escape')
  44. else:
  45. raise TypeError(u"Expected text or file-like object, got {!r}".
  46. format(type(text)))
  47. iterable = enumerate(text)
  48. for pos, char in iterable:
  49. for rexmatch, action in SQL_REGEX:
  50. m = rexmatch(text, pos)
  51. if not m:
  52. continue
  53. elif isinstance(action, tokens._TokenType):
  54. yield action, m.group()
  55. elif callable(action):
  56. yield action(m.group())
  57. consume(iterable, m.end() - pos - 1)
  58. break
  59. else:
  60. yield tokens.Error, char
  61. def tokenize(sql, encoding=None):
  62. """Tokenize sql.
  63. Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
  64. of ``(token type, value)`` items.
  65. """
  66. return Lexer().get_tokens(sql, encoding)