
lexer.py

# -*- coding: utf-8 -*-
#
# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com
#
# This module is part of python-sqlparse and is released under
# the BSD License: https://opensource.org/licenses/BSD-3-Clause

"""SQL Lexer"""

# This code is based on the SqlLexer in pygments.
# http://pygments.org/
# It's separated from the rest of pygments to increase performance
# and to allow some customizations.

from sqlparse import tokens
from sqlparse.keywords import SQL_REGEX
from sqlparse.compat import bytes_type, text_type, file_types
from sqlparse.utils import consume


class Lexer(object):
    """Lexer

    Empty class, kept for backwards compatibility.
    """

    @staticmethod
    def get_tokens(text, encoding=None):
        """Return an iterable of ``(tokentype, value)`` pairs generated
        from ``text``.

        ``text`` may be a string, a bytes object, or a file-like object.
        Bytes input is decoded using ``encoding`` if given, otherwise as
        UTF-8 with a ``unicode-escape`` fallback.
        """
        if isinstance(text, file_types):
            text = text.read()

        if isinstance(text, text_type):
            pass
        elif isinstance(text, bytes_type):
            if encoding:
                text = text.decode(encoding)
            else:
                try:
                    text = text.decode('utf-8')
                except UnicodeDecodeError:
                    text = text.decode('unicode-escape')
        else:
            raise TypeError(u"Expected text or file-like object, got {!r}"
                            .format(type(text)))

        iterable = enumerate(text)
        for pos, char in iterable:
            # Try each (regex, action) pair at the current position;
            # the first pattern that matches wins.
            for rexmatch, action in SQL_REGEX:
                m = rexmatch(text, pos)

                if not m:
                    continue
                elif isinstance(action, tokens._TokenType):
                    # A plain token type: emit the matched text as-is.
                    yield action, m.group()
                elif callable(action):
                    # A callable action returns the (tokentype, value) pair.
                    yield action(m.group())

                # Advance the iterator past the matched text.
                consume(iterable, m.end() - pos - 1)
                break
            else:
                # No pattern matched this character: emit an error token.
                yield tokens.Error, char


def tokenize(sql, encoding=None):
    """Tokenize sql.

    Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream
    of ``(token type, value)`` items.
    """
    return Lexer().get_tokens(sql, encoding)
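
For reference, a minimal usage sketch (not part of lexer.py itself): tokenize yields its (token type, value) pairs lazily, and bytes input is decoded as described in get_tokens. The token types shown in the comments are indicative of what sqlparse emits.

    from sqlparse.lexer import tokenize

    for ttype, value in tokenize("SELECT 1;"):
        print(ttype, repr(value))
    # Token.Keyword.DML 'SELECT'
    # Token.Text.Whitespace ' '
    # Token.Literal.Number.Integer '1'
    # Token.Punctuation ';'

    # Bytes input is accepted too; without an explicit encoding it is
    # decoded as UTF-8 (falling back to 'unicode-escape'):
    pairs = list(tokenize(b"SELECT 1;", encoding="utf-8"))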