Working prototype of the serious game for teaching knowledge about software engineering work models.

htmlizer.py
# -*- test-case-name: twisted.python.test.test_htmlizer -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
HTML rendering of Python source.
"""

import keyword
import tokenize
from html import escape
from typing import List

from . import reflect


class TokenPrinter:
    """
    Format a stream of tokens and intermediate whitespace, for pretty-printing.
    """

    currentCol, currentLine = 0, 1
    lastIdentifier = parameters = 0
    encoding = "utf-8"

    def __init__(self, writer):
        """
        @param writer: A file-like object, opened in bytes mode.
        """
        self.writer = writer

    def printtoken(self, type, token, sCoordinates, eCoordinates, line):
        if hasattr(tokenize, "ENCODING") and type == tokenize.ENCODING:
            self.encoding = token
            return

        if not isinstance(token, bytes):
            token = token.encode(self.encoding)

        (srow, scol) = sCoordinates
        (erow, ecol) = eCoordinates
        if self.currentLine < srow:
            self.writer(b"\n" * (srow - self.currentLine))
            self.currentLine, self.currentCol = srow, 0
        self.writer(b" " * (scol - self.currentCol))
        if self.lastIdentifier:
            type = "identifier"
            self.parameters = 1
        elif type == tokenize.NAME:
            if keyword.iskeyword(token):
                type = "keyword"
            else:
                if self.parameters:
                    type = "parameter"
                else:
                    type = "variable"
        else:
            type = tokenize.tok_name.get(type)
            assert type is not None
            type = type.lower()
        self.writer(token, type)

        self.currentCol = ecol
        self.currentLine += token.count(b"\n")
        if self.currentLine != erow:
            self.currentCol = 0

        self.lastIdentifier = token in (b"def", b"class")
        if token == b":":
            self.parameters = 0


class HTMLWriter:
    """
    Write the stream of tokens and whitespace from L{TokenPrinter}, formatting
    tokens as HTML spans.
    """

    noSpan: List[str] = []

    def __init__(self, writer):
        self.writer = writer
        noSpan: List[str] = []
        reflect.accumulateClassList(self.__class__, "noSpan", noSpan)
        self.noSpan = noSpan

    def write(self, token, type=None):
        if isinstance(token, bytes):
            token = token.decode("utf-8")
        token = escape(token)
        token = token.encode("utf-8")
        if (type is None) or (type in self.noSpan):
            self.writer(token)
        else:
            self.writer(
                b'<span class="py-src-'
                + type.encode("utf-8")
                + b'">'
                + token
                + b"</span>"
            )


class SmallerHTMLWriter(HTMLWriter):
    """
    HTMLWriter that doesn't generate spans for some junk.

    Results in much smaller HTML output.
    """

    noSpan = ["endmarker", "indent", "dedent", "op", "newline", "nl"]


def filter(inp, out, writer=HTMLWriter):
    out.write(b"<pre>")
    printer = TokenPrinter(writer(out.write).write).printtoken
    try:
        for token in tokenize.tokenize(inp.readline):
            (tokenType, string, start, end, line) = token
            printer(tokenType, string, start, end, line)
    except tokenize.TokenError:
        pass
    out.write(b"</pre>\n")


def main():
    import sys

    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    with open(sys.argv[1], "rb") as f:
        filter(f, stdout)


if __name__ == "__main__":
    main()
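
For orientation, a minimal usage sketch follows. It is not part of htmlizer.py itself, and it assumes the module is importable as twisted.python.htmlizer (the relative import of reflect above implies it lives inside the twisted.python package); it feeds filter() an in-memory snippet and prints the resulting markup.

    import io

    from twisted.python import htmlizer

    # A short piece of source to colorize.
    source = io.BytesIO(b"def greet(name):\n    return 'Hello, ' + name\n")
    output = io.BytesIO()

    # filter() tokenizes the input bytes stream and writes a <pre> block in
    # which each token is wrapped in <span class="py-src-..."> (keyword,
    # identifier, parameter, variable, ...). Passing writer=SmallerHTMLWriter
    # instead would omit spans for operators, newlines, and indent tokens.
    htmlizer.filter(source, output)

    print(output.getvalue().decode("utf-8"))

The file can also be run on a source file directly, as main() shows: it renders the file named by sys.argv[1] to standard output, so something like python -m twisted.python.htmlizer somefile.py > somefile.html should work (running it as a module keeps the relative import intact).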