You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

_flatten.py 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. # -*- test-case-name: twisted.web.test.test_flatten -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. Context-free flattener/serializer for rendering Python objects, possibly
  6. complex or arbitrarily nested, as strings.
  7. """
  8. from __future__ import division, absolute_import
  9. from io import BytesIO
  10. from sys import exc_info
  11. from types import GeneratorType
  12. from traceback import extract_tb
  13. try:
  14. from inspect import iscoroutine
  15. except ImportError:
  16. def iscoroutine(*args, **kwargs):
  17. return False
  18. from twisted.python.compat import unicode, nativeString, iteritems
  19. from twisted.internet.defer import Deferred, ensureDeferred
  20. from twisted.web._stan import Tag, slot, voidElements, Comment, CDATA, CharRef
  21. from twisted.web.error import UnfilledSlot, UnsupportedType, FlattenerError
  22. from twisted.web.iweb import IRenderable
  23. def escapeForContent(data):
  24. """
  25. Escape some character or UTF-8 byte data for inclusion in an HTML or XML
  26. document, by replacing metacharacters (C{&<>}) with their entity
  27. equivalents (C{&amp;&lt;&gt;}).
  28. This is used as an input to L{_flattenElement}'s C{dataEscaper} parameter.
  29. @type data: C{bytes} or C{unicode}
  30. @param data: The string to escape.
  31. @rtype: C{bytes}
  32. @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8
  33. encoded string.
  34. """
  35. if isinstance(data, unicode):
  36. data = data.encode('utf-8')
  37. data = data.replace(b'&', b'&amp;'
  38. ).replace(b'<', b'&lt;'
  39. ).replace(b'>', b'&gt;')
  40. return data
  41. def attributeEscapingDoneOutside(data):
  42. """
  43. Escape some character or UTF-8 byte data for inclusion in the top level of
  44. an attribute. L{attributeEscapingDoneOutside} actually passes the data
  45. through unchanged, because L{writeWithAttributeEscaping} handles the
  46. quoting of the text within attributes outside the generator returned by
  47. L{_flattenElement}; this is used as the C{dataEscaper} argument to that
  48. L{_flattenElement} call so that that generator does not redundantly escape
  49. its text output.
  50. @type data: C{bytes} or C{unicode}
  51. @param data: The string to escape.
  52. @return: The string, unchanged, except for encoding.
  53. @rtype: C{bytes}
  54. """
  55. if isinstance(data, unicode):
  56. return data.encode("utf-8")
  57. return data
  58. def writeWithAttributeEscaping(write):
  59. """
  60. Decorate a C{write} callable so that all output written is properly quoted
  61. for inclusion within an XML attribute value.
  62. If a L{Tag <twisted.web.template.Tag>} C{x} is flattened within the context
  63. of the contents of another L{Tag <twisted.web.template.Tag>} C{y}, the
  64. metacharacters (C{<>&"}) delimiting C{x} should be passed through
  65. unchanged, but the textual content of C{x} should still be quoted, as
  66. usual. For example: C{<y><x>&amp;</x></y>}. That is the default behavior
  67. of L{_flattenElement} when L{escapeForContent} is passed as the
  68. C{dataEscaper}.
  69. However, when a L{Tag <twisted.web.template.Tag>} C{x} is flattened within
  70. the context of an I{attribute} of another L{Tag <twisted.web.template.Tag>}
  71. C{y}, then the metacharacters delimiting C{x} should be quoted so that it
  72. can be parsed from the attribute's value. In the DOM itself, this is not a
  73. valid thing to do, but given that renderers and slots may be freely moved
  74. around in a L{twisted.web.template} template, it is a condition which may
  75. arise in a document and must be handled in a way which produces valid
  76. output. So, for example, you should be able to get C{<y attr="&lt;x /&gt;"
  77. />}. This should also be true for other XML/HTML meta-constructs such as
  78. comments and CDATA, so if you were to serialize a L{comment
  79. <twisted.web.template.Comment>} in an attribute you should get C{<y
  80. attr="&lt;-- comment --&gt;" />}. Therefore in order to capture these
  81. meta-characters, flattening is done with C{write} callable that is wrapped
  82. with L{writeWithAttributeEscaping}.
  83. The final case, and hopefully the much more common one as compared to
  84. serializing L{Tag <twisted.web.template.Tag>} and arbitrary L{IRenderable}
  85. objects within an attribute, is to serialize a simple string, and those
  86. should be passed through for L{writeWithAttributeEscaping} to quote
  87. without applying a second, redundant level of quoting.
  88. @param write: A callable which will be invoked with the escaped L{bytes}.
  89. @return: A callable that writes data with escaping.
  90. """
  91. def _write(data):
  92. write(escapeForContent(data).replace(b'"', b'&quot;'))
  93. return _write
  94. def escapedCDATA(data):
  95. """
  96. Escape CDATA for inclusion in a document.
  97. @type data: L{str} or L{unicode}
  98. @param data: The string to escape.
  99. @rtype: L{str}
  100. @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8
  101. encoded string.
  102. """
  103. if isinstance(data, unicode):
  104. data = data.encode('utf-8')
  105. return data.replace(b']]>', b']]]]><![CDATA[>')
  106. def escapedComment(data):
  107. """
  108. Escape a comment for inclusion in a document.
  109. @type data: L{str} or L{unicode}
  110. @param data: The string to escape.
  111. @rtype: C{str}
  112. @return: The quoted form of C{data}. If C{data} is unicode, return a utf-8
  113. encoded string.
  114. """
  115. if isinstance(data, unicode):
  116. data = data.encode('utf-8')
  117. data = data.replace(b'--', b'- - ').replace(b'>', b'&gt;')
  118. if data and data[-1:] == b'-':
  119. data += b' '
  120. return data
  121. def _getSlotValue(name, slotData, default=None):
  122. """
  123. Find the value of the named slot in the given stack of slot data.
  124. """
  125. for slotFrame in slotData[::-1]:
  126. if slotFrame is not None and name in slotFrame:
  127. return slotFrame[name]
  128. else:
  129. if default is not None:
  130. return default
  131. raise UnfilledSlot(name)
  132. def _flattenElement(request, root, write, slotData, renderFactory,
  133. dataEscaper):
  134. """
  135. Make C{root} slightly more flat by yielding all its immediate contents as
  136. strings, deferreds or generators that are recursive calls to itself.
  137. @param request: A request object which will be passed to
  138. L{IRenderable.render}.
  139. @param root: An object to be made flatter. This may be of type C{unicode},
  140. L{str}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple}, L{list},
  141. L{types.GeneratorType}, L{Deferred}, or an object that implements
  142. L{IRenderable}.
  143. @param write: A callable which will be invoked with each L{bytes} produced
  144. by flattening C{root}.
  145. @param slotData: A L{list} of L{dict} mapping L{str} slot names to data
  146. with which those slots will be replaced.
  147. @param renderFactory: If not L{None}, an object that provides
  148. L{IRenderable}.
  149. @param dataEscaper: A 1-argument callable which takes L{bytes} or
  150. L{unicode} and returns L{bytes}, quoted as appropriate for the
  151. rendering context. This is really only one of two values:
  152. L{attributeEscapingDoneOutside} or L{escapeForContent}, depending on
  153. whether the rendering context is within an attribute or not. See the
  154. explanation in L{writeWithAttributeEscaping}.
  155. @return: An iterator that eventually yields L{bytes} that should be written
  156. to the output. However it may also yield other iterators or
  157. L{Deferred}s; if it yields another iterator, the caller will iterate
  158. it; if it yields a L{Deferred}, the result of that L{Deferred} will
  159. either be L{bytes}, in which case it's written, or another generator,
  160. in which case it is iterated. See L{_flattenTree} for the trampoline
  161. that consumes said values.
  162. @rtype: An iterator which yields L{bytes}, L{Deferred}, and more iterators
  163. of the same type.
  164. """
  165. def keepGoing(newRoot, dataEscaper=dataEscaper,
  166. renderFactory=renderFactory, write=write):
  167. return _flattenElement(request, newRoot, write, slotData,
  168. renderFactory, dataEscaper)
  169. if isinstance(root, (bytes, unicode)):
  170. write(dataEscaper(root))
  171. elif isinstance(root, slot):
  172. slotValue = _getSlotValue(root.name, slotData, root.default)
  173. yield keepGoing(slotValue)
  174. elif isinstance(root, CDATA):
  175. write(b'<![CDATA[')
  176. write(escapedCDATA(root.data))
  177. write(b']]>')
  178. elif isinstance(root, Comment):
  179. write(b'<!--')
  180. write(escapedComment(root.data))
  181. write(b'-->')
  182. elif isinstance(root, Tag):
  183. slotData.append(root.slotData)
  184. if root.render is not None:
  185. rendererName = root.render
  186. rootClone = root.clone(False)
  187. rootClone.render = None
  188. renderMethod = renderFactory.lookupRenderMethod(rendererName)
  189. result = renderMethod(request, rootClone)
  190. yield keepGoing(result)
  191. slotData.pop()
  192. return
  193. if not root.tagName:
  194. yield keepGoing(root.children)
  195. return
  196. write(b'<')
  197. if isinstance(root.tagName, unicode):
  198. tagName = root.tagName.encode('ascii')
  199. else:
  200. tagName = root.tagName
  201. write(tagName)
  202. for k, v in iteritems(root.attributes):
  203. if isinstance(k, unicode):
  204. k = k.encode('ascii')
  205. write(b' ' + k + b'="')
  206. # Serialize the contents of the attribute, wrapping the results of
  207. # that serialization so that _everything_ is quoted.
  208. yield keepGoing(
  209. v,
  210. attributeEscapingDoneOutside,
  211. write=writeWithAttributeEscaping(write))
  212. write(b'"')
  213. if root.children or nativeString(tagName) not in voidElements:
  214. write(b'>')
  215. # Regardless of whether we're in an attribute or not, switch back
  216. # to the escapeForContent dataEscaper. The contents of a tag must
  217. # be quoted no matter what; in the top-level document, just so
  218. # they're valid, and if they're within an attribute, they have to
  219. # be quoted so that after applying the *un*-quoting required to re-
  220. # parse the tag within the attribute, all the quoting is still
  221. # correct.
  222. yield keepGoing(root.children, escapeForContent)
  223. write(b'</' + tagName + b'>')
  224. else:
  225. write(b' />')
  226. elif isinstance(root, (tuple, list, GeneratorType)):
  227. for element in root:
  228. yield keepGoing(element)
  229. elif isinstance(root, CharRef):
  230. escaped = '&#%d;' % (root.ordinal,)
  231. write(escaped.encode('ascii'))
  232. elif isinstance(root, Deferred):
  233. yield root.addCallback(lambda result: (result, keepGoing(result)))
  234. elif iscoroutine(root):
  235. d = ensureDeferred(root)
  236. yield d.addCallback(lambda result: (result, keepGoing(result)))
  237. elif IRenderable.providedBy(root):
  238. result = root.render(request)
  239. yield keepGoing(result, renderFactory=root)
  240. else:
  241. raise UnsupportedType(root)
  242. def _flattenTree(request, root, write):
  243. """
  244. Make C{root} into an iterable of L{bytes} and L{Deferred} by doing a depth
  245. first traversal of the tree.
  246. @param request: A request object which will be passed to
  247. L{IRenderable.render}.
  248. @param root: An object to be made flatter. This may be of type C{unicode},
  249. L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple},
  250. L{list}, L{types.GeneratorType}, L{Deferred}, or something providing
  251. L{IRenderable}.
  252. @param write: A callable which will be invoked with each L{bytes} produced
  253. by flattening C{root}.
  254. @return: An iterator which yields objects of type L{bytes} and L{Deferred}.
  255. A L{Deferred} is only yielded when one is encountered in the process of
  256. flattening C{root}. The returned iterator must not be iterated again
  257. until the L{Deferred} is called back.
  258. """
  259. stack = [_flattenElement(request, root, write, [], None, escapeForContent)]
  260. while stack:
  261. try:
  262. frame = stack[-1].gi_frame
  263. element = next(stack[-1])
  264. except StopIteration:
  265. stack.pop()
  266. except Exception as e:
  267. stack.pop()
  268. roots = []
  269. for generator in stack:
  270. roots.append(generator.gi_frame.f_locals['root'])
  271. roots.append(frame.f_locals['root'])
  272. raise FlattenerError(e, roots, extract_tb(exc_info()[2]))
  273. else:
  274. if isinstance(element, Deferred):
  275. def cbx(originalAndToFlatten):
  276. original, toFlatten = originalAndToFlatten
  277. stack.append(toFlatten)
  278. return original
  279. yield element.addCallback(cbx)
  280. else:
  281. stack.append(element)
  282. def _writeFlattenedData(state, write, result):
  283. """
  284. Take strings from an iterator and pass them to a writer function.
  285. @param state: An iterator of L{str} and L{Deferred}. L{str} instances will
  286. be passed to C{write}. L{Deferred} instances will be waited on before
  287. resuming iteration of C{state}.
  288. @param write: A callable which will be invoked with each L{str}
  289. produced by iterating C{state}.
  290. @param result: A L{Deferred} which will be called back when C{state} has
  291. been completely flattened into C{write} or which will be errbacked if
  292. an exception in a generator passed to C{state} or an errback from a
  293. L{Deferred} from state occurs.
  294. @return: L{None}
  295. """
  296. while True:
  297. try:
  298. element = next(state)
  299. except StopIteration:
  300. result.callback(None)
  301. except:
  302. result.errback()
  303. else:
  304. def cby(original):
  305. _writeFlattenedData(state, write, result)
  306. return original
  307. element.addCallbacks(cby, result.errback)
  308. break
  309. def flatten(request, root, write):
  310. """
  311. Incrementally write out a string representation of C{root} using C{write}.
  312. In order to create a string representation, C{root} will be decomposed into
  313. simpler objects which will themselves be decomposed and so on until strings
  314. or objects which can easily be converted to strings are encountered.
  315. @param request: A request object which will be passed to the C{render}
  316. method of any L{IRenderable} provider which is encountered.
  317. @param root: An object to be made flatter. This may be of type L{unicode},
  318. L{bytes}, L{slot}, L{Tag <twisted.web.template.Tag>}, L{tuple},
  319. L{list}, L{types.GeneratorType}, L{Deferred}, or something that provides
  320. L{IRenderable}.
  321. @param write: A callable which will be invoked with each L{bytes} produced
  322. by flattening C{root}.
  323. @return: A L{Deferred} which will be called back when C{root} has been
  324. completely flattened into C{write} or which will be errbacked if an
  325. unexpected exception occurs.
  326. """
  327. result = Deferred()
  328. state = _flattenTree(request, root, write)
  329. _writeFlattenedData(state, write, result)
  330. return result
  331. def flattenString(request, root):
  332. """
  333. Collate a string representation of C{root} into a single string.
  334. This is basically gluing L{flatten} to an L{io.BytesIO} and returning
  335. the results. See L{flatten} for the exact meanings of C{request} and
  336. C{root}.
  337. @return: A L{Deferred} which will be called back with a single string as
  338. its result when C{root} has been completely flattened into C{write} or
  339. which will be errbacked if an unexpected exception occurs.
  340. """
  341. io = BytesIO()
  342. d = flatten(request, root, io.write)
  343. d.addCallback(lambda _: io.getvalue())
  344. return d