You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

inject_meta_charset.py 2.9KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. from __future__ import absolute_import, division, unicode_literals
  2. from . import base
  3. class Filter(base.Filter):
  4. """Injects ``<meta charset=ENCODING>`` tag into head of document"""
  5. def __init__(self, source, encoding):
  6. """Creates a Filter
  7. :arg source: the source token stream
  8. :arg encoding: the encoding to set
  9. """
  10. base.Filter.__init__(self, source)
  11. self.encoding = encoding
  12. def __iter__(self):
  13. state = "pre_head"
  14. meta_found = (self.encoding is None)
  15. pending = []
  16. for token in base.Filter.__iter__(self):
  17. type = token["type"]
  18. if type == "StartTag":
  19. if token["name"].lower() == "head":
  20. state = "in_head"
  21. elif type == "EmptyTag":
  22. if token["name"].lower() == "meta":
  23. # replace charset with actual encoding
  24. has_http_equiv_content_type = False
  25. for (namespace, name), value in token["data"].items():
  26. if namespace is not None:
  27. continue
  28. elif name.lower() == 'charset':
  29. token["data"][(namespace, name)] = self.encoding
  30. meta_found = True
  31. break
  32. elif name == 'http-equiv' and value.lower() == 'content-type':
  33. has_http_equiv_content_type = True
  34. else:
  35. if has_http_equiv_content_type and (None, "content") in token["data"]:
  36. token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
  37. meta_found = True
  38. elif token["name"].lower() == "head" and not meta_found:
  39. # insert meta into empty head
  40. yield {"type": "StartTag", "name": "head",
  41. "data": token["data"]}
  42. yield {"type": "EmptyTag", "name": "meta",
  43. "data": {(None, "charset"): self.encoding}}
  44. yield {"type": "EndTag", "name": "head"}
  45. meta_found = True
  46. continue
  47. elif type == "EndTag":
  48. if token["name"].lower() == "head" and pending:
  49. # insert meta into head (if necessary) and flush pending queue
  50. yield pending.pop(0)
  51. if not meta_found:
  52. yield {"type": "EmptyTag", "name": "meta",
  53. "data": {(None, "charset"): self.encoding}}
  54. while pending:
  55. yield pending.pop(0)
  56. meta_found = True
  57. state = "post_head"
  58. if state == "in_head":
  59. pending.append(token)
  60. else:
  61. yield token