You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

whitespace.py 1.2KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. from __future__ import absolute_import, division, unicode_literals
  2. import re
  3. from . import base
  4. from ..constants import rcdataElements, spaceCharacters
  5. spaceCharacters = "".join(spaceCharacters)
  6. SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
  7. class Filter(base.Filter):
  8. """Collapses whitespace except in pre, textarea, and script elements"""
  9. spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
  10. def __iter__(self):
  11. preserve = 0
  12. for token in base.Filter.__iter__(self):
  13. type = token["type"]
  14. if type == "StartTag" \
  15. and (preserve or token["name"] in self.spacePreserveElements):
  16. preserve += 1
  17. elif type == "EndTag" and preserve:
  18. preserve -= 1
  19. elif not preserve and type == "SpaceCharacters" and token["data"]:
  20. # Test on token["data"] above to not introduce spaces where there were not
  21. token["data"] = " "
  22. elif not preserve and type == "Characters":
  23. token["data"] = collapse_spaces(token["data"])
  24. yield token
  25. def collapse_spaces(text):
  26. return SPACES_REGEX.sub(' ', text)