You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

template.py 9.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. import re
  2. import warnings
  3. from io import StringIO
  4. from django.template.base import TRANSLATOR_COMMENT_MARK, Lexer, TokenType
  5. from . import TranslatorCommentWarning, trim_whitespace
  6. dot_re = re.compile(r'\S')
  7. def blankout(src, char):
  8. """
  9. Change every non-whitespace character to the given char.
  10. Used in the templatize function.
  11. """
  12. return dot_re.sub(char, src)
# Optional "context" argument of a trans/blocktrans tag; group 1 captures the
# quoted context string (quotes included).
context_re = re.compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""")
# A full {% trans %} tag body.
inline_re = re.compile(
    # Match the trans 'some text' part
    r"""^\s*trans\s+((?:"[^"]*?")|(?:'[^']*?'))"""
    # Match and ignore optional filters
    r"""(?:\s*\|\s*[^\s:]+(?::(?:[^\s'":]+|(?:"[^"]*?")|(?:'[^']*?')))?)*"""
    # Match the optional context part
    r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*"""
)
# Opening {% blocktrans %} tag; group 1 captures an optional context argument.
block_re = re.compile(r"""^\s*blocktrans(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?(?:\s+|$)""")
# Closing {% endblocktrans %} tag.
endblock_re = re.compile(r"""^\s*endblocktrans$""")
# {% plural %} separator inside a blocktrans block.
plural_re = re.compile(r"""^\s*plural$""")
# A literal _("...") / _('...') constant inside tag or variable contents;
# group 1 captures the quoted string (quotes included).
constant_re = re.compile(r"""_\(((?:".*?")|(?:'.*?'))\)""")
def templatize(src, origin=None):
    """
    Turn a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.

    ``src`` is the template source text; ``origin`` is an optional label for
    the source (e.g. a file name) used only in error and warning messages.
    Return the rewritten text as a string. Raise SyntaxError when a
    translation block contains another block tag.
    """
    out = StringIO('')
    # Scanner state:
    message_context = None   # context captured from trans/blocktrans, if any
    intrans = False          # inside {% blocktrans %} ... {% endblocktrans %}
    inplural = False         # past the {% plural %} marker of a blocktrans
    trimmed = False          # blocktrans had the 'trimmed' option
    singular = []            # collected singular-message fragments
    plural = []              # collected plural-message fragments
    incomment = False        # inside {% comment %} ... {% endcomment %}
    comment = []             # buffered contents of the current comment block
    lineno_comment_map = {}       # lineno -> translator comments seen there
    comment_lineno_cache = None   # lineno of the most recent translator comment
    # Adding the u prefix allows gettext to recognize the string (#26093).
    raw_prefix = 'u'

    def join_tokens(tokens, trim=False):
        # Concatenate collected fragments; apply blocktrans 'trimmed'
        # whitespace normalization when requested.
        message = ''.join(tokens)
        if trim:
            message = trim_whitespace(message)
        return message
    for t in Lexer(src).tokenize():
        if incomment:
            # Buffer everything until {% endcomment %}, then re-emit any
            # translator-targeted lines as '#' comments for xgettext.
            if t.token_type == TokenType.BLOCK and t.contents == 'endcomment':
                content = ''.join(comment)
                translators_comment_start = None
                # Find the last line that starts with the translator marker...
                for lineno, line in enumerate(content.splitlines(True)):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        translators_comment_start = lineno
                # ...and keep that line and everything after it; earlier
                # lines become empty '#' comments to preserve line counts.
                for lineno, line in enumerate(content.splitlines(True)):
                    if translators_comment_start is not None and lineno >= translators_comment_start:
                        out.write(' # %s' % line)
                    else:
                        out.write(' #\n')
                incomment = False
                comment = []
            else:
                comment.append(t.contents)
        elif intrans:
            # Inside a blocktrans: only endblocktrans/plural block tags,
            # variables, and text are allowed.
            if t.token_type == TokenType.BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    if inplural:
                        # Emit an (n)pgettext call, then mask the raw
                        # fragments ('S'/'P') so xgettext ignores them.
                        if message_context:
                            out.write(' npgettext({p}{!r}, {p}{!r}, {p}{!r},count) '.format(
                                message_context,
                                join_tokens(singular, trimmed),
                                join_tokens(plural, trimmed),
                                p=raw_prefix,
                            ))
                        else:
                            out.write(' ngettext({p}{!r}, {p}{!r}, count) '.format(
                                join_tokens(singular, trimmed),
                                join_tokens(plural, trimmed),
                                p=raw_prefix,
                            ))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    else:
                        if message_context:
                            out.write(' pgettext({p}{!r}, {p}{!r}) '.format(
                                message_context,
                                join_tokens(singular, trimmed),
                                p=raw_prefix,
                            ))
                        else:
                            out.write(' gettext({p}{!r}) '.format(
                                join_tokens(singular, trimmed),
                                p=raw_prefix,
                            ))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                    # Reset blocktrans state for the next block.
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    # Any other block tag inside blocktrans is illegal.
                    filemsg = ''
                    if origin:
                        filemsg = 'file %s, ' % origin
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: "
                        "%s (%sline %d)" % (t.contents, filemsg, t.lineno)
                    )
            elif t.token_type == TokenType.VAR:
                # Template variables become %(name)s placeholders.
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TokenType.TEXT:
                # Escape literal '%' so the message is a valid format string.
                contents = t.contents.replace('%', '%%')
                if inplural:
                    plural.append(contents)
                else:
                    singular.append(contents)
        else:
            # Handle comment tokens (`{# ... #}`) plus other constructs on
            # the same line:
            if comment_lineno_cache is not None:
                cur_lineno = t.lineno + t.contents.count('\n')
                if comment_lineno_cache == cur_lineno:
                    # A translator comment must be the last item on its line;
                    # otherwise warn and discard it.
                    if t.token_type != TokenType.COMMENT:
                        for c in lineno_comment_map[comment_lineno_cache]:
                            filemsg = ''
                            if origin:
                                filemsg = 'file %s, ' % origin
                            warn_msg = (
                                "The translator-targeted comment '%s' "
                                "(%sline %d) was ignored, because it wasn't "
                                "the last item on the line."
                            ) % (c, filemsg, comment_lineno_cache)
                            warnings.warn(warn_msg, TranslatorCommentWarning)
                        lineno_comment_map[comment_lineno_cache] = []
                else:
                    # We moved past the comment's line: flush it as a '#'
                    # comment for xgettext.
                    out.write('# %s' % ' | '.join(lineno_comment_map[comment_lineno_cache]))
                comment_lineno_cache = None
            if t.token_type == TokenType.BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    # {% trans "..." %}: strip the quotes and escape '%'.
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    g = g.replace('%', '%%')
                    if imatch.group(2):
                        # A context is provided
                        context_match = context_re.match(imatch.group(2))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                        out.write(' pgettext({p}{!r}, {p}{!r}) '.format(
                            message_context, g, p=raw_prefix
                        ))
                        message_context = None
                    else:
                        out.write(' gettext({p}{!r}) '.format(g, p=raw_prefix))
                elif bmatch:
                    # {% blocktrans %}: emit any _("...") constants from the
                    # tag arguments, then start collecting the block body.
                    for fmatch in constant_re.findall(t.contents):
                        out.write(' _(%s) ' % fmatch)
                    if bmatch.group(1):
                        # A context is provided
                        context_match = context_re.match(bmatch.group(1))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                    intrans = True
                    inplural = False
                    trimmed = 'trimmed' in t.split_contents()
                    singular = []
                    plural = []
                elif cmatches:
                    # Bare _("...") constants inside some other block tag.
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                elif t.contents == 'comment':
                    incomment = True
                else:
                    # Any other block tag: mask it out.
                    out.write(blankout(t.contents, 'B'))
            elif t.token_type == TokenType.VAR:
                # {{ var|filter:... }}: extract _("...") constants from the
                # variable and from filter arguments; mask the rest.
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    out.write(' _(%s) ' % cmatch.group(1))
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        out.write(' %s ' % p.split(':', 1)[1])
                    else:
                        out.write(blankout(p, 'F'))
            elif t.token_type == TokenType.COMMENT:
                # Remember translator comments by line number so they can be
                # attached to the message that follows (or warned about).
                if t.contents.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                    lineno_comment_map.setdefault(t.lineno, []).append(t.contents)
                    comment_lineno_cache = t.lineno
            else:
                # Any other token type (e.g. plain text): mask it out.
                out.write(blankout(t.contents, 'X'))
    return out.getvalue()