123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246 |
- import warnings
- from io import StringIO
-
- from django.template.base import Lexer, TokenType
- from django.utils.regex_helper import _lazy_re_compile
-
- from . import TranslatorCommentWarning, trim_whitespace
-
# Prefix that marks a template comment as being addressed to translators.
TRANSLATOR_COMMENT_MARK = "Translators"

# Matches a single non-whitespace character.
dot_re = _lazy_re_compile(r"\S")
-
-
def blankout(src, char):
    """
    Return ``src`` with each non-whitespace character replaced by ``char``.

    Whitespace is left untouched. Used in the templatize function.
    """
    masked = dot_re.sub(char, src)
    return masked
-
-
# Captures the quoted string (quotes included) that follows the word
# "context" in a tag's argument text.
context_re = _lazy_re_compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""")

# Matches an inline trans/translate tag. Group 1 is the quoted message,
# group 2 the optional context clause, and group 3 the quoted context string.
inline_re = _lazy_re_compile(
    # Match the trans/translate 'some text' part.
    r"""^\s*trans(?:late)?\s+((?:"[^"]*?")|(?:'[^']*?'))"""
    # Match and ignore optional filters
    r"""(?:\s*\|\s*[^\s:]+(?::(?:[^\s'":]+|(?:"[^"]*?")|(?:'[^']*?')))?)*"""
    # Match the optional context part
    r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*"""
)

# Matches an opening blocktrans/blocktranslate tag. Group 1 is the optional
# context clause and group 2 the quoted context string.
block_re = _lazy_re_compile(
    r"""^\s*blocktrans(?:late)?(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?(?:\s+|$)"""
)

# Matches the closing endblocktrans/endblocktranslate tag.
endblock_re = _lazy_re_compile(r"""^\s*endblocktrans(?:late)?$""")

# Matches the "plural" tag that separates singular and plural forms.
plural_re = _lazy_re_compile(r"""^\s*plural$""")

# Matches a _("...") or _('...') constant and captures the quoted string.
constant_re = _lazy_re_compile(r"""_\(((?:".*?")|(?:'.*?'))\)""")
-
-
def templatize(src, origin=None):
    """
    Convert Django template source into text that xgettext can scan.

    Inline and block translation tags, as well as ``_("...")`` constants, are
    rewritten as gettext-style function calls. Remaining block, variable
    filter, and text content is masked with ``blankout()``, and translator
    comments are written out as ``#`` comments.

    ``origin``, when truthy, is only used to name the file in warning and
    error messages.

    Raise SyntaxError when a translation block contains a block tag other than
    the plural separator or the closing tag.
    """
    # Adding the u prefix allows gettext to recognize the string (#26093).
    raw_prefix = "u"
    out = StringIO("")

    # State of the translation block currently being collected, if any.
    intrans = False
    inplural = False
    trimmed = False
    message_context = None
    singular = []
    plural = []

    # State of the {% comment %} block currently being collected, if any.
    incomment = False
    comment = []

    # Translator comment tokens grouped by line number, plus the line number
    # of the most recent one that has not been resolved yet.
    lineno_comment_map = {}
    comment_lineno_cache = None

    def join_tokens(tokens, trim=False):
        message = "".join(tokens)
        return trim_whitespace(message) if trim else message

    def unquote(quoted):
        # Remove the surrounding quote characters, whichever kind is used.
        if quoted[0] == '"':
            return quoted.strip('"')
        if quoted[0] == "'":
            return quoted.strip("'")
        return quoted

    def context_from(clause):
        # Pull the unquoted context string out of a "... context '...'" clause.
        return unquote(context_re.match(clause)[1])

    def location_prefix():
        # Prefix naming the source file in messages, when an origin is known.
        return "file %s, " % origin if origin else ""

    for token in Lexer(src).tokenize():
        kind = token.token_type
        text = token.contents

        if incomment:
            if kind != TokenType.BLOCK or text != "endcomment":
                comment.append(text)
                continue
            comment_lines = "".join(comment).splitlines(True)
            # Index of the last line that starts with the translator mark.
            marked_from = None
            for index, line in enumerate(comment_lines):
                if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                    marked_from = index
            # Lines before the mark are emitted as empty comments so that the
            # number of output lines is unchanged.
            for index, line in enumerate(comment_lines):
                if marked_from is None or index < marked_from:
                    out.write(" #\n")
                else:
                    out.write(" # %s" % line)
            incomment = False
            comment = []
            continue

        if intrans:
            if kind == TokenType.BLOCK:
                if endblock_re.match(text):
                    singular_msg = join_tokens(singular, trimmed)
                    if inplural:
                        plural_msg = join_tokens(plural, trimmed)
                        if message_context:
                            call = (
                                " npgettext({p}{!r}, {p}{!r}, {p}{!r},count) ".format(
                                    message_context,
                                    singular_msg,
                                    plural_msg,
                                    p=raw_prefix,
                                )
                            )
                        else:
                            call = " ngettext({p}{!r}, {p}{!r}, count) ".format(
                                singular_msg,
                                plural_msg,
                                p=raw_prefix,
                            )
                    elif message_context:
                        call = " pgettext({p}{!r}, {p}{!r}) ".format(
                            message_context,
                            singular_msg,
                            p=raw_prefix,
                        )
                    else:
                        call = " gettext({p}{!r}) ".format(
                            singular_msg,
                            p=raw_prefix,
                        )
                    out.write(call)
                    # Masked copies of the collected parts follow the call.
                    for part in singular:
                        out.write(blankout(part, "S"))
                    if inplural:
                        for part in plural:
                            out.write(blankout(part, "P"))
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif plural_re.match(text):
                    inplural = True
                else:
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: "
                        "%s (%sline %d)" % (text, location_prefix(), token.lineno)
                    )
            elif kind == TokenType.VAR:
                # Variables become named %-style placeholders.
                (plural if inplural else singular).append("%%(%s)s" % text)
            elif kind == TokenType.TEXT:
                # Literal percent signs are doubled.
                (plural if inplural else singular).append(text.replace("%", "%%"))
            continue

        # Handle comment tokens (`{# ... #}`) plus other constructs on
        # the same line:
        if comment_lineno_cache is not None:
            cur_lineno = token.lineno + text.count("\n")
            if comment_lineno_cache != cur_lineno:
                out.write(
                    "# %s" % " | ".join(lineno_comment_map[comment_lineno_cache])
                )
            elif kind != TokenType.COMMENT:
                for ignored in lineno_comment_map[comment_lineno_cache]:
                    warn_msg = (
                        "The translator-targeted comment '%s' "
                        "(%sline %d) was ignored, because it wasn't "
                        "the last item on the line."
                    ) % (ignored, location_prefix(), comment_lineno_cache)
                    warnings.warn(warn_msg, TranslatorCommentWarning)
                lineno_comment_map[comment_lineno_cache] = []
            comment_lineno_cache = None

        if kind == TokenType.BLOCK:
            imatch = inline_re.match(text)
            bmatch = block_re.match(text)
            cmatches = constant_re.findall(text)
            if imatch:
                message = unquote(imatch[1]).replace("%", "%%")
                if imatch[2]:
                    # A context is provided
                    message_context = context_from(imatch[2])
                    out.write(
                        " pgettext({p}{!r}, {p}{!r}) ".format(
                            message_context, message, p=raw_prefix
                        )
                    )
                    message_context = None
                else:
                    out.write(" gettext({p}{!r}) ".format(message, p=raw_prefix))
            elif bmatch:
                for constant in cmatches:
                    out.write(" _(%s) " % constant)
                if bmatch[1]:
                    # A context is provided
                    message_context = context_from(bmatch[1])
                intrans = True
                inplural = False
                trimmed = "trimmed" in token.split_contents()
                singular = []
                plural = []
            elif cmatches:
                for constant in cmatches:
                    out.write(" _(%s) " % constant)
            elif text == "comment":
                incomment = True
            else:
                out.write(blankout(text, "B"))
        elif kind == TokenType.VAR:
            head, *filters = text.split("|")
            constant = constant_re.match(head)
            if constant:
                out.write(" _(%s) " % constant[1])
            for filter_part in filters:
                if ":_(" in filter_part:
                    out.write(" %s " % filter_part.split(":", 1)[1])
                else:
                    out.write(blankout(filter_part, "F"))
        elif kind == TokenType.COMMENT:
            if text.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                lineno_comment_map.setdefault(token.lineno, []).append(text)
                comment_lineno_cache = token.lineno
        else:
            out.write(blankout(text, "X"))

    return out.getvalue()
|