123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- import re
- import warnings
- from io import StringIO
-
- from django.template.base import TRANSLATOR_COMMENT_MARK, Lexer, TokenType
-
- from . import TranslatorCommentWarning, trim_whitespace
-
# Matches any single non-whitespace character.
dot_re = re.compile(r'\S')


def blankout(src, char):
    """
    Change every non-whitespace character to the given char.
    Used in the templatize function.

    Whitespace, including newlines, is left untouched, so the result has the
    same length and the same line breaks as ``src``. ``char`` is handed to
    ``re.sub()`` as the replacement, so a backslash in it would be
    interpreted as a replacement escape.
    """
    return dot_re.sub(char, src)
-
-
# Capture the quoted literal that follows the ``context`` keyword in the
# fragment matched by group 2 of inline_re or group 1 of block_re.
context_re = re.compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""")
inline_re = re.compile(
    # Match the trans 'some text' part
    r"""^\s*trans\s+((?:"[^"]*?")|(?:'[^']*?'))"""
    # Match and ignore optional filters
    r"""(?:\s*\|\s*[^\s:]+(?::(?:[^\s'":]+|(?:"[^"]*?")|(?:'[^']*?')))?)*"""
    # Match the optional context part
    r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*"""
)
block_re = re.compile(
    # Match the blocktrans tag name
    r"""^\s*blocktrans"""
    # Match the optional context part
    r"""(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?"""
    # The tag name (or context) must be followed by whitespace or the end
    r"""(?:\s+|$)"""
)
# Closing tag of a blocktrans block.
endblock_re = re.compile(r"""^\s*endblocktrans$""")
# Separator between the singular and plural parts of a blocktrans block.
plural_re = re.compile(r"""^\s*plural$""")
# A _("...") or _('...') constant; group 1 is the literal including quotes.
constant_re = re.compile(r"""_\(((?:".*?")|(?:'.*?'))\)""")
-
-
def templatize(src, origin=None):
    """
    Turn a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.

    ``src`` is the template source text. ``origin``, when truthy, is only
    used to name the file in error and warning messages.

    Return the converted text as a string. Translatable constructs become
    ``gettext``/``ngettext``/``pgettext``/``npgettext``/``_`` calls; other
    content is passed through blankout(), which keeps whitespace and
    replaces everything else with a placeholder letter.

    Raise SyntaxError if a block tag other than ``plural`` or
    ``endblocktrans`` appears inside a ``blocktrans`` block. Emit a
    TranslatorCommentWarning for a translator comment that is followed by
    another token on the same line.
    """
    out = StringIO('')
    message_context = None
    intrans = False
    inplural = False
    trimmed = False
    singular = []
    plural = []
    incomment = False
    comment = []
    lineno_comment_map = {}
    comment_lineno_cache = None
    # Adding the u prefix allows gettext to recognize the string (#26093).
    raw_prefix = 'u'

    def join_tokens(tokens, trim=False):
        message = ''.join(tokens)
        if trim:
            message = trim_whitespace(message)
        return message

    def strip_quotes(literal):
        # Remove the quote character the literal starts with from both ends;
        # anything not starting with a quote is returned unchanged.
        quote = literal[0]
        if quote in ('"', "'"):
            return literal.strip(quote)
        return literal

    for t in Lexer(src).tokenize():
        if incomment:
            # Inside {% comment %} ... {% endcomment %}: collect the raw
            # contents until the closing tag is seen.
            if t.token_type == TokenType.BLOCK and t.contents == 'endcomment':
                content = ''.join(comment)
                comment_lines = content.splitlines(True)
                # The last line starting with the translator mark wins.
                translators_comment_start = None
                for lineno, line in enumerate(comment_lines):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        translators_comment_start = lineno
                for lineno, line in enumerate(comment_lines):
                    if (
                        translators_comment_start is not None and
                        lineno >= translators_comment_start
                    ):
                        out.write(' # %s' % line)
                    else:
                        out.write(' #\n')
                incomment = False
                comment = []
            else:
                comment.append(t.contents)
        elif intrans:
            # Inside {% blocktrans %} ... {% endblocktrans %}: accumulate the
            # singular/plural message parts until the block is closed.
            if t.token_type == TokenType.BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    if inplural:
                        if message_context:
                            out.write(' npgettext({p}{!r}, {p}{!r}, {p}{!r},count) '.format(
                                message_context,
                                join_tokens(singular, trimmed),
                                join_tokens(plural, trimmed),
                                p=raw_prefix,
                            ))
                        else:
                            out.write(' ngettext({p}{!r}, {p}{!r}, count) '.format(
                                join_tokens(singular, trimmed),
                                join_tokens(plural, trimmed),
                                p=raw_prefix,
                            ))
                        # Blank out the collected parts: 'S' for singular
                        # text, 'P' for plural text.
                        for part in singular:
                            out.write(blankout(part, 'S'))
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    else:
                        if message_context:
                            out.write(' pgettext({p}{!r}, {p}{!r}) '.format(
                                message_context,
                                join_tokens(singular, trimmed),
                                p=raw_prefix,
                            ))
                        else:
                            out.write(' gettext({p}{!r}) '.format(
                                join_tokens(singular, trimmed),
                                p=raw_prefix,
                            ))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    filemsg = ('file %s, ' % origin) if origin else ''
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: "
                        "%s (%sline %d)" % (t.contents, filemsg, t.lineno)
                    )
            elif t.token_type == TokenType.VAR:
                # Template variables become named %-format placeholders.
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TokenType.TEXT:
                # Escape literal percent signs in the message text.
                contents = t.contents.replace('%', '%%')
                if inplural:
                    plural.append(contents)
                else:
                    singular.append(contents)
        else:
            # Handle comment tokens (`{# ... #}`) plus other constructs on
            # the same line:
            if comment_lineno_cache is not None:
                cur_lineno = t.lineno + t.contents.count('\n')
                if comment_lineno_cache == cur_lineno:
                    if t.token_type != TokenType.COMMENT:
                        filemsg = ('file %s, ' % origin) if origin else ''
                        for c in lineno_comment_map[comment_lineno_cache]:
                            warn_msg = (
                                "The translator-targeted comment '%s' "
                                "(%sline %d) was ignored, because it wasn't "
                                "the last item on the line."
                            ) % (c, filemsg, comment_lineno_cache)
                            warnings.warn(warn_msg, TranslatorCommentWarning)
                        lineno_comment_map[comment_lineno_cache] = []
                else:
                    out.write('# %s' % ' | '.join(lineno_comment_map[comment_lineno_cache]))
                comment_lineno_cache = None

            if t.token_type == TokenType.BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    g = strip_quotes(imatch.group(1))
                    g = g.replace('%', '%%')
                    if imatch.group(2):
                        # A context is provided
                        context_match = context_re.match(imatch.group(2))
                        message_context = strip_quotes(context_match.group(1))
                        out.write(' pgettext({p}{!r}, {p}{!r}) '.format(
                            message_context, g, p=raw_prefix
                        ))
                        message_context = None
                    else:
                        out.write(' gettext({p}{!r}) '.format(g, p=raw_prefix))
                elif bmatch:
                    # Constants used in the opening tag are emitted first.
                    for fmatch in cmatches:
                        out.write(' _(%s) ' % fmatch)
                    if bmatch.group(1):
                        # A context is provided
                        context_match = context_re.match(bmatch.group(1))
                        message_context = strip_quotes(context_match.group(1))
                    intrans = True
                    inplural = False
                    trimmed = 'trimmed' in t.split_contents()
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                elif t.contents == 'comment':
                    incomment = True
                else:
                    out.write(blankout(t.contents, 'B'))
            elif t.token_type == TokenType.VAR:
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    out.write(' _(%s) ' % cmatch.group(1))
                for filter_part in parts[1:]:
                    if ':_(' in filter_part:
                        # Filter argument is a _() constant: emit it as is.
                        out.write(' %s ' % filter_part.split(':', 1)[1])
                    else:
                        out.write(blankout(filter_part, 'F'))
            elif t.token_type == TokenType.COMMENT:
                if t.contents.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                    lineno_comment_map.setdefault(t.lineno, []).append(t.contents)
                    comment_lineno_cache = t.lineno
            else:
                out.write(blankout(t.contents, 'X'))
    return out.getvalue()
|