import fnmatch import glob import os import re import sys from functools import total_ordering from itertools import dropwhile import django from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.core.files.temp import NamedTemporaryFile from django.core.management.base import BaseCommand, CommandError from django.core.management.utils import ( find_command, handle_extensions, popen_wrapper, ) from django.utils.encoding import DEFAULT_LOCALE_ENCODING from django.utils.functional import cached_property from django.utils.jslex import prepare_js_for_gettext from django.utils.text import get_text_list from django.utils.translation import templatize plural_forms_re = re.compile(r'^(?P"Plural-Forms.+?\\n")\s*$', re.MULTILINE | re.DOTALL) STATUS_OK = 0 NO_LOCALE_DIR = object() def check_programs(*programs): for program in programs: if find_command(program) is None: raise CommandError( "Can't find %s. Make sure you have GNU gettext tools 0.15 or " "newer installed." % program ) @total_ordering class TranslatableFile: def __init__(self, dirpath, file_name, locale_dir): self.file = file_name self.dirpath = dirpath self.locale_dir = locale_dir def __repr__(self): return "<%s: %s>" % ( self.__class__.__name__, os.sep.join([self.dirpath, self.file]), ) def __eq__(self, other): return self.path == other.path def __lt__(self, other): return self.path < other.path @property def path(self): return os.path.join(self.dirpath, self.file) class BuildFile: """ Represent the state of a translatable file during the build process. """ def __init__(self, command, domain, translatable): self.command = command self.domain = domain self.translatable = translatable @cached_property def is_templatized(self): if self.domain == 'djangojs': return self.command.gettext_version < (0, 18, 3) elif self.domain == 'django': file_ext = os.path.splitext(self.translatable.file)[1] return file_ext != '.py' return False @cached_property def path(self): return self.translatable.path @cached_property def work_path(self): """ Path to a file which is being fed into GNU gettext pipeline. This may be either a translatable or its preprocessed version. """ if not self.is_templatized: return self.path extension = { 'djangojs': 'c', 'django': 'py', }.get(self.domain) filename = '%s.%s' % (self.translatable.file, extension) return os.path.join(self.translatable.dirpath, filename) def preprocess(self): """ Preprocess (if necessary) a translatable file before passing it to xgettext GNU gettext utility. """ if not self.is_templatized: return encoding = settings.FILE_CHARSET if self.command.settings_available else 'utf-8' with open(self.path, 'r', encoding=encoding) as fp: src_data = fp.read() if self.domain == 'djangojs': content = prepare_js_for_gettext(src_data) elif self.domain == 'django': content = templatize(src_data, origin=self.path[2:]) with open(self.work_path, 'w', encoding='utf-8') as fp: fp.write(content) def postprocess_messages(self, msgs): """ Postprocess messages generated by xgettext GNU gettext utility. Transform paths as if these messages were generated from original translatable files rather than from preprocessed versions. """ if not self.is_templatized: return msgs # Remove '.py' suffix if os.name == 'nt': # Preserve '.\' prefix on Windows to respect gettext behavior old_path = self.work_path new_path = self.path else: old_path = self.work_path[2:] new_path = self.path[2:] return re.sub( r'^(#: .*)(' + re.escape(old_path) + r')', lambda match: match.group().replace(old_path, new_path), msgs, flags=re.MULTILINE ) def cleanup(self): """ Remove a preprocessed copy of a translatable file (if any). """ if self.is_templatized: # This check is needed for the case of a symlinked file and its # source being processed inside a single group (locale dir); # removing either of those two removes both. if os.path.exists(self.work_path): os.unlink(self.work_path) def normalize_eols(raw_contents): """ Take a block of raw text that will be passed through str.splitlines() to get universal newlines treatment. Return the resulting block of text with normalized `\n` EOL sequences ready to be written to disk using current platform's native EOLs. """ lines_list = raw_contents.splitlines() # Ensure last line has its EOL if lines_list and lines_list[-1]: lines_list.append('') return '\n'.join(lines_list) def write_pot_file(potfile, msgs): """ Write the `potfile` with the `msgs` contents, making sure its format is valid. """ pot_lines = msgs.splitlines() if os.path.exists(potfile): # Strip the header lines = dropwhile(len, pot_lines) else: lines = [] found, header_read = False, False for line in pot_lines: if not found and not header_read: if 'charset=CHARSET' in line: found = True line = line.replace('charset=CHARSET', 'charset=UTF-8') if not line and not found: header_read = True lines.append(line) msgs = '\n'.join(lines) # Force newlines of POT files to '\n' to work around # https://savannah.gnu.org/bugs/index.php?52395 with open(potfile, 'a', encoding='utf-8', newline='\n') as fp: fp.write(msgs) class Command(BaseCommand): help = ( "Runs over the entire source tree of the current directory and " "pulls out all strings marked for translation. It creates (or updates) a message " "file in the conf/locale (in the django tree) or locale (for projects and " "applications) directory.\n\nYou must run this command with one of either the " "--locale, --exclude, or --all options." ) translatable_file_class = TranslatableFile build_file_class = BuildFile requires_system_checks = False msgmerge_options = ['-q', '--previous'] msguniq_options = ['--to-code=utf-8'] msgattrib_options = ['--no-obsolete'] xgettext_options = ['--from-code=UTF-8', '--add-comments=Translators'] def add_arguments(self, parser): parser.add_argument( '--locale', '-l', default=[], dest='locale', action='append', help='Creates or updates the message files for the given locale(s) (e.g. pt_BR). ' 'Can be used multiple times.', ) parser.add_argument( '--exclude', '-x', default=[], dest='exclude', action='append', help='Locales to exclude. Default is none. Can be used multiple times.', ) parser.add_argument( '--domain', '-d', default='django', dest='domain', help='The domain of the message files (default: "django").', ) parser.add_argument( '--all', '-a', action='store_true', dest='all', help='Updates the message files for all existing locales.', ) parser.add_argument( '--extension', '-e', dest='extensions', action='append', help='The file extension(s) to examine (default: "html,txt,py", or "js" ' 'if the domain is "djangojs"). Separate multiple extensions with ' 'commas, or use -e multiple times.', ) parser.add_argument( '--symlinks', '-s', action='store_true', dest='symlinks', help='Follows symlinks to directories when examining source code ' 'and templates for translation strings.', ) parser.add_argument( '--ignore', '-i', action='append', dest='ignore_patterns', default=[], metavar='PATTERN', help='Ignore files or directories matching this glob-style pattern. ' 'Use multiple times to ignore more.', ) parser.add_argument( '--no-default-ignore', action='store_false', dest='use_default_ignore_patterns', help="Don't ignore the common glob-style patterns 'CVS', '.*', '*~' and '*.pyc'.", ) parser.add_argument( '--no-wrap', action='store_true', dest='no_wrap', help="Don't break long message lines into several lines.", ) parser.add_argument( '--no-location', action='store_true', dest='no_location', help="Don't write '#: filename:line' lines.", ) parser.add_argument( '--add-location', dest='add_location', choices=('full', 'file', 'never'), const='full', nargs='?', help=( "Controls '#: filename:line' lines. If the option is 'full' " "(the default if not given), the lines include both file name " "and line number. If it's 'file', the line number is omitted. If " "it's 'never', the lines are suppressed (same as --no-location). " "--add-location requires gettext 0.19 or newer." ), ) parser.add_argument( '--no-obsolete', action='store_true', dest='no_obsolete', help="Remove obsolete message strings.", ) parser.add_argument( '--keep-pot', action='store_true', dest='keep_pot', help="Keep .pot file after making messages. Useful when debugging.", ) def handle(self, *args, **options): locale = options['locale'] exclude = options['exclude'] self.domain = options['domain'] self.verbosity = options['verbosity'] process_all = options['all'] extensions = options['extensions'] self.symlinks = options['symlinks'] ignore_patterns = options['ignore_patterns'] if options['use_default_ignore_patterns']: ignore_patterns += ['CVS', '.*', '*~', '*.pyc'] self.ignore_patterns = list(set(ignore_patterns)) # Avoid messing with mutable class variables if options['no_wrap']: self.msgmerge_options = self.msgmerge_options[:] + ['--no-wrap'] self.msguniq_options = self.msguniq_options[:] + ['--no-wrap'] self.msgattrib_options = self.msgattrib_options[:] + ['--no-wrap'] self.xgettext_options = self.xgettext_options[:] + ['--no-wrap'] if options['no_location']: self.msgmerge_options = self.msgmerge_options[:] + ['--no-location'] self.msguniq_options = self.msguniq_options[:] + ['--no-location'] self.msgattrib_options = self.msgattrib_options[:] + ['--no-location'] self.xgettext_options = self.xgettext_options[:] + ['--no-location'] if options['add_location']: if self.gettext_version < (0, 19): raise CommandError( "The --add-location option requires gettext 0.19 or later. " "You have %s." % '.'.join(str(x) for x in self.gettext_version) ) arg_add_location = "--add-location=%s" % options['add_location'] self.msgmerge_options = self.msgmerge_options[:] + [arg_add_location] self.msguniq_options = self.msguniq_options[:] + [arg_add_location] self.msgattrib_options = self.msgattrib_options[:] + [arg_add_location] self.xgettext_options = self.xgettext_options[:] + [arg_add_location] self.no_obsolete = options['no_obsolete'] self.keep_pot = options['keep_pot'] if self.domain not in ('django', 'djangojs'): raise CommandError("currently makemessages only supports domains " "'django' and 'djangojs'") if self.domain == 'djangojs': exts = extensions or ['js'] else: exts = extensions or ['html', 'txt', 'py'] self.extensions = handle_extensions(exts) if (locale is None and not exclude and not process_all) or self.domain is None: raise CommandError( "Type '%s help %s' for usage information." % (os.path.basename(sys.argv[0]), sys.argv[1]) ) if self.verbosity > 1: self.stdout.write( 'examining files with the extensions: %s\n' % get_text_list(list(self.extensions), 'and') ) self.invoked_for_django = False self.locale_paths = [] self.default_locale_path = None if os.path.isdir(os.path.join('conf', 'locale')): self.locale_paths = [os.path.abspath(os.path.join('conf', 'locale'))] self.default_locale_path = self.locale_paths[0] self.invoked_for_django = True else: if self.settings_available: self.locale_paths.extend(settings.LOCALE_PATHS) # Allow to run makemessages inside an app dir if os.path.isdir('locale'): self.locale_paths.append(os.path.abspath('locale')) if self.locale_paths: self.default_locale_path = self.locale_paths[0] if not os.path.exists(self.default_locale_path): os.makedirs(self.default_locale_path) # Build locale list looks_like_locale = re.compile(r'[a-z]{2}') locale_dirs = filter(os.path.isdir, glob.glob('%s/*' % self.default_locale_path)) all_locales = [ lang_code for lang_code in map(os.path.basename, locale_dirs) if looks_like_locale.match(lang_code) ] # Account for excluded locales if process_all: locales = all_locales else: locales = locale or all_locales locales = set(locales).difference(exclude) if locales: check_programs('msguniq', 'msgmerge', 'msgattrib') check_programs('xgettext') try: potfiles = self.build_potfiles() # Build po files for each selected locale for locale in locales: if self.verbosity > 0: self.stdout.write("processing locale %s\n" % locale) for potfile in potfiles: self.write_po_file(potfile, locale) finally: if not self.keep_pot: self.remove_potfiles() @cached_property def gettext_version(self): # Gettext tools will output system-encoded bytestrings instead of UTF-8, # when looking up the version. It's especially a problem on Windows. out, err, status = popen_wrapper( ['xgettext', '--version'], stdout_encoding=DEFAULT_LOCALE_ENCODING, ) m = re.search(r'(\d+)\.(\d+)\.?(\d+)?', out) if m: return tuple(int(d) for d in m.groups() if d is not None) else: raise CommandError("Unable to get gettext version. Is it installed?") @cached_property def settings_available(self): try: settings.LOCALE_PATHS except ImproperlyConfigured: if self.verbosity > 1: self.stderr.write("Running without configured settings.") return False return True def build_potfiles(self): """ Build pot files and apply msguniq to them. """ file_list = self.find_files(".") self.remove_potfiles() self.process_files(file_list) potfiles = [] for path in self.locale_paths: potfile = os.path.join(path, '%s.pot' % self.domain) if not os.path.exists(potfile): continue args = ['msguniq'] + self.msguniq_options + [potfile] msgs, errors, status = popen_wrapper(args) if errors: if status != STATUS_OK: raise CommandError( "errors happened while running msguniq\n%s" % errors) elif self.verbosity > 0: self.stdout.write(errors) msgs = normalize_eols(msgs) with open(potfile, 'w', encoding='utf-8') as fp: fp.write(msgs) potfiles.append(potfile) return potfiles def remove_potfiles(self): for path in self.locale_paths: pot_path = os.path.join(path, '%s.pot' % self.domain) if os.path.exists(pot_path): os.unlink(pot_path) def find_files(self, root): """ Get all files in the given root. Also check that there is a matching locale dir for each file. """ def is_ignored(path, ignore_patterns): """ Check if the given path should be ignored or not. """ filename = os.path.basename(path) def ignore(pattern): return fnmatch.fnmatchcase(filename, pattern) or fnmatch.fnmatchcase(path, pattern) return any(ignore(pattern) for pattern in ignore_patterns) ignore_patterns = [os.path.normcase(p) for p in self.ignore_patterns] dir_suffixes = {'%s*' % path_sep for path_sep in {'/', os.sep}} norm_patterns = [] for p in ignore_patterns: for dir_suffix in dir_suffixes: if p.endswith(dir_suffix): norm_patterns.append(p[:-len(dir_suffix)]) break else: norm_patterns.append(p) all_files = [] ignored_roots = [] if self.settings_available: ignored_roots = [os.path.normpath(p) for p in (settings.MEDIA_ROOT, settings.STATIC_ROOT) if p] for dirpath, dirnames, filenames in os.walk(root, topdown=True, followlinks=self.symlinks): for dirname in dirnames[:]: if (is_ignored(os.path.normpath(os.path.join(dirpath, dirname)), norm_patterns) or os.path.join(os.path.abspath(dirpath), dirname) in ignored_roots): dirnames.remove(dirname) if self.verbosity > 1: self.stdout.write('ignoring directory %s\n' % dirname) elif dirname == 'locale': dirnames.remove(dirname) self.locale_paths.insert(0, os.path.join(os.path.abspath(dirpath), dirname)) for filename in filenames: file_path = os.path.normpath(os.path.join(dirpath, filename)) file_ext = os.path.splitext(filename)[1] if file_ext not in self.extensions or is_ignored(file_path, self.ignore_patterns): if self.verbosity > 1: self.stdout.write('ignoring file %s in %s\n' % (filename, dirpath)) else: locale_dir = None for path in self.locale_paths: if os.path.abspath(dirpath).startswith(os.path.dirname(path)): locale_dir = path break locale_dir = locale_dir or self.default_locale_path or NO_LOCALE_DIR all_files.append(self.translatable_file_class(dirpath, filename, locale_dir)) return sorted(all_files) def process_files(self, file_list): """ Group translatable files by locale directory and run pot file build process for each group. """ file_groups = {} for translatable in file_list: file_group = file_groups.setdefault(translatable.locale_dir, []) file_group.append(translatable) for locale_dir, files in file_groups.items(): self.process_locale_dir(locale_dir, files) def process_locale_dir(self, locale_dir, files): """ Extract translatable literals from the specified files, creating or updating the POT file for a given locale directory. Use the xgettext GNU gettext utility. """ build_files = [] for translatable in files: if self.verbosity > 1: self.stdout.write('processing file %s in %s\n' % ( translatable.file, translatable.dirpath )) if self.domain not in ('djangojs', 'django'): continue build_file = self.build_file_class(self, self.domain, translatable) try: build_file.preprocess() except UnicodeDecodeError as e: self.stdout.write( 'UnicodeDecodeError: skipped file %s in %s (reason: %s)' % ( translatable.file, translatable.dirpath, e, ) ) continue build_files.append(build_file) if self.domain == 'djangojs': is_templatized = build_file.is_templatized args = [ 'xgettext', '-d', self.domain, '--language=%s' % ('C' if is_templatized else 'JavaScript',), '--keyword=gettext_noop', '--keyword=gettext_lazy', '--keyword=ngettext_lazy:1,2', '--keyword=pgettext:1c,2', '--keyword=npgettext:1c,2,3', '--output=-', ] elif self.domain == 'django': args = [ 'xgettext', '-d', self.domain, '--language=Python', '--keyword=gettext_noop', '--keyword=gettext_lazy', '--keyword=ngettext_lazy:1,2', '--keyword=ugettext_noop', '--keyword=ugettext_lazy', '--keyword=ungettext_lazy:1,2', '--keyword=pgettext:1c,2', '--keyword=npgettext:1c,2,3', '--keyword=pgettext_lazy:1c,2', '--keyword=npgettext_lazy:1c,2,3', '--output=-', ] else: return input_files = [bf.work_path for bf in build_files] with NamedTemporaryFile(mode='w+') as input_files_list: input_files_list.write(('\n'.join(input_files))) input_files_list.flush() args.extend(['--files-from', input_files_list.name]) args.extend(self.xgettext_options) msgs, errors, status = popen_wrapper(args) if errors: if status != STATUS_OK: for build_file in build_files: build_file.cleanup() raise CommandError( 'errors happened while running xgettext on %s\n%s' % ('\n'.join(input_files), errors) ) elif self.verbosity > 0: # Print warnings self.stdout.write(errors) if msgs: if locale_dir is NO_LOCALE_DIR: file_path = os.path.normpath(build_files[0].path) raise CommandError( 'Unable to find a locale path to store translations for ' 'file %s' % file_path ) for build_file in build_files: msgs = build_file.postprocess_messages(msgs) potfile = os.path.join(locale_dir, '%s.pot' % self.domain) write_pot_file(potfile, msgs) for build_file in build_files: build_file.cleanup() def write_po_file(self, potfile, locale): """ Create or update the PO file for self.domain and `locale`. Use contents of the existing `potfile`. Use msgmerge and msgattrib GNU gettext utilities. """ basedir = os.path.join(os.path.dirname(potfile), locale, 'LC_MESSAGES') if not os.path.isdir(basedir): os.makedirs(basedir) pofile = os.path.join(basedir, '%s.po' % self.domain) if os.path.exists(pofile): args = ['msgmerge'] + self.msgmerge_options + [pofile, potfile] msgs, errors, status = popen_wrapper(args) if errors: if status != STATUS_OK: raise CommandError( "errors happened while running msgmerge\n%s" % errors) elif self.verbosity > 0: self.stdout.write(errors) else: with open(potfile, 'r', encoding='utf-8') as fp: msgs = fp.read() if not self.invoked_for_django: msgs = self.copy_plural_forms(msgs, locale) msgs = normalize_eols(msgs) msgs = msgs.replace( "#. #-#-#-#-# %s.pot (PACKAGE VERSION) #-#-#-#-#\n" % self.domain, "") with open(pofile, 'w', encoding='utf-8') as fp: fp.write(msgs) if self.no_obsolete: args = ['msgattrib'] + self.msgattrib_options + ['-o', pofile, pofile] msgs, errors, status = popen_wrapper(args) if errors: if status != STATUS_OK: raise CommandError( "errors happened while running msgattrib\n%s" % errors) elif self.verbosity > 0: self.stdout.write(errors) def copy_plural_forms(self, msgs, locale): """ Copy plural forms header contents from a Django catalog of locale to the msgs string, inserting it at the right place. msgs should be the contents of a newly created .po file. """ django_dir = os.path.normpath(os.path.join(os.path.dirname(django.__file__))) if self.domain == 'djangojs': domains = ('djangojs', 'django') else: domains = ('django',) for domain in domains: django_po = os.path.join(django_dir, 'conf', 'locale', locale, 'LC_MESSAGES', '%s.po' % domain) if os.path.exists(django_po): with open(django_po, 'r', encoding='utf-8') as fp: m = plural_forms_re.search(fp.read()) if m: plural_form_line = m.group('value') if self.verbosity > 1: self.stdout.write("copying plural forms: %s\n" % plural_form_line) lines = [] found = False for line in msgs.splitlines(): if not found and (not line or plural_forms_re.search(line)): line = plural_form_line found = True lines.append(line) msgs = '\n'.join(lines) break return msgs