diff options
author | Caolán McNamara <caolanm@redhat.com> | 2017-07-12 15:18:55 +0100 |
---|---|---|
committer | Caolán McNamara <caolanm@redhat.com> | 2017-07-12 16:25:51 +0200 |
commit | c3d8711aa109ab9081108c77dc7af396444fd3f1 (patch) | |
tree | d0e8db4c0bc8f95c9bb85126a8441cb6e5fae86e /solenv | |
parent | 57ffece4297f51bb1abd5093c58fc56be8aea2b8 (diff) |
add script to update translations for gettext
Change-Id: I1ec48a71103aad50e8a303a5356a25f3a168d6bd
Reviewed-on: https://gerrit.libreoffice.org/39867
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
Diffstat (limited to 'solenv')
-rw-r--r-- | solenv/bin/polib.py | 1868 | ||||
-rw-r--r-- | solenv/bin/update-for-gettext | 76 |
2 files changed, 1944 insertions, 0 deletions
diff --git a/solenv/bin/polib.py b/solenv/bin/polib.py new file mode 100644 index 000000000000..b835167729e8 --- /dev/null +++ b/solenv/bin/polib.py @@ -0,0 +1,1868 @@ +# -* coding: utf-8 -*- +# +# License: MIT (see LICENSE file provided) +# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: +8 +""" +**polib** allows you to manipulate, create, modify gettext files (pot, po and +mo files). You can load existing files, iterate through it's entries, add, +modify entries, comments or metadata, etc. or create new po files from scratch. + +**polib** provides a simple and pythonic API via the :func:`~polib.pofile` and +:func:`~polib.mofile` convenience functions. +""" + +__author__ = 'David Jean Louis <izimobil@gmail.com>' +__version__ = '1.0.8' +__all__ = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry', + 'default_encoding', 'escape', 'unescape', 'detect_encoding', ] + +import array +import codecs +import os +import re +import struct +import sys +import textwrap +import binascii + +try: + import io +except ImportError: + # replacement of io.open() for python < 2.6 + # we use codecs instead + class io(object): + @staticmethod + def open(fpath, mode='r', encoding=None): + return codecs.open(fpath, mode, encoding) + + +# the default encoding to use when encoding cannot be detected +default_encoding = 'utf-8' + +# python 2/3 compatibility helpers {{{ + + +if sys.version_info[:2] < (3, 0): + PY3 = False + text_type = unicode + + def b(s): + return s + + def u(s): + return unicode(s, "unicode_escape") + +else: + PY3 = True + text_type = str + + def b(s): + return s.encode("latin-1") + + def u(s): + return s +# }}} +# _pofile_or_mofile {{{ + + +def _pofile_or_mofile(f, type, **kwargs): + """ + Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to + honor the DRY concept. + """ + # get the file encoding + enc = kwargs.get('encoding') + if enc is None: + enc = detect_encoding(f, type == 'mofile') + + # parse the file + kls = type == 'pofile' and _POFileParser or _MOFileParser + parser = kls( + f, + encoding=enc, + check_for_duplicates=kwargs.get('check_for_duplicates', False), + klass=kwargs.get('klass') + ) + instance = parser.parse() + instance.wrapwidth = kwargs.get('wrapwidth', 78) + return instance +# }}} +# _is_file {{{ + + +def _is_file(filename_or_contents): + """ + Safely returns the value of os.path.exists(filename_or_contents). + + Arguments: + + ``filename_or_contents`` + either a filename, or a string holding the contents of some file. + In the latter case, this function will always return False. + """ + try: + return os.path.exists(filename_or_contents) + except (ValueError, UnicodeEncodeError): + return False +# }}} +# function pofile() {{{ + + +def pofile(pofile, **kwargs): + """ + Convenience function that parses the po or pot file ``pofile`` and returns + a :class:`~polib.POFile` instance. + + Arguments: + + ``pofile`` + string, full or relative path to the po/pot file or its content (data). + + ``wrapwidth`` + integer, the wrap width, only useful when the ``-w`` option was passed + to xgettext (optional, default: ``78``). + + ``encoding`` + string, the encoding to use (e.g. "utf-8") (default: ``None``, the + encoding will be auto-detected). + + ``check_for_duplicates`` + whether to check for duplicate entries when adding entries to the + file (optional, default: ``False``). + + ``klass`` + class which is used to instantiate the return value (optional, + default: ``None``, the return value with be a :class:`~polib.POFile` + instance). + """ + return _pofile_or_mofile(pofile, 'pofile', **kwargs) +# }}} +# function mofile() {{{ + + +def mofile(mofile, **kwargs): + """ + Convenience function that parses the mo file ``mofile`` and returns a + :class:`~polib.MOFile` instance. + + Arguments: + + ``mofile`` + string, full or relative path to the mo file or its content (data). + + ``wrapwidth`` + integer, the wrap width, only useful when the ``-w`` option was passed + to xgettext to generate the po file that was used to format the mo file + (optional, default: ``78``). + + ``encoding`` + string, the encoding to use (e.g. "utf-8") (default: ``None``, the + encoding will be auto-detected). + + ``check_for_duplicates`` + whether to check for duplicate entries when adding entries to the + file (optional, default: ``False``). + + ``klass`` + class which is used to instantiate the return value (optional, + default: ``None``, the return value with be a :class:`~polib.POFile` + instance). + """ + return _pofile_or_mofile(mofile, 'mofile', **kwargs) +# }}} +# function detect_encoding() {{{ + + +def detect_encoding(file, binary_mode=False): + """ + Try to detect the encoding used by the ``file``. The ``file`` argument can + be a PO or MO file path or a string containing the contents of the file. + If the encoding cannot be detected, the function will return the value of + ``default_encoding``. + + Arguments: + + ``file`` + string, full or relative path to the po/mo file or its content. + + ``binary_mode`` + boolean, set this to True if ``file`` is a mo file. + """ + PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)' + rxt = re.compile(u(PATTERN)) + rxb = re.compile(b(PATTERN)) + + def charset_exists(charset): + """Check whether ``charset`` is valid or not.""" + try: + codecs.lookup(charset) + except LookupError: + return False + return True + + if not _is_file(file): + match = rxt.search(file) + if match: + enc = match.group(1).strip() + if charset_exists(enc): + return enc + else: + # For PY3, always treat as binary + if binary_mode or PY3: + mode = 'rb' + rx = rxb + else: + mode = 'r' + rx = rxt + f = open(file, mode) + for l in f.readlines(): + match = rx.search(l) + if match: + f.close() + enc = match.group(1).strip() + if not isinstance(enc, text_type): + enc = enc.decode('utf-8') + if charset_exists(enc): + return enc + f.close() + return default_encoding +# }}} +# function escape() {{{ + + +def escape(st): + """ + Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in + the given string ``st`` and returns it. + """ + return st.replace('\\', r'\\')\ + .replace('\t', r'\t')\ + .replace('\r', r'\r')\ + .replace('\n', r'\n')\ + .replace('\"', r'\"') +# }}} +# function unescape() {{{ + + +def unescape(st): + """ + Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in + the given string ``st`` and returns it. + """ + def unescape_repl(m): + m = m.group(1) + if m == 'n': + return '\n' + if m == 't': + return '\t' + if m == 'r': + return '\r' + if m == '\\': + return '\\' + return m # handles escaped double quote + return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st) +# }}} +# function natural_sort() {{{ + + +def natural_sort(lst): + """ + Sort naturally the given list. + Credits: http://stackoverflow.com/a/4836734 + """ + convert = lambda text: int(text) if text.isdigit() else text.lower() + alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] + return sorted(lst, key = alphanum_key) +# }}} +# class _BaseFile {{{ + + +class _BaseFile(list): + """ + Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile` + classes. This class should **not** be instantiated directly. + """ + + def __init__(self, *args, **kwargs): + """ + Constructor, accepts the following keyword arguments: + + ``pofile`` + string, the path to the po or mo file, or its content as a string. + + ``wrapwidth`` + integer, the wrap width, only useful when the ``-w`` option was + passed to xgettext (optional, default: ``78``). + + ``encoding`` + string, the encoding to use, defaults to ``default_encoding`` + global variable (optional). + + ``check_for_duplicates`` + whether to check for duplicate entries when adding entries to the + file, (optional, default: ``False``). + """ + list.__init__(self) + # the opened file handle + pofile = kwargs.get('pofile', None) + if pofile and _is_file(pofile): + self.fpath = pofile + else: + self.fpath = kwargs.get('fpath') + # the width at which lines should be wrapped + self.wrapwidth = kwargs.get('wrapwidth', 78) + # the file encoding + self.encoding = kwargs.get('encoding', default_encoding) + # whether to check for duplicate entries or not + self.check_for_duplicates = kwargs.get('check_for_duplicates', False) + # header + self.header = '' + # both po and mo files have metadata + self.metadata = {} + self.metadata_is_fuzzy = 0 + + def __unicode__(self): + """ + Returns the unicode representation of the file. + """ + ret = [] + entries = [self.metadata_as_entry()] + \ + [e for e in self if not e.obsolete] + for entry in entries: + ret.append(entry.__unicode__(self.wrapwidth)) + for entry in self.obsolete_entries(): + ret.append(entry.__unicode__(self.wrapwidth)) + ret = u('\n').join(ret) + + assert isinstance(ret, text_type) + #if type(ret) != text_type: + # return unicode(ret, self.encoding) + return ret + + if PY3: + def __str__(self): + return self.__unicode__() + else: + def __str__(self): + """ + Returns the string representation of the file. + """ + return unicode(self).encode(self.encoding) + + def __contains__(self, entry): + """ + Overridden ``list`` method to implement the membership test (in and + not in). + The method considers that an entry is in the file if it finds an entry + that has the same msgid (the test is **case sensitive**) and the same + msgctxt (or none for both entries). + + Argument: + + ``entry`` + an instance of :class:`~polib._BaseEntry`. + """ + return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) \ + is not None + + def __eq__(self, other): + return str(self) == str(other) + + def append(self, entry): + """ + Overridden method to check for duplicates entries, if a user tries to + add an entry that is already in the file, the method will raise a + ``ValueError`` exception. + + Argument: + + ``entry`` + an instance of :class:`~polib._BaseEntry`. + """ + # check_for_duplicates may not be defined (yet) when unpickling. + # But if pickling, we never want to check for duplicates anyway. + if getattr(self, 'check_for_duplicates', False) and entry in self: + raise ValueError('Entry "%s" already exists' % entry.msgid) + super(_BaseFile, self).append(entry) + + def insert(self, index, entry): + """ + Overridden method to check for duplicates entries, if a user tries to + add an entry that is already in the file, the method will raise a + ``ValueError`` exception. + + Arguments: + + ``index`` + index at which the entry should be inserted. + + ``entry`` + an instance of :class:`~polib._BaseEntry`. + """ + if self.check_for_duplicates and entry in self: + raise ValueError('Entry "%s" already exists' % entry.msgid) + super(_BaseFile, self).insert(index, entry) + + def metadata_as_entry(self): + """ + Returns the file metadata as a :class:`~polib.POFile` instance. + """ + e = POEntry(msgid='') + mdata = self.ordered_metadata() + if mdata: + strs = [] + for name, value in mdata: + # Strip whitespace off each line in a multi-line entry + strs.append('%s: %s' % (name, value)) + e.msgstr = '\n'.join(strs) + '\n' + if self.metadata_is_fuzzy: + e.flags.append('fuzzy') + return e + + def save(self, fpath=None, repr_method='__unicode__'): + """ + Saves the po file to ``fpath``. + If it is an existing file and no ``fpath`` is provided, then the + existing file is rewritten with the modified data. + + Keyword arguments: + + ``fpath`` + string, full or relative path to the file. + + ``repr_method`` + string, the method to use for output. + """ + if self.fpath is None and fpath is None: + raise IOError('You must provide a file path to save() method') + contents = getattr(self, repr_method)() + if fpath is None: + fpath = self.fpath + if repr_method == 'to_binary': + fhandle = open(fpath, 'wb') + else: + fhandle = io.open(fpath, 'w', encoding=self.encoding) + if not isinstance(contents, text_type): + contents = contents.decode(self.encoding) + fhandle.write(contents) + fhandle.close() + # set the file path if not set + if self.fpath is None and fpath: + self.fpath = fpath + + def find(self, st, by='msgid', include_obsolete_entries=False, + msgctxt=False): + """ + Find the entry which msgid (or property identified by the ``by`` + argument) matches the string ``st``. + + Keyword arguments: + + ``st`` + string, the string to search for. + + ``by`` + string, the property to use for comparison (default: ``msgid``). + + ``include_obsolete_entries`` + boolean, whether to also search in entries that are obsolete. + + ``msgctxt`` + string, allows specifying a specific message context for the + search. + """ + if include_obsolete_entries: + entries = self[:] + else: + entries = [e for e in self if not e.obsolete] + for e in entries: + if getattr(e, by) == st: + if msgctxt is not False and e.msgctxt != msgctxt: + continue + return e + return None + + def ordered_metadata(self): + """ + Convenience method that returns an ordered version of the metadata + dictionary. The return value is list of tuples (metadata name, + metadata_value). + """ + # copy the dict first + metadata = self.metadata.copy() + data_order = [ + 'Project-Id-Version', + 'Report-Msgid-Bugs-To', + 'POT-Creation-Date', + 'PO-Revision-Date', + 'Last-Translator', + 'Language-Team', + 'Language', + 'MIME-Version', + 'Content-Type', + 'Content-Transfer-Encoding', + 'Plural-Forms' + ] + ordered_data = [] + for data in data_order: + try: + value = metadata.pop(data) + ordered_data.append((data, value)) + except KeyError: + pass + # the rest of the metadata will be alphabetically ordered since there + # are no specs for this AFAIK + for data in natural_sort(metadata.keys()): + value = metadata[data] + ordered_data.append((data, value)) + return ordered_data + + def to_binary(self): + """ + Return the binary representation of the file. + """ + offsets = [] + entries = self.translated_entries() + + # the keys are sorted in the .mo file + def cmp(_self, other): + # msgfmt compares entries with msgctxt if it exists + self_msgid = _self.msgctxt and _self.msgctxt or _self.msgid + other_msgid = other.msgctxt and other.msgctxt or other.msgid + if self_msgid > other_msgid: + return 1 + elif self_msgid < other_msgid: + return -1 + else: + return 0 + # add metadata entry + entries.sort(key=lambda o: o.msgctxt or o.msgid) + mentry = self.metadata_as_entry() + #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() + entries = [mentry] + entries + entries_len = len(entries) + ids, strs = b(''), b('') + for e in entries: + # For each string, we need size and file offset. Each string is + # NUL terminated; the NUL does not count into the size. + msgid = b('') + if e.msgctxt: + # Contexts are stored by storing the concatenation of the + # context, a <EOT> byte, and the original string + msgid = self._encode(e.msgctxt + '\4') + if e.msgid_plural: + msgstr = [] + for index in sorted(e.msgstr_plural.keys()): + msgstr.append(e.msgstr_plural[index]) + msgid += self._encode(e.msgid + '\0' + e.msgid_plural) + msgstr = self._encode('\0'.join(msgstr)) + else: + msgid += self._encode(e.msgid) + msgstr = self._encode(e.msgstr) + offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) + ids += msgid + b('\0') + strs += msgstr + b('\0') + + # The header is 7 32-bit unsigned integers. + keystart = 7 * 4 + 16 * entries_len + # and the values start after the keys + valuestart = keystart + len(ids) + koffsets = [] + voffsets = [] + # The string table first has the list of keys, then the list of values. + # Each entry has first the size of the string, then the file offset. + for o1, l1, o2, l2 in offsets: + koffsets += [l1, o1 + keystart] + voffsets += [l2, o2 + valuestart] + offsets = koffsets + voffsets + + output = struct.pack( + "Iiiiiii", + # Magic number + MOFile.MAGIC, + # Version + 0, + # number of entries + entries_len, + # start of key index + 7 * 4, + # start of value index + 7 * 4 + entries_len * 8, + # size and offset of hash table, we don't use hash tables + 0, keystart + + ) + if PY3 and sys.version_info.minor > 1: # python 3.2 or superior + output += array.array("i", offsets).tobytes() + else: + output += array.array("i", offsets).tostring() + output += ids + output += strs + return output + + def _encode(self, mixed): + """ + Encodes the given ``mixed`` argument with the file encoding if and + only if it's an unicode string and returns the encoded string. + """ + if isinstance(mixed, text_type): + mixed = mixed.encode(self.encoding) + return mixed +# }}} +# class POFile {{{ + + +class POFile(_BaseFile): + """ + Po (or Pot) file reader/writer. + This class inherits the :class:`~polib._BaseFile` class and, by extension, + the python ``list`` type. + """ + + def __unicode__(self): + """ + Returns the unicode representation of the po file. + """ + ret, headers = '', self.header.split('\n') + for header in headers: + if not len(header): + ret += "#\n" + elif header[:1] in [',', ':']: + ret += '#%s\n' % header + else: + ret += '# %s\n' % header + + if not isinstance(ret, text_type): + ret = ret.decode(self.encoding) + + return ret + _BaseFile.__unicode__(self) + + def save_as_mofile(self, fpath): + """ + Saves the binary representation of the file to given ``fpath``. + + Keyword argument: + + ``fpath`` + string, full or relative path to the mo file. + """ + _BaseFile.save(self, fpath, 'to_binary') + + def percent_translated(self): + """ + Convenience method that returns the percentage of translated + messages. + """ + total = len([e for e in self if not e.obsolete]) + if total == 0: + return 100 + translated = len(self.translated_entries()) + return int(translated * 100 / float(total)) + + def translated_entries(self): + """ + Convenience method that returns the list of translated entries. + """ + return [e for e in self if e.translated()] + + def untranslated_entries(self): + """ + Convenience method that returns the list of untranslated entries. + """ + return [e for e in self if not e.translated() and not e.obsolete + and not 'fuzzy' in e.flags] + + def fuzzy_entries(self): + """ + Convenience method that returns the list of fuzzy entries. + """ + return [e for e in self if 'fuzzy' in e.flags] + + def obsolete_entries(self): + """ + Convenience method that returns the list of obsolete entries. + """ + return [e for e in self if e.obsolete] + + def merge(self, refpot): + """ + Convenience method that merges the current pofile with the pot file + provided. It behaves exactly as the gettext msgmerge utility: + + * comments of this file will be preserved, but extracted comments and + occurrences will be discarded; + * any translations or comments in the file will be discarded, however, + dot comments and file positions will be preserved; + * the fuzzy flags are preserved. + + Keyword argument: + + ``refpot`` + object POFile, the reference catalog. + """ + # Store entries in dict/set for faster access + self_entries = dict((entry.msgid, entry) for entry in self) + refpot_msgids = set(entry.msgid for entry in refpot) + # Merge entries that are in the refpot + for entry in refpot: + e = self_entries.get(entry.msgid) + if e is None: + e = POEntry() + self.append(e) + e.merge(entry) + # ok, now we must "obsolete" entries that are not in the refpot anymore + for entry in self: + if entry.msgid not in refpot_msgids: + entry.obsolete = True +# }}} +# class MOFile {{{ + + +class MOFile(_BaseFile): + """ + Mo file reader/writer. + This class inherits the :class:`~polib._BaseFile` class and, by + extension, the python ``list`` type. + """ + MAGIC = 0x950412de + MAGIC_SWAPPED = 0xde120495 + + def __init__(self, *args, **kwargs): + """ + Constructor, accepts all keywords arguments accepted by + :class:`~polib._BaseFile` class. + """ + _BaseFile.__init__(self, *args, **kwargs) + self.magic_number = None + self.version = 0 + + def save_as_pofile(self, fpath): + """ + Saves the mofile as a pofile to ``fpath``. + + Keyword argument: + + ``fpath`` + string, full or relative path to the file. + """ + _BaseFile.save(self, fpath) + + def save(self, fpath=None): + """ + Saves the mofile to ``fpath``. + + Keyword argument: + + ``fpath`` + string, full or relative path to the file. + """ + _BaseFile.save(self, fpath, 'to_binary') + + def percent_translated(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return 100 + + def translated_entries(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return self + + def untranslated_entries(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return [] + + def fuzzy_entries(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return [] + + def obsolete_entries(self): + """ + Convenience method to keep the same interface with POFile instances. + """ + return [] +# }}} +# class _BaseEntry {{{ + + +class _BaseEntry(object): + """ + Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes. + This class should **not** be instantiated directly. + """ + + def __init__(self, *args, **kwargs): + """ + Constructor, accepts the following keyword arguments: + + ``msgid`` + string, the entry msgid. + + ``msgstr`` + string, the entry msgstr. + + ``msgid_plural`` + string, the entry msgid_plural. + + ``msgstr_plural`` + list, the entry msgstr_plural lines. + + ``msgctxt`` + string, the entry context (msgctxt). + + ``obsolete`` + bool, whether the entry is "obsolete" or not. + + ``encoding`` + string, the encoding to use, defaults to ``default_encoding`` + global variable (optional). + """ + self.msgid = kwargs.get('msgid', '') + self.msgstr = kwargs.get('msgstr', '') + self.msgid_plural = kwargs.get('msgid_plural', '') + self.msgstr_plural = kwargs.get('msgstr_plural', {}) + self.msgctxt = kwargs.get('msgctxt', None) + self.obsolete = kwargs.get('obsolete', False) + self.encoding = kwargs.get('encoding', default_encoding) + + def __unicode__(self, wrapwidth=78): + """ + Returns the unicode representation of the entry. + """ + if self.obsolete: + delflag = '#~ ' + else: + delflag = '' + ret = [] + # write the msgctxt if any + if self.msgctxt is not None: + ret += self._str_field("msgctxt", delflag, "", self.msgctxt, + wrapwidth) + # write the msgid + ret += self._str_field("msgid", delflag, "", self.msgid, wrapwidth) + # write the msgid_plural if any + if self.msgid_plural: + ret += self._str_field("msgid_plural", delflag, "", + self.msgid_plural, wrapwidth) + if self.msgstr_plural: + # write the msgstr_plural if any + msgstrs = self.msgstr_plural + keys = list(msgstrs) + keys.sort() + for index in keys: + msgstr = msgstrs[index] + plural_index = '[%s]' % index + ret += self._str_field("msgstr", delflag, plural_index, msgstr, + wrapwidth) + else: + # otherwise write the msgstr + ret += self._str_field("msgstr", delflag, "", self.msgstr, + wrapwidth) + ret.append('') + usedirect = True + if type(ret[0] != unicode): + try: + usedirect = False + ret = u('\n').join(x.decode('utf-8') for x in ret) + except: + usedirect = True + if usedirect: + ret = u('\n').join(ret) + return ret + + if PY3: + def __str__(self): + return self.__unicode__() + else: + def __str__(self): + """ + Returns the string representation of the entry. + """ + return unicode(self).encode(self.encoding) + + def __eq__(self, other): + return str(self) == str(other) + + def _str_field(self, fieldname, delflag, plural_index, field, + wrapwidth=78): + lines = field.splitlines(True) + if len(lines) > 1: + lines = [''] + lines # start with initial empty line + else: + escaped_field = escape(field) + specialchars_count = 0 + for c in ['\\', '\n', '\r', '\t', '"']: + specialchars_count += field.count(c) + # comparison must take into account fieldname length + one space + # + 2 quotes (eg. msgid "<string>") + flength = len(fieldname) + 3 + if plural_index: + flength += len(plural_index) + real_wrapwidth = wrapwidth - flength + specialchars_count + if wrapwidth > 0 and len(field) > real_wrapwidth: + # Wrap the line but take field name into account + lines = [''] + [unescape(item) for item in wrap( + escaped_field, + wrapwidth - 2, # 2 for quotes "" + drop_whitespace=False, + break_long_words=False + )] + else: + lines = [field] + if fieldname.startswith('previous_'): + # quick and dirty trick to get the real field name + fieldname = fieldname[9:] + + ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, + escape(lines.pop(0)))] + for line in lines: + ret.append('%s"%s"' % (delflag, escape(line))) + return ret +# }}} +# class POEntry {{{ + + +class POEntry(_BaseEntry): + """ + Represents a po file entry. + """ + + def __init__(self, *args, **kwargs): + """ + Constructor, accepts the following keyword arguments: + + ``comment`` + string, the entry comment. + + ``tcomment`` + string, the entry translator comment. + + ``occurrences`` + list, the entry occurrences. + + ``flags`` + list, the entry flags. + + ``previous_msgctxt`` + string, the entry previous context. + + ``previous_msgid`` + string, the entry previous msgid. + + ``previous_msgid_plural`` + string, the entry previous msgid_plural. + + ``linenum`` + integer, the line number of the entry + """ + _BaseEntry.__init__(self, *args, **kwargs) + self.comment = kwargs.get('comment', '') + self.tcomment = kwargs.get('tcomment', '') + self.occurrences = kwargs.get('occurrences', []) + self.flags = kwargs.get('flags', []) + self.previous_msgctxt = kwargs.get('previous_msgctxt', None) + self.previous_msgid = kwargs.get('previous_msgid', None) + self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None) + self.linenum = kwargs.get('linenum', None) + + def __unicode__(self, wrapwidth=0): + """ + Returns the unicode representation of the entry. + """ + ret = [] + # comments first, if any (with text wrapping as xgettext does) + if self.obsolete: + comments = [('tcomment', '# ')] + else: + comments = [('comment', '#. '), ('tcomment', '# ')] + for c in comments: + val = getattr(self, c[0]) + if val: + for comment in val.split('\n'): + if wrapwidth > 0 and len(comment) + len(c[1]) > wrapwidth: + ret += wrap( + comment, + wrapwidth, + initial_indent=c[1], + subsequent_indent=c[1], + break_long_words=False + ) + else: + ret.append('%s%s' % (c[1], comment)) + + # occurrences (with text wrapping as xgettext does) + if not self.obsolete and self.occurrences: + filelist = [] + for fpath, lineno in self.occurrences: + if lineno: + filelist.append('%s:%s' % (fpath, lineno)) + else: + filelist.append(fpath) + filestr = ' '.join(filelist) + if wrapwidth > 0 and len(filestr) + 3 > wrapwidth: + # textwrap split words that contain hyphen, this is not + # what we want for filenames, so the dirty hack is to + # temporally replace hyphens with a char that a file cannot + # contain, like "*" + ret += [l.replace('*', '-') for l in wrap( + filestr.replace('-', '*'), + wrapwidth, + initial_indent='#: ', + subsequent_indent='#: ', + break_long_words=False + )] + else: + ret.append('#: ' + filestr) + + # flags (TODO: wrapping ?) + if self.flags: + ret.append('#, %s' % ', '.join(self.flags)) + + # previous context and previous msgid/msgid_plural + fields = ['previous_msgctxt', 'previous_msgid', + 'previous_msgid_plural'] + if self.obsolete: + prefix = "#~| " + else: + prefix = "#| " + for f in fields: + val = getattr(self, f) + if val: + ret += self._str_field(f, prefix, "", val, wrapwidth) + + ret.append(_BaseEntry.__unicode__(self, wrapwidth)) + ret = u('\n').join(ret) + return ret + + def __cmp__(self, other): + """ + Called by comparison operations if rich comparison is not defined. + """ + + # First: Obsolete test + if self.obsolete != other.obsolete: + if self.obsolete: + return -1 + else: + return 1 + # Work on a copy to protect original + occ1 = sorted(self.occurrences[:]) + occ2 = sorted(other.occurrences[:]) + pos = 0 + for entry1 in occ1: + try: + entry2 = occ2[pos] + except IndexError: + return 1 + pos = pos + 1 + if entry1[0] != entry2[0]: + if entry1[0] > entry2[0]: + return 1 + else: + return -1 + if entry1[1] != entry2[1]: + if entry1[1] > entry2[1]: + return 1 + else: + return -1 + # Compare msgid_plural if set + if self.msgid_plural: + if not other.msgid_plural: + return 1 + for pos in self.msgid_plural: + if pos not in other.msgid_plural: + return 1 + if self.msgid_plural[pos] > other.msgid_plural[pos]: + return 1 + if self.msgid_plural[pos] < other.msgid_plural[pos]: + return -1 + # Finally: Compare message ID + if self.msgid > other.msgid: + return 1 + elif self.msgid < other.msgid: + return -1 + return 0 + + def __gt__(self, other): + return self.__cmp__(other) > 0 + + def __lt__(self, other): + return self.__cmp__(other) < 0 + + def __ge__(self, other): + return self.__cmp__(other) >= 0 + + def __le__(self, other): + return self.__cmp__(other) <= 0 + + def __eq__(self, other): + return self.__cmp__(other) == 0 + + def __ne__(self, other): + return self.__cmp__(other) != 0 + + def translated(self): + """ + Returns ``True`` if the entry has been translated or ``False`` + otherwise. + """ + if self.obsolete or 'fuzzy' in self.flags: + return False + if self.msgstr != '': + return True + if self.msgstr_plural: + for pos in self.msgstr_plural: + if self.msgstr_plural[pos] == '': + return False + return True + return False + + def merge(self, other): + """ + Merge the current entry with the given pot entry. + """ + self.msgid = other.msgid + self.msgctxt = other.msgctxt + self.occurrences = other.occurrences + self.comment = other.comment + fuzzy = 'fuzzy' in self.flags + self.flags = other.flags[:] # clone flags + if fuzzy: + self.flags.append('fuzzy') + self.msgid_plural = other.msgid_plural + self.obsolete = other.obsolete + self.previous_msgctxt = other.previous_msgctxt + self.previous_msgid = other.previous_msgid + self.previous_msgid_plural = other.previous_msgid_plural + if other.msgstr_plural: + for pos in other.msgstr_plural: + try: + # keep existing translation at pos if any + self.msgstr_plural[pos] + except KeyError: + self.msgstr_plural[pos] = '' + + def __hash__(self): + return hash((self.msgid, self.msgstr)) +# }}} +# class MOEntry {{{ + + +class MOEntry(_BaseEntry): + """ + Represents a mo file entry. + """ + def __init__(self, *args, **kwargs): + """ + Constructor, accepts the following keyword arguments, + for consistency with :class:`~polib.POEntry`: + + ``comment`` + ``tcomment`` + ``occurrences`` + ``flags`` + ``previous_msgctxt`` + ``previous_msgid`` + ``previous_msgid_plural`` + + Note: even though these keyword arguments are accepted, + they hold no real meaning in the context of MO files + and are simply ignored. + """ + _BaseEntry.__init__(self, *args, **kwargs) + self.comment = '' + self.tcomment = '' + self.occurrences = [] + self.flags = [] + self.previous_msgctxt = None + self.previous_msgid = None + self.previous_msgid_plural = None + + def __hash__(self): + return hash((self.msgid, self.msgstr)) + +# }}} +# class _POFileParser {{{ + + +class _POFileParser(object): + """ + A finite state machine to parse efficiently and correctly po + file format. + """ + + def __init__(self, pofile, *args, **kwargs): + """ + Constructor. + + Keyword arguments: + + ``pofile`` + string, path to the po file or its content + + ``encoding`` + string, the encoding to use, defaults to ``default_encoding`` + global variable (optional). + + ``check_for_duplicates`` + whether to check for duplicate entries when adding entries to the + file (optional, default: ``False``). + """ + enc = kwargs.get('encoding', default_encoding) + if _is_file(pofile): + try: + self.fhandle = io.open(pofile, 'rt', encoding=enc) + except LookupError: + enc = default_encoding + self.fhandle = io.open(pofile, 'rt', encoding=enc) + else: + self.fhandle = pofile.splitlines() + + klass = kwargs.get('klass') + if klass is None: + klass = POFile + self.instance = klass( + pofile=pofile, + encoding=enc, + check_for_duplicates=kwargs.get('check_for_duplicates', False) + ) + self.transitions = {} + self.current_line = 0 + self.current_entry = POEntry(linenum=self.current_line) + self.current_state = 'st' + self.current_token = None + # two memo flags used in handlers + self.msgstr_index = 0 + self.entry_obsolete = 0 + # Configure the state machine, by adding transitions. + # Signification of symbols: + # * ST: Beginning of the file (start) + # * HE: Header + # * TC: a translation comment + # * GC: a generated comment + # * OC: a file/line occurrence + # * FL: a flags line + # * CT: a message context + # * PC: a previous msgctxt + # * PM: a previous msgid + # * PP: a previous msgid_plural + # * MI: a msgid + # * MP: a msgid plural + # * MS: a msgstr + # * MX: a msgstr plural + # * MC: a msgid or msgstr continuation line + all = ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc', + 'ms', 'mp', 'mx', 'mi'] + + self.add('tc', ['st', 'he'], 'he') + self.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', + 'mp', 'mx', 'mi'], 'tc') + self.add('gc', all, 'gc') + self.add('oc', all, 'oc') + self.add('fl', all, 'fl') + self.add('pc', all, 'pc') + self.add('pm', all, 'pm') + self.add('pp', all, 'pp') + self.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', + 'pp', 'ms', 'mx'], 'ct') + self.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc', + 'pm', 'pp', 'ms', 'mx'], 'mi') + self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp') + self.add('ms', ['mi', 'mp', 'tc'], 'ms') + self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx') + self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc') + + def parse(self): + """ + Run the state machine, parse the file line by line and call process() + with the current matched symbol. + """ + + keywords = { + 'msgctxt': 'ct', + 'msgid': 'mi', + 'msgstr': 'ms', + 'msgid_plural': 'mp', + } + prev_keywords = { + 'msgid_plural': 'pp', + 'msgid': 'pm', + 'msgctxt': 'pc', + } + tokens = [] + for line in self.fhandle: + self.current_line += 1 + line = line.strip() + if line == '': + continue + + tokens = line.split(None, 2) + nb_tokens = len(tokens) + + if tokens[0] == '#~|': + continue + + if tokens[0] == '#~' and nb_tokens > 1: + line = line[3:].strip() + tokens = tokens[1:] + nb_tokens -= 1 + self.entry_obsolete = 1 + else: + self.entry_obsolete = 0 + + # Take care of keywords like + # msgid, msgid_plural, msgctxt & msgstr. + if tokens[0] in keywords and nb_tokens > 1: + line = line[len(tokens[0]):].lstrip() + if re.search(r'([^\\]|^)"', line[1:-1]): + raise IOError('Syntax error in po file %s (line %s): ' + 'unescaped double quote found' % + (self.instance.fpath, self.current_line)) + self.current_token = line + self.process(keywords[tokens[0]]) + continue + + self.current_token = line + + if tokens[0] == '#:': + if nb_tokens <= 1: + continue + # we are on a occurrences line + self.process('oc') + + elif line[:1] == '"': + # we are on a continuation line + if re.search(r'([^\\]|^)"', line[1:-1]): + raise IOError('Syntax error in po file %s (line %s): ' + 'unescaped double quote found' % + (self.instance.fpath, self.current_line)) + self.process('mc') + + elif line[:7] == 'msgstr[': + # we are on a msgstr plural + self.process('mx') + + elif tokens[0] == '#,': + if nb_tokens <= 1: + continue + # we are on a flags line + self.process('fl') + + elif tokens[0] == '#' or tokens[0].startswith('##'): + if line == '#': + line += ' ' + # we are on a translator comment line + self.process('tc') + + elif tokens[0] == '#.': + if nb_tokens <= 1: + continue + # we are on a generated comment line + self.process('gc') + + elif tokens[0] == '#|': + if nb_tokens <= 1: + raise IOError('Syntax error in po file %s (line %s)' % + (self.instance.fpath, self.current_line)) + + # Remove the marker and any whitespace right after that. + line = line[2:].lstrip() + self.current_token = line + + if tokens[1].startswith('"'): + # Continuation of previous metadata. + self.process('mc') + continue + + if nb_tokens == 2: + # Invalid continuation line. + raise IOError('Syntax error in po file %s (line %s): ' + 'invalid continuation line' % + (self.instance.fpath, self.current_line)) + + # we are on a "previous translation" comment line, + if tokens[1] not in prev_keywords: + # Unknown keyword in previous translation comment. + raise IOError('Syntax error in po file %s (line %s): ' + 'unknown keyword %s' % + (self.instance.fpath, self.current_line, + tokens[1])) + + # Remove the keyword and any whitespace + # between it and the starting quote. + line = line[len(tokens[1]):].lstrip() + self.current_token = line + self.process(prev_keywords[tokens[1]]) + + else: + raise IOError('Syntax error in po file %s (line %s)' % + (self.instance.fpath, self.current_line)) + + if self.current_entry and len(tokens) > 0 and \ + not tokens[0].startswith('#'): + # since entries are added when another entry is found, we must add + # the last entry here (only if there are lines). Trailing comments + # are ignored + self.instance.append(self.current_entry) + + # before returning the instance, check if there's metadata and if + # so extract it in a dict + metadataentry = self.instance.find('') + if metadataentry: # metadata found + # remove the entry + self.instance.remove(metadataentry) + self.instance.metadata_is_fuzzy = metadataentry.flags + key = None + for msg in metadataentry.msgstr.splitlines(): + try: + key, val = msg.split(':', 1) + self.instance.metadata[key] = val.strip() + except (ValueError, KeyError): + if key is not None: + self.instance.metadata[key] += '\n' + msg.strip() + # close opened file + if not isinstance(self.fhandle, list): # must be file + self.fhandle.close() + return self.instance + + def add(self, symbol, states, next_state): + """ + Add a transition to the state machine. + + Keywords arguments: + + ``symbol`` + string, the matched token (two chars symbol). + + ``states`` + list, a list of states (two chars symbols). + + ``next_state`` + the next state the fsm will have after the action. + """ + for state in states: + action = getattr(self, 'handle_%s' % next_state) + self.transitions[(symbol, state)] = (action, next_state) + + def process(self, symbol): + """ + Process the transition corresponding to the current state and the + symbol provided. + + Keywords arguments: + + ``symbol`` + string, the matched token (two chars symbol). + + ``linenum`` + integer, the current line number of the parsed file. + """ + try: + (action, state) = self.transitions[(symbol, self.current_state)] + if action(): + self.current_state = state + except Exception: + raise IOError('Syntax error in po file (line %s)' % + self.current_line) + + # state handlers + + def handle_he(self): + """Handle a header comment.""" + if self.instance.header != '': + self.instance.header += '\n' + self.instance.header += self.current_token[2:] + return 1 + + def handle_tc(self): + """Handle a translator comment.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + if self.current_entry.tcomment != '': + self.current_entry.tcomment += '\n' + tcomment = self.current_token.lstrip('#') + if tcomment.startswith(' '): + tcomment = tcomment[1:] + self.current_entry.tcomment += tcomment + return True + + def handle_gc(self): + """Handle a generated comment.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + if self.current_entry.comment != '': + self.current_entry.comment += '\n' + self.current_entry.comment += self.current_token[3:] + return True + + def handle_oc(self): + """Handle a file:num occurrence.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + occurrences = self.current_token[3:].split() + for occurrence in occurrences: + if occurrence != '': + try: + fil, line = occurrence.rsplit(':', 1) + if not line.isdigit(): + fil = fil + line + line = '' + self.current_entry.occurrences.append((fil, line)) + except (ValueError, AttributeError): + self.current_entry.occurrences.append((occurrence, '')) + return True + + def handle_fl(self): + """Handle a flags line.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.flags += [c.strip() for c in + self.current_token[3:].split(',')] + return True + + def handle_pp(self): + """Handle a previous msgid_plural line.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.previous_msgid_plural = \ + unescape(self.current_token[1:-1]) + return True + + def handle_pm(self): + """Handle a previous msgid line.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.previous_msgid = \ + unescape(self.current_token[1:-1]) + return True + + def handle_pc(self): + """Handle a previous msgctxt line.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.previous_msgctxt = \ + unescape(self.current_token[1:-1]) + return True + + def handle_ct(self): + """Handle a msgctxt.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.msgctxt = unescape(self.current_token[1:-1]) + return True + + def handle_mi(self): + """Handle a msgid.""" + if self.current_state in ['mc', 'ms', 'mx']: + self.instance.append(self.current_entry) + self.current_entry = POEntry(linenum=self.current_line) + self.current_entry.obsolete = self.entry_obsolete + self.current_entry.msgid = unescape(self.current_token[1:-1]) + return True + + def handle_mp(self): + """Handle a msgid plural.""" + self.current_entry.msgid_plural = unescape(self.current_token[1:-1]) + return True + + def handle_ms(self): + """Handle a msgstr.""" + self.current_entry.msgstr = unescape(self.current_token[1:-1]) + return True + + def handle_mx(self): + """Handle a msgstr plural.""" + index = self.current_token[7] + value = self.current_token[self.current_token.find('"') + 1:-1] + self.current_entry.msgstr_plural[int(index)] = unescape(value) + self.msgstr_index = int(index) + return True + + def handle_mc(self): + """Handle a msgid or msgstr continuation line.""" + token = unescape(self.current_token[1:-1]) + if self.current_state == 'ct': + self.current_entry.msgctxt += token + elif self.current_state == 'mi': + self.current_entry.msgid += token + elif self.current_state == 'mp': + self.current_entry.msgid_plural += token + elif self.current_state == 'ms': + self.current_entry.msgstr += token + elif self.current_state == 'mx': + self.current_entry.msgstr_plural[self.msgstr_index] += token + elif self.current_state == 'pp': + self.current_entry.previous_msgid_plural += token + elif self.current_state == 'pm': + self.current_entry.previous_msgid += token + elif self.current_state == 'pc': + self.current_entry.previous_msgctxt += token + # don't change the current state + return False +# }}} +# class _MOFileParser {{{ + + +class _MOFileParser(object): + """ + A class to parse binary mo files. + """ + + def __init__(self, mofile, *args, **kwargs): + """ + Constructor. + + Keyword arguments: + + ``mofile`` + string, path to the mo file or its content + + ``encoding`` + string, the encoding to use, defaults to ``default_encoding`` + global variable (optional). + + ``check_for_duplicates`` + whether to check for duplicate entries when adding entries to the + file (optional, default: ``False``). + """ + self.fhandle = open(mofile, 'rb') + + klass = kwargs.get('klass') + if klass is None: + klass = MOFile + self.instance = klass( + fpath=mofile, + encoding=kwargs.get('encoding', default_encoding), + check_for_duplicates=kwargs.get('check_for_duplicates', False) + ) + + def __del__(self): + """ + Make sure the file is closed, this prevents warnings on unclosed file + when running tests with python >= 3.2. + """ + if self.fhandle: + self.fhandle.close() + + def parse(self): + """ + Build the instance with the file handle provided in the + constructor. + """ + # parse magic number + magic_number = self._readbinary('<I', 4) + if magic_number == MOFile.MAGIC: + ii = '<II' + elif magic_number == MOFile.MAGIC_SWAPPED: + ii = '>II' + else: + raise IOError('Invalid mo file, magic number is incorrect !') + self.instance.magic_number = magic_number + # parse the version number and the number of strings + version, numofstrings = self._readbinary(ii, 8) + # from MO file format specs: "A program seeing an unexpected major + # revision number should stop reading the MO file entirely" + if version not in (0, 1): + raise IOError('Invalid mo file, unexpected major revision number') + self.instance.version = version + # original strings and translation strings hash table offset + msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8) + # move to msgid hash table and read length and offset of msgids + self.fhandle.seek(msgids_hash_offset) + msgids_index = [] + for i in range(numofstrings): + msgids_index.append(self._readbinary(ii, 8)) + # move to msgstr hash table and read length and offset of msgstrs + self.fhandle.seek(msgstrs_hash_offset) + msgstrs_index = [] + for i in range(numofstrings): + msgstrs_index.append(self._readbinary(ii, 8)) + # build entries + encoding = self.instance.encoding + for i in range(numofstrings): + self.fhandle.seek(msgids_index[i][1]) + msgid = self.fhandle.read(msgids_index[i][0]) + + self.fhandle.seek(msgstrs_index[i][1]) + msgstr = self.fhandle.read(msgstrs_index[i][0]) + if i == 0 and not msgid: # metadata + raw_metadata, metadata = msgstr.split(b('\n')), {} + for line in raw_metadata: + tokens = line.split(b(':'), 1) + if tokens[0] != b(''): + try: + k = tokens[0].decode(encoding) + v = tokens[1].decode(encoding) + metadata[k] = v.strip() + except IndexError: + metadata[k] = u('') + self.instance.metadata = metadata + continue + # test if we have a plural entry + msgid_tokens = msgid.split(b('\0')) + if len(msgid_tokens) > 1: + entry = self._build_entry( + msgid=msgid_tokens[0], + msgid_plural=msgid_tokens[1], + msgstr_plural=dict((k, v) for k, v in + enumerate(msgstr.split(b('\0')))) + ) + else: + entry = self._build_entry(msgid=msgid, msgstr=msgstr) + self.instance.append(entry) + # close opened file + self.fhandle.close() + return self.instance + + def _build_entry(self, msgid, msgstr=None, msgid_plural=None, + msgstr_plural=None): + msgctxt_msgid = msgid.split(b('\x04')) + encoding = self.instance.encoding + if len(msgctxt_msgid) > 1: + kwargs = { + 'msgctxt': msgctxt_msgid[0].decode(encoding), + 'msgid': msgctxt_msgid[1].decode(encoding), + } + else: + kwargs = {'msgid': msgid.decode(encoding)} + if msgstr: + kwargs['msgstr'] = msgstr.decode(encoding) + if msgid_plural: + kwargs['msgid_plural'] = msgid_plural.decode(encoding) + if msgstr_plural: + for k in msgstr_plural: + msgstr_plural[k] = msgstr_plural[k].decode(encoding) + kwargs['msgstr_plural'] = msgstr_plural + return MOEntry(**kwargs) + + def _readbinary(self, fmt, numbytes): + """ + Private method that unpack n bytes of data using format <fmt>. + It returns a tuple or a mixed value if the tuple length is 1. + """ + bytes = self.fhandle.read(numbytes) + tup = struct.unpack(fmt, bytes) + if len(tup) == 1: + return tup[0] + return tup +# }}} +# class TextWrapper {{{ + + +class TextWrapper(textwrap.TextWrapper): + """ + Subclass of textwrap.TextWrapper that backport the + drop_whitespace option. + """ + def __init__(self, *args, **kwargs): + drop_whitespace = kwargs.pop('drop_whitespace', True) + textwrap.TextWrapper.__init__(self, *args, **kwargs) + self.drop_whitespace = drop_whitespace + + def _wrap_chunks(self, chunks): + """_wrap_chunks(chunks : [string]) -> [string] + + Wrap a sequence of text chunks and return a list of lines of + length 'self.width' or less. (If 'break_long_words' is false, + some lines may be longer than this.) Chunks correspond roughly + to words and the whitespace between them: each chunk is + indivisible (modulo 'break_long_words'), but a line break can + come between any two chunks. Chunks should not have internal + whitespace; ie. a chunk is either all whitespace or a "word". + Whitespace chunks will be removed from the beginning and end of + lines, but apart from that whitespace is preserved. + """ + lines = [] + if self.width <= 0: + raise ValueError("invalid width %r (must be > 0)" % self.width) + + # Arrange in reverse order so items can be efficiently popped + # from a stack of chucks. + chunks.reverse() + + while chunks: + + # Start the list of chunks that will make up the current line. + # cur_len is just the length of all the chunks in cur_line. + cur_line = [] + cur_len = 0 + + # Figure out which static string will prefix this line. + if lines: + indent = self.subsequent_indent + else: + indent = self.initial_indent + + # Maximum width for this line. + width = self.width - len(indent) + + # First chunk on line is whitespace -- drop it, unless this + # is the very beginning of the text (ie. no lines started yet). + if self.drop_whitespace and chunks[-1].strip() == '' and lines: + del chunks[-1] + + while chunks: + l = len(chunks[-1]) + + # Can at least squeeze this chunk onto the current line. + if cur_len + l <= width: + cur_line.append(chunks.pop()) + cur_len += l + + # Nope, this line is full. + else: + break + + # The current line is full, and the next chunk is too big to + # fit on *any* line (not just this one). + if chunks and len(chunks[-1]) > width: + self._handle_long_word(chunks, cur_line, cur_len, width) + + # If the last chunk on this line is all whitespace, drop it. + if self.drop_whitespace and cur_line and not cur_line[-1].strip(): + del cur_line[-1] + + # Convert current line back to a string and store it in list + # of all lines (return value). + if cur_line: + lines.append(indent + ''.join(cur_line)) + + return lines +# }}} +# function wrap() {{{ + + +def wrap(text, width=70, **kwargs): + """ + Wrap a single paragraph of text, returning a list of wrapped lines. + """ + if sys.version_info < (2, 6): + return TextWrapper(width=width, **kwargs).wrap(text) + return textwrap.wrap(text, width=width, **kwargs) + +# }}} + +def genKeyId(inkey): + crc = binascii.crc32(bytes(inkey)) & 0xffffffff + # Use simple ASCII characters, exclude I, l, 1 and O, 0 to avoid confusing IDs + symbols = "ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz23456789"; + outkey = "" + for keyind in range(0, 5): + outkey += symbols[(crc & 63) % len(symbols)]; + crc >>= 6; + return outkey diff --git a/solenv/bin/update-for-gettext b/solenv/bin/update-for-gettext new file mode 100644 index 000000000000..1234b248490f --- /dev/null +++ b/solenv/bin/update-for-gettext @@ -0,0 +1,76 @@ +#!/usr/bin/python + +import binascii +import polib +from os import listdir, walk, remove +from os.path import isdir, join + +import sys + +if len(sys.argv) < 2: + print(" Syntax: update-for-gettext path/to/dir/of/languages") + sys.exit(2) + +langs = [f for f in listdir(sys.argv[1]) if isdir(join(sys.argv[1], f))] + +uiline = False + +for lang in langs: + path = join(sys.argv[1],lang) + modules = [f for f in listdir(path) if isdir(join(path, f))] + for module in modules: + subpath = join(path, module) + print >> sys.stderr, "module is", subpath, lang, module + messages = None + npos = 0 + for dirpath, dirname, filenames in walk(subpath): + for filename in filenames: + ipath = join(dirpath, filename) + print >> sys.stderr, "file is", ipath + po = polib.pofile(ipath) + if len(po) != 0: + samplefile = po[0].occurrences[0][0] + if samplefile.endswith(".src") or samplefile.endswith(".ui"): + if npos == 0: + messages = po + else: + for entry in po: + messages.append(entry) + npos = npos + 1 + remove(ipath) + if npos > 0: + middle = 0 + for entry in messages: + if not len(entry.occurrences): + continue + location = entry.occurrences[0][0] + if location.endswith(".ui"): + uiline = True + else: + uiline = False + lines = entry.msgctxt.split('\n') + if uiline: + widgetid = lines[1] + typeid = lines[2] + entry.msgctxt = location[:-3] + "|" + widgetid + if typeid == "tooltip_text": + entry.msgctxt = entry.msgctxt + "|" + typeid + if entry.msgctxt == 'calloutpage|position' and entry.msgid == 'Middle': + middle = middle + 1 + if middle == 2: + entry.msgid = "Center" + else: + ctxline = lines[1] + if (ctxline.endswith("+RID_SC_FUNC_DESCRIPTIONS_START")): + ctxline = ctxline[:-len("+RID_SC_FUNC_DESCRIPTIONS_START")] + elif (ctxline.endswith("+RID_GLOBSTR_OFFSET")): + ctxline = ctxline[:-len("+RID_GLOBSTR_OFFSET")] + entry.msgctxt = ctxline + comments = entry.comment.split('\n') + keyid = entry.msgctxt + '|' + entry.msgid + comments[-1] = polib.genKeyId(keyid.encode('utf-8')) + entry.comment = "\n".join(comments) + if lang != "templates": + messages.save(join(subpath, "messages.po")) + else: + messages.save(join(subpath, "messages.pot")) |