File: mergepo.py

package info (click to toggle)
lyx 2.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 138,444 kB
  • sloc: cpp: 244,268; ansic: 106,398; xml: 72,791; python: 39,384; sh: 7,666; makefile: 6,584; pascal: 2,143; perl: 2,101; objc: 1,084; tcl: 163; sed: 16
file content (271 lines) | stat: -rwxr-xr-x 9,660 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
#! /usr/bin/python3
# -*- coding: utf-8 -*-

# file mergepo.py
# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.

# author Georg Baum

# Full author contact details are available in file CREDITS

# This script takes missing translations from another set of po files and
# merges them into the po files in this source tree.


import os, re, string, sys
import io
import polib
from optparse import OptionParser


def trim_eol(line):
    """Return line with one trailing newline character removed, if present."""
    if line[-1:] != '\n':
        # e.g. the last line of a file that does not end with an EOL
        return line
    return line[:-1]


def read(input):
    """Return all lines of the open file object input, without line endings.

    Lines are fetched one by one with readline() until it returns an empty
    value; each line is passed through trim_eol() before being collected.
    """
    collected = []
    current = input.readline()
    while current:
        collected.append(trim_eol(current))
        current = input.readline()
    return collected


def parse_msg(lines):
    """Extract the unescaped msgid or msgstr text from its raw .po lines.

    lines holds the keyword line (e.g. 'msgid "..."') followed by any
    continuation lines ('"..."', or '#~ "..."' for obsolete entries).

    Returns '' if lines is empty or if any line contains no double quote.
    Otherwise the quoted parts of all lines are concatenated and the result
    of polib.unescape() on that text is returned.
    """
    parts = []
    for line in lines:
        first = line.find('"')
        if first < 0:
            return ''
        last = line.rfind('"')
        if last > first:
            # Take exactly what is between the opening and the closing quote.
            # Using strip('"') here would also eat the quote of a trailing
            # escaped quote (\") and leave a dangling backslash behind.
            parts.append(line[first + 1:last])
        else:
            # Only a single quote on the line: keep everything after it.
            parts.append(line[first + 1:])
    if not parts:
        return ''
    return polib.unescape(''.join(parts))


def translate(msgid, flags, msgstr_lines, po2, options):
    """Replace the translation of one entry with the one found in po2.

    msgid        -- unescaped msgid of the entry
    flags        -- list of the entry's flags; 'fuzzy' is added or removed
                    in place to match the entry taken from po2
    msgstr_lines -- raw msgstr lines of the entry; rewritten in place when
                    the translation is replaced
    po2          -- polib file object that is searched for msgid
    options      -- parsed options; overwrite and nonnull are read

    Returns 1 if msgstr_lines was rewritten, 0 if the entry was left alone.
    """
    current = parse_msg(msgstr_lines)
    if not options.overwrite and current != '' and u'fuzzy' not in flags:
        # already translated and we were not asked to overwrite
        return 0
    other = po2.find(msgid)
    if not other or not other.translated():
        return 0
    if options.overwrite and current == other.msgstr:
        return 0
    if options.nonnull and other.msgstr == other.msgid:
        return 0

    # make the fuzzy state of the target follow the source entry
    source_fuzzy = 'fuzzy' in other.flags
    target_fuzzy = u'fuzzy' in flags
    if source_fuzzy and not target_fuzzy:
        flags.append(u'fuzzy')
    elif target_fuzzy and not source_fuzzy:
        flags.remove(u'fuzzy')

    head = msgstr_lines[0]
    continuation_prefix = '#~ "' if head.find('#~') == 0 else '"'
    # keep everything up to and including the opening quote of the first line
    keep = head.find('"') + 1
    wrapped = polib.wrap(head[0:keep] + polib.escape(other.msgstr), 76, drop_whitespace = False)
    rebuilt = []
    for index, chunk in enumerate(wrapped):
        lead = '' if index == 0 else continuation_prefix
        rebuilt.append(lead + chunk + '"')
    # slice assignment mutates the caller's list; rebinding the name would
    # not be seen by the caller
    msgstr_lines[:] = rebuilt
    return 1


def mergepo_polib(target, source, options):
    """Merge translations from source into target using polib only.

    target  -- path of the .po file that is updated
    source  -- path of the .po file translations are taken from
    options -- parsed options; overwrite and nonnull are read

    With options.overwrite every entry of target is considered and its
    msgstr and fuzzy flag are taken from the matching source entry if the
    translations differ. Otherwise only untranslated target entries are
    filled in. If options.nonnull is set, source entries whose msgstr equals
    their msgid are ignored. The target file is saved only if at least one
    entry changed.

    Returns the number of changed entries.
    """
    changed = 0
    po1 = polib.pofile(target)
    po2 = polib.pofile(source)
    if options.overwrite:
        # A polib POFile is itself the list of its entries; the polib API
        # documents no entries() method, so iterate the file object directly.
        for entry in po1:
            other = po2.find(entry.msgid, include_obsolete_entries=True)
            if not other:
                continue
            if options.nonnull and other.msgstr == other.msgid:
                continue
            if other.translated() and other.msgstr != entry.msgstr:
                entry.msgstr = other.msgstr
                # keep the fuzzy state in sync with the source entry
                if 'fuzzy' in other.flags:
                    if 'fuzzy' not in entry.flags:
                        entry.flags.append('fuzzy')
                elif 'fuzzy' in entry.flags:
                    entry.flags.remove('fuzzy')
                changed += 1
    else:
        for entry in po1.untranslated_entries():
            other = po2.find(entry.msgid, include_obsolete_entries=True)
            if not other:
                continue
            if options.nonnull and other.msgstr == other.msgid:
                continue
            if other.translated():
                entry.msgstr = other.msgstr
                changed += 1
    if changed > 0:
        po1.save(target)
    return changed


def _flush_entry(newlines, msgid, flags, msgid_lines, msgstr_lines, po2, options):
    """Translate one parsed entry and append its lines to newlines.

    The flag line (if any flags remain after translate()) is written first,
    followed by the msgid lines and the possibly rewritten msgstr lines.
    Returns the value of translate(): 1 if the entry changed, otherwise 0.
    """
    changed = translate(msgid, flags, msgstr_lines, po2, options)
    if flags:
        newlines.append(u'#, ' + u', '.join(flags))
    newlines.extend(msgid_lines)
    newlines.extend(msgstr_lines)
    return changed


def mergepo_minimaldiff(target, source, options):
    """Merge translations from source into target, touching as few lines as possible.

    The target file is parsed line by line with a small hand-written parser
    instead of being re-serialized by polib, so that unchanged entries keep
    their exact formatting. The target is rewritten (with unix line ends)
    only if at least one entry changed.

    target  -- path of the .po file that is updated (must be UTF-8)
    source  -- path of the .po file translations are taken from
    options -- parsed options, passed on to translate()

    Returns the number of changed entries.

    Raises RuntimeError if the target is not UTF-8 encoded and
    NotImplementedError (a RuntimeError subclass) if the target contains
    plural forms; the caller is expected to fall back to polib then.
    """
    changed = 0
    po2 = polib.pofile(source)
    target_enc = polib.detect_encoding(target)
    # for utf8 files we can use our self written parser to minimize diffs,
    # otherwise we need to use polib
    if target_enc not in ['UTF-8', 'utf-8', 'utf_8']:
        raise RuntimeError('target encoding %s is not UTF-8' % target_enc)
    # open file with universal newlines, since it can happen that we are
    # on unix, but the file has been written on windows or vice versa.
    with io.open(target, 'r', encoding='utf_8', newline=None) as po1:
        oldlines = read(po1)
    continuation = ('"', '#~ "')
    newlines = []
    in_msgid = False
    in_msgstr = False
    flags = []
    msgstr_lines = []
    msgid_lines = []
    msgid = ''
    for line in oldlines:
        if in_msgid:
            if line.startswith(continuation):
                msgid_lines.append(line)
            else:
                in_msgid = False
                msgid = parse_msg(msgid_lines)
        elif in_msgstr:
            if line.startswith(continuation):
                msgstr_lines.append(line)
            else:
                # the entry is complete: translate and emit it
                in_msgstr = False
                changed += _flush_entry(newlines, msgid, flags, msgid_lines,
                                        msgstr_lines, po2, options)
                flags = []
                msgid_lines = []
                msgstr_lines = []
                msgid = ''
        # Not an elif: a line that ended a msgid/msgstr above still has to be
        # classified itself.
        if not in_msgid and not in_msgstr:
            if line.startswith('#,') and not flags:
                # The flag line is not copied here; it is regenerated when
                # the entry is flushed because translate() may change it.
                # NOTE(review): other lines seen between this flag line and
                # the msgid are copied immediately, so they end up before
                # the regenerated flag line in the output.
                flags = line[2:].strip().split(u', ')
            elif line.startswith(('msgid', '#~ msgid')):
                msgid_lines.append(line)
                in_msgid = True
            elif line.startswith(('msgstr', '#~ msgstr')):
                if line.startswith(('msgstr[', '#~ msgstr[')):
                    raise NotImplementedError('plural forms are not implemented')
                msgstr_lines.append(line)
                in_msgstr = True
            else:
                newlines.append(line)
    if msgid != '':
        # the file ended with a msgstr
        changed += _flush_entry(newlines, msgid, flags, msgid_lines,
                                msgstr_lines, po2, options)
    if changed > 0:
        # we store .po files with unix line ends in git,
        # so do always write them even on windows
        with io.open(target, 'w', encoding='utf_8', newline='\n') as po1:
            for line in newlines:
                po1.write(line + '\n')
    return changed


def mergepo(target, source, options):
    """Merge the translations of the file source into the file target.

    If either file does not exist, a message is written to stderr and
    nothing else is done. Otherwise the minimal-diff merge is tried first;
    if it raises any exception, the reason is reported and the polib based
    merge is used instead. Progress is reported on stderr.
    """
    # source is checked before target
    for path in (source, target):
        if not os.path.exists(path):
            sys.stderr.write('Skipping %s since %s does not exist.\n' % (target, path))
            return
    report = sys.stderr.write
    report('Merging %s into %s: ' % (source, target))
    try:
        count = mergepo_minimaldiff(target, source, options)
        report('Updated %d translations with minimal diff.\n' % count)
    except Exception as e:
        report('Unable to use minimal diff: %s\n' % e)
        count = mergepo_polib(target, source, options)
        report('Updated %d translations using polib.\n' % count)


def main(argv):
    """Parse the command line and merge translations for one or all languages.

    argv is the complete argument vector including the script path as first
    element; the script path is used to locate the default target directory
    (../../po relative to the script) when --target is not given. The first
    positional argument is the source directory.

    If no source directory is given, the help text is printed. Without
    --language every .po file found in the target directory is merged with
    the file of the same name in the source directory, in sorted order.

    Returns 0.
    """

    parser = OptionParser(description = """This script reads translations from .po files in the given source directory
and adds all translations that do not already exist to the corresponding .po
files in the target directory. It is recommended to remerge strings from the
source code before running this script. Otherwise translations that are not
yet in the target .po files are not updated.""", usage = "Usage: %prog [options] sourcedir")
    parser.add_option("-t", "--target", dest="target",
                      help="target directory containing .po files. If missing, it is determined from the script location.")
    parser.add_option("-l", "--language", dest="language",
                      help="language for which translations are merged (if missing, all languages are merged)")
    parser.add_option("-o", "--overwrite", action="store_true", dest="overwrite", default=False,
                      help="overwrite existing target translations with source translations (if missing, only new translations are added)")
    parser.add_option("-n", "--nonnull", action="store_true", dest="nonnull", default=False,
                      help="do not update target translations with source translations that are identical to the untranslated text")
    (options, args) = parser.parse_args(argv)
    # args[0] is the script itself, so a source directory means len(args) > 1
    if len(args) <= 1:
        parser.print_help()
        return 0

    toolsdir = os.path.dirname(args[0])
    if options.target:
        podir1 = os.path.abspath(options.target)
    else:
        podir1 = os.path.normpath(os.path.join(toolsdir, '../../po'))
    podir2 = os.path.abspath(args[1])

    if options.language:
        name = options.language + '.po'
        mergepo(os.path.join(podir1, name), os.path.join(podir2, name), options)
    else:
        # sorted so that files are processed (and reported) in a stable order
        for name in sorted(os.listdir(podir1)):
            if os.path.splitext(name)[1] != ".po":
                continue
            mergepo(os.path.join(podir1, name), os.path.join(podir2, name), options)

    return 0


if __name__ == "__main__":
    # hand main()'s return value to the shell as exit status
    sys.exit(main(sys.argv))