File: po_processor.py

package info (click to toggle)
refcard 13.1.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,284 kB
  • sloc: python: 189; makefile: 108; sh: 6
file content (321 lines) | stat: -rwxr-xr-x 12,392 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#!/usr/bin/env python3
#
# po_processor.py
#
# Process RTL *.po files for debian-refcard.
# We need to enclose all RTL strings with RLE/PDF characters, and
# take care of directionality of embedded LTR strings as well.
#
# Usage:
# cd refcard
# ./run_po_processor.sh
#
# Needs the python3-polib package.
#
########################################################################
#
# Copyright (C) 2016, Omer Zak.
# This program is free software, licensed under the GNU GPL, >=3.0.
# This software comes with absolutely NO WARRANTY. Use at your own risk!
#
########################################################################
# Imports
########################################################################

import argparse
import polib
import re
import sys
import unicodedata

########################################################################
# Auxiliary functions
########################################################################

class error(Exception):
    """Exception type specific to this module; adds nothing to Exception."""

########################################################################
# Actual work functions
########################################################################

# Unicode explicit directional formatting characters.
LRE = "\u202A"     # LEFT-TO-RIGHT EMBEDDING
RLE = "\u202B"     # RIGHT-TO-LEFT EMBEDDING
PDF = "\u202C"     # POP DIRECTIONAL FORMATTING

# Coarse classes that the Unicode bidirectional categories are mapped to.
TYPE_LEFT = 1      # Do not enclose the msgstr with RLE/PDF
TYPE_RIGHT = 2     # Enclose the msgstr with RLE/PDF
TYPE_NEUTRAL = 3   # Ignore when determining what to do with the msgstr
TYPE_EXPLICIT = 4  # Flag the msgstr for manual processing
# Printable names, indexed by the TYPE_* values above (index 0 is unused).
TYPES = [None, "LEFT", "RIGHT", "NEUTRAL", "EXPLICIT"]
# Map from the category names returned by unicodedata.bidirectional()
# to the TYPE_* classes.
# Source: http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types
CHARTYPES = {
    # Strong
    "L" : TYPE_LEFT,
    "R" : TYPE_RIGHT,
    "AL" : TYPE_RIGHT,
    # Weak
    "EN" : TYPE_NEUTRAL,
    "ES" : TYPE_NEUTRAL,
    "ET" : TYPE_NEUTRAL,
    "AN" : TYPE_NEUTRAL,
    "CS" : TYPE_NEUTRAL,
    "NSM" : TYPE_NEUTRAL,
    "BN" : TYPE_NEUTRAL,
    # Neutral
    "B" : TYPE_NEUTRAL,
    "S" : TYPE_NEUTRAL,
    "WS" : TYPE_NEUTRAL,
    "ON" : TYPE_NEUTRAL,
    # Explicit Formatting
    "LRE" : TYPE_EXPLICIT,
    "LRO" : TYPE_EXPLICIT,
    "RLE" : TYPE_EXPLICIT,
    "RLO" : TYPE_EXPLICIT,
    "PDF" : TYPE_EXPLICIT,
    "LRI" : TYPE_EXPLICIT,
    "RLI" : TYPE_EXPLICIT,
    "FSI" : TYPE_EXPLICIT,
    "PDI" : TYPE_EXPLICIT,
}

# Matches simple opening/closing tags such as <tag> or </tag>; tags with
# attributes or other punctuation in them are not matched.
TAGSPATTERN=re.compile(r"</?[a-zA-Z0-9]+>")
def classify(msgstr):
    """Determine whether the msgstr is to be enclosed by RLE/PDF pair.

       Simple tags (see TAGSPATTERN) are removed first, then the
       remaining characters are scanned in order.

       Return value:
       TYPE_EXPLICIT - the text contains an explicit formatting character
                       anywhere (this overrides everything else).
       TYPE_RIGHT / TYPE_LEFT - direction of the first strong character.
       TYPE_NEUTRAL - no strong character was found (this includes the
                      empty string).
    """
    tagless = TAGSPATTERN.sub("",msgstr)
    typesofar = TYPE_NEUTRAL
    for msgchr in tagless:
        # unicodedata.bidirectional() returns "" for code points which have
        # no bidirectional class defined (e.g. unassigned code points).
        # Treat those as neutral rather than failing with a KeyError.
        chrtype = CHARTYPES.get(unicodedata.bidirectional(msgchr), TYPE_NEUTRAL)
        if (chrtype == TYPE_EXPLICIT):
            return TYPE_EXPLICIT
        # Only the first strong character decides the direction, but the
        # scan goes on so that a later explicit character is still caught.
        if ((typesofar == TYPE_NEUTRAL) and (chrtype in (TYPE_LEFT, TYPE_RIGHT))):
            typesofar = chrtype
    return typesofar

def process(msgstr,stderr=None,verbosity=0):
    """Process the localized message.

       msgstr - the translated string to examine.
       stderr - optional writable stream for diagnostics; when it is None
                nothing is reported, whatever the verbosity.
       verbosity - diagnostics are written when this is greater than 0.

       The return value is either (True, processedMessage)
       or (False, reason), where reason is a short text explaining why
       the message has to be inspected manually.
       processedMessage is msgstr enclosed in RLE/PDF if it was classified
       as right-to-left, and msgstr unchanged otherwise.
    """
    # Evaluate the reporting condition once, so that every diagnostic write
    # is protected against stderr being None.
    report = ((verbosity > 0) and (stderr is not None))
    msgtype = classify(msgstr)
    if (report):
        stderr.write("Classified as {}: {}\n".format(TYPES[msgtype], msgstr))
    if (msgtype == TYPE_EXPLICIT):
        return((False, "Need manual inspection"))
    if (msgtype == TYPE_RIGHT):
        if (report):
            stderr.write("Will modify the string\n")
        return((True, RLE + msgstr + PDF))
    # TYPE_LEFT and TYPE_NEUTRAL messages are passed through untouched.
    return((True, msgstr))

########################################################################
# Test code
########################################################################

def check_dependencies(module_names=("argparse","sys")):
    """Try to import modules, whose names are given in the argument
       (any iterable of module name strings).
       If all import properly, return [].
       Otherwise, return a list of (module name, exception) pairs, one for
       each module whose import failed.
    """
    # The default is a tuple rather than a list, so that there is no
    # mutable object shared between calls.
    failed = []
    for mname in module_names:
        try:
            __import__(mname)
        except Exception as e:
            # Deliberately broad: any failure while importing is reported
            # to the caller together with its reason.
            failed.append((mname,e))
    return(failed)

########################################################################

class TestNumbering:
    """Keep track of current test number, and count of pass/fail in the current group of tests"""
    def __init__(self):
        self.testnum = 0   # Number of tests started so far (= number of the latest test)
        self.passed = 0    # Number of tests posted as passed
        self.failed = 0    # Number of tests posted as failed

    def __str__(self):
        return "# Ran {} tests: {} passed, {} failed".format(self.testnum,self.passed,self.failed)

    def next(self):
        """Start a new test and return its number.
           The tests are numbered starting from 1 and ending at N if there are N tests.
        """
        # Increment first, so that the first number handed out is 1 and
        # self.testnum is always the count of tests started so far.
        self.testnum += 1
        return self.testnum

    def post(self,passed):
        """Post the results of a test.
           If passed is True, increment self.passed.
           Otherwise increment self.failed
           Each call of next() is expected to be followed by exactly one
           call of post(); this is verified by an assertion.
        """
        if (passed):
            self.passed += 1
        else:
            self.failed += 1
        assert (self.testnum == (self.passed + self.failed)), "Inconsistency: {} tests, {} passed, {} failed".format(self.testnum,self.passed,self.failed)

########################################################################

def increment_testnum(testnum):
    """Obtain the number to be used for the next test.
       A TestNumbering instance is asked for its next number (which
       advances it); any other value is returned unchanged.
    """
    return testnum.next() if isinstance(testnum,TestNumbering) else testnum

def run_test(testnum,desc,exp,actlambda,stdout=sys.stdout):
    """Run a single test and report it in TAP style.
       testnum - test number; a TestNumbering instance is advanced
                 automatically, any other value is used as is.
       desc - test description
       exp - expected value
       actlambda - parameterless callable, which yields the actual value
                   when invoked
       stdout - stream to which the test report is written

       Return value: True if the actual value equals the expected one,
       False if it differs or if actlambda raised an exception.
       The caller is responsible for performing testnum.post(), if any.
    """
    import pprint
    number = increment_testnum(testnum)

    def render(value):
        # Pretty-print a value; continuation lines get a "## " prefix.
        return pprint.pformat(value).replace("\n","\n## ")

    diagnostics = ""
    passed = False
    try:
        actual = actlambda()
    except Exception as caught:
        # An exception counts as a failure, and is shown in the diagnostics.
        stdout.write("not ")
        diagnostics = "# exp: %s\n# exception: %s\n" % (render(exp),render(caught))
    else:
        passed = (exp == actual)
        if (not passed):
            stdout.write("not ")
            diagnostics = "# exp: %s\n# act: %s\n" % (render(exp),render(actual))
    stdout.write("ok %d %s\n" % (number,desc))
    stdout.write(diagnostics)
    return passed

def run_test_throwing_exception(testnum,desc,expexception,actlambda,stdout=sys.stdout):
    """Same as run_test() above, except that we expect the tested code
       to raise an exception.
       expexception - the expected exception.
           If it is a class, then the class of the raised exception must be
           exactly that class.
           If it is an exception instance, then only its class is compared
           with the class of the raised exception (comparing the exact
           exception is NOT IMPLEMENTED).
       other arguments - same as for run_test() above.

       Return value: True if the test threw the expected exception, False otherwise.
       In both failure cases (wrong exception, no exception) the report
       line starts with "not ok" and is followed by diagnostics.
    """
    import pprint
    testno = increment_testnum(testnum)

    def render(value):
        # Pretty-print a value; continuation lines get a "## " prefix.
        return pprint.pformat(value).replace("\n","\n## ")

    diagstr = ""
    test_result = False
    try:
        actlambda()
    except Exception as ex:
        # isinstance() also recognizes exception classes which have a
        # custom metaclass.
        if (isinstance(expexception,type)):
            test_result = (ex.__class__ == expexception)
        else:
            # As a rule, subclasses of Exception do not have a usable
            # __eq__() implementation, so compare the classes only.
            test_result = (ex.__class__ == expexception.__class__)
        if (not test_result):
            # The wrong exception is a failure and must be reported as
            # "not ok", the same way run_test() reports its failures.
            stdout.write("not ")
            diagstr = "# exp: %s\n# act: %s\n" % (render(expexception),render(ex))
    else:
        # No exception occurred.
        stdout.write("not ")
        diagstr = "# exp: %s\n# act: (no exception was thrown)\n" % render(expexception)
    stdout.write("ok %d %s\n" % (testno,desc))
    stdout.write(diagstr)
    return test_result

########################################################################

def run_self_tests(args,stdin=sys.stdin,stdout=sys.stdout,stderr=sys.stderr):
    """Run the built-in self tests, writing a TAP style report to stdout.
       args, stdin and stderr are accepted but not used here.

       Return value: True if no test was posted as failed.
    """
    counter = TestNumbering()

    def expect_pass(desc,exp,act):
        # The test is posted as passed if run_test() reports success.
        counter.post(run_test(counter,desc,exp,act,stdout=stdout))

    def expect_fail(desc,exp,act):
        # The test is posted as passed if run_test() reports failure.
        counter.post(not run_test(counter,desc,exp,act,stdout=stdout))

    def divide_by_zero():
        return 1/0

    # Meta-tests (testing the test functions themselves)
    expect_pass("Meta-test (should pass)",4,(lambda: 2+2))
    expect_fail("Meta-test (should fail)",1,(lambda: 0))
    expect_fail("Meta-test (should fail): due to an exception",2213,divide_by_zero)
    counter.post(run_test_throwing_exception(counter,"Meta-test (should pass): by raising the correct exception",ZeroDivisionError,divide_by_zero,stdout=stdout))
    counter.post(not run_test_throwing_exception(counter,"Meta-test (should fail): by raising the wrong exception",OverflowError,divide_by_zero,stdout=stdout))

    # Standard tests for py3filter.py derived code
    expect_pass("Check dependencies",[],check_dependencies)

    # Actual tests of classify()
    expect_pass("pure LTR string",TYPE_LEFT,(lambda: classify("abc")))
    expect_pass("pure RTL string",TYPE_RIGHT,(lambda: classify("\u05d0\u05d1\u05d2")))
    expect_pass("tagged RTL string",TYPE_RIGHT,(lambda: classify("<tag>\u05d0\u05d1\u05d2</tag>")))

    stdout.write("# Summary: {}\n".format(str(counter)))
    return (counter.failed == 0)

########################################################################
# Main Program
########################################################################

def main(stdin,outfname,stderr,args):
    """Process a whole *.po file.

       stdin - readable text stream holding the *.po file contents.
       outfname - name of the file to write the result to; if it is None,
                  the result is written to sys.stdout.
       stderr - writable stream for diagnostics and for reports about
                entries needing manual inspection.
       args - parsed command line arguments; only args.verbose is used.

       Every entry's msgstr is run through process(). Entries which need
       manual inspection are reported on stderr and left unmodified.
       There is no explicit return value (None).
    """
    po = polib.pofile(stdin.read())
    for entry in po:
        (flg, newmsgstr) = process(entry.msgstr,stderr=stderr,verbosity=args.verbose)
        if (flg):
            entry.msgstr = newmsgstr
        else:
            # Here newmsgstr holds the reason, not a replacement string.
            stderr.write("Need manual inspection due to: {}\nContext: {}\nOriginal string: {}\n\n".format(newmsgstr, entry.msgctxt, entry.msgstr))
    if (outfname is None):
        # No output file was requested. po.save() cannot be used without
        # a file path here, as the catalog was built from text and not
        # from a file, so write the textual form to standard output.
        sys.stdout.write(str(po))
        return
    po.save(outfname)
    stderr.write("Wrote to %s\n" % outfname)

########################################################################

if (__name__ == '__main__'):
    # Command line interface: parse the arguments, then either run the
    # self tests or process a *.po file.
    parser = argparse.ArgumentParser(
        description="Processor of *.po files for RTL languages",
        epilog="TBD")

    parser.add_argument("-i","--input",
                        type=argparse.FileType('r', encoding='UTF-8'),
                        dest="input",
                        default=sys.stdin,
                        help="Name of input file.  Default is sys.stdin.")
    # The output is kept as a file name (not an open file), because
    # main() hands it over to po.save().
    parser.add_argument("-o","--output",
                        dest="outfname",
                        help="Name of output file.  Default is sys.stdout.")
    parser.add_argument("-e","--error",
                        type=argparse.FileType('w', encoding='UTF-8'),
                        dest="error",
                        default=sys.stderr,
                        help="Name of error output file.  Default is sys.stderr.")

    parser.add_argument("-t","--test",action="store_true",
                        dest="test",
                        help="Run self tests.")
    parser.add_argument("-v","--verbose",action="count",default=0,
                        dest="verbose",
                        help="Increase output verbosity.")

    args = parser.parse_args()


    # Did we want to run self tests?

    if (args.test):
        if (args.outfname is None):
            # No output file was given: report to standard output, as
            # promised by the help text, instead of failing in open().
            tests_passed = run_self_tests(args,stdin=args.input,stdout=sys.stdout,stderr=args.error)
        else:
            # The context manager closes (and flushes) the report file
            # before the process exits.
            with open(args.outfname, 'w', encoding='UTF-8') as test_report:
                tests_passed = run_self_tests(args,stdin=args.input,stdout=test_report,stderr=args.error)
        sys.exit(0 if tests_passed else 1)

    ####################################################################
    # ACTUAL WORK
    ####################################################################
    # main() has no explicit return value, so a normal run exits with
    # status 0.
    sys.exit(main(stdin=args.input,outfname=args.outfname,stderr=args.error,args=args))

else:
    # I have been imported as a module.
    pass

########################################################################
# End of po_processor.py