File: po_processor.py

package info (click to toggle)
refcard 13.1.1
links: PTS, VCS
area: main
in suites: forky, sid
size: 2,284 kB
sloc: python: 189; makefile: 108; sh: 6
file content (321 lines) | stat: -rwxr-xr-x 12,392 bytes
parent folder | download | duplicates (6)
#!/usr/bin/env python3
#
# po_processor.py
#
# Process RTL *.po files for debian-refcard.
# We need to enclose all RTL strings with RLE/PDF characters, and
# take care of directionality of embedded LTR strings as well.
#
# Usage:
# cd refcard
# ./run_po_processor.sh
#
# Needs the python3-polib package.
#
########################################################################
#
# Copyright (C) 2016, Omer Zak.
# This program is free software, licensed under the GNU GPL, >=3.0.
# This software comes with absolutely NO WARRANTY. Use at your own risk!
#
########################################################################
# Imports
########################################################################

import argparse
import polib
import re
import sys
import unicodedata

########################################################################
# Auxiliary functions
########################################################################

class error(Exception):
    """Module-specific exception type; adds nothing to Exception."""

########################################################################
# Actual work functions
########################################################################

# Unicode explicit directional formatting characters.
LRE = "\u202A"  # LEFT-TO-RIGHT EMBEDDING
RLE = "\u202B"  # RIGHT-TO-LEFT EMBEDDING
PDF = "\u202C"  # POP DIRECTIONAL FORMATTING

# Categories used when classifying characters and whole messages.
(
    TYPE_LEFT,      # Do not enclose the msgstr with RLE/PDF
    TYPE_RIGHT,     # Enclose the msgstr with RLE/PDF
    TYPE_NEUTRAL,   # Ignore when determining what to do with the msgstr
    TYPE_EXPLICIT,  # Flag the msgstr for manual processing
) = range(1, 5)

# Printable names, indexed by the TYPE_* values above (index 0 is unused).
TYPES = [
    None,
    "LEFT",
    "RIGHT",
    "NEUTRAL",
    "EXPLICIT",
]

# Maps each Unicode bidirectional class name to one of the TYPE_* categories.
# Source: http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types
CHARTYPES = {
    bidi_class: category
    for category, bidi_classes in (
        # Strong
        (TYPE_LEFT, ("L",)),
        (TYPE_RIGHT, ("R", "AL")),
        # Weak
        (TYPE_NEUTRAL, ("EN", "ES", "ET", "AN", "CS", "NSM", "BN")),
        # Neutral
        (TYPE_NEUTRAL, ("B", "S", "WS", "ON")),
        # Explicit Formatting
        (TYPE_EXPLICIT, ("LRE", "LRO", "RLE", "RLO", "PDF",
                         "LRI", "RLI", "FSI", "PDI")),
    )
    for bidi_class in bidi_classes
}

# Matches simple opening/closing markup tags such as <tag> or </tag>.
TAGSPATTERN=re.compile(r"</?[a-zA-Z0-9]+>")
def classify(msgstr):
    """Determine whether the msgstr is to be enclosed by RLE/PDF pair.

       Tags matching TAGSPATTERN are removed first, so that their Latin
       letters do not influence the result.  The remaining characters
       are then scanned in order:
         - any explicit directional formatting character, anywhere in
           the string, yields TYPE_EXPLICIT;
         - otherwise the first strong (left or right) character decides
           between TYPE_LEFT and TYPE_RIGHT;
         - if there is no strong character at all (including the empty
           string), the result is TYPE_NEUTRAL.

       Return value - TYPE_RIGHT/TYPE_LEFT/TYPE_NEUTRAL/TYPE_EXPLICIT.
    """
    tagless = TAGSPATTERN.sub("",msgstr)
    typesofar = TYPE_NEUTRAL
    for msgchr in tagless:
        # unicodedata.bidirectional() returns "" for code points that have
        # no bidirectional class (e.g. unassigned ones); treat those, and
        # any class missing from CHARTYPES, as neutral instead of raising
        # KeyError.
        chrtype = CHARTYPES.get(unicodedata.bidirectional(msgchr), TYPE_NEUTRAL)
        if chrtype == TYPE_EXPLICIT:
            return TYPE_EXPLICIT
        # Only the first strong character sets the direction; the scan
        # continues afterwards solely to detect explicit formatting.
        if typesofar == TYPE_NEUTRAL and chrtype in (TYPE_LEFT, TYPE_RIGHT):
            typesofar = chrtype
    return typesofar

def process(msgstr,stderr=None,verbosity=0):
    """Process the localized message.

       msgstr - the translated string to process.
       stderr - optional writable stream for progress messages; nothing
           is written when it is None.
       verbosity - progress messages are written only when this is > 0.

       The return value is either (True, processedMessage), where
       processedMessage is msgstr wrapped in RLE/PDF if it was
       classified as right-to-left and msgstr unchanged otherwise,
       or (False, reason) when the string needs manual inspection.
    """
    # Evaluate the reporting condition once, so that every write below is
    # guarded against stderr being None (the default).
    verbose = (verbosity > 0) and (stderr is not None)
    msgtype = classify(msgstr)
    if verbose:
        stderr.write("Classified as {}: {}\n".format(TYPES[msgtype], msgstr))
    if msgtype == TYPE_EXPLICIT:
        return (False, "Need manual inspection")
    if msgtype == TYPE_RIGHT:
        if verbose:
            stderr.write("Will modify the string\n")
        return (True, RLE + msgstr + PDF)
    return (True, msgstr)

########################################################################
# Test code
########################################################################

def check_dependencies(module_names=("argparse","sys")):
    """Try to import modules, whose names are given in the argument
       list.

       module_names - iterable of module names to import.  The default
           is an immutable tuple, so that it cannot be accidentally
           shared and modified between calls.

       If all import properly, return [].
       Otherwise, return a list of (module name, exception) tuples,
       one for each module whose import failed.
    """
    failed = []
    for mname in module_names:
        try:
            __import__(mname)
        except Exception as e:
            # Deliberately broad: importing a module runs arbitrary code,
            # and any failure is to be reported rather than propagated.
            failed.append((mname,e))
    return failed

########################################################################

class TestNumbering:
    """Keep track of current test number, and count of pass/fail in the current group of tests"""
    def __init__(self):
        self.testnum = 0   # Number of tests started so far (== number of the latest test)
        self.passed = 0    # Number of tests posted as passed
        self.failed = 0    # Number of tests posted as failed
    def __str__(self):
        return "# Ran {} tests: {} passed, {} failed".format(self.testnum,self.passed,self.failed)
    def next(self):
        """Start a new test and return its number.
           The tests are numbered starting from 1 and ending at N if there are N tests.
        """
        # Increment first, so that the first test gets number 1 (as TAP
        # expects) rather than 0.
        self.testnum += 1
        return self.testnum
    def post(self,passed):
        """Post the results of a test.
           If passed is True, increment self.passed.
           Otherwise increment self.failed
           Each call to next() is expected to be followed by exactly one
           call to post(); the assertion below checks this invariant.
        """
        if (passed):
            self.passed += 1
        else:
            self.failed += 1
        assert (self.testnum == (self.passed + self.failed)), "Inconsistency: {} tests, {} passed, {} failed".format(self.testnum,self.passed,self.failed)

########################################################################

def increment_testnum(testnum):
    """Resolve the number to be used for the next test.
       A TestNumbering instance is advanced via its next() method and
       the value it yields is returned; any other value is returned
       unchanged.
    """
    return testnum.next() if isinstance(testnum,TestNumbering) else testnum

def run_test(testnum,desc,exp,actlambda,stdout=sys.stdout):
    """Boilerplate TAP-compatible test runner.
       testnum - test number; a TestNumbering instance is auto-advanced,
           any other value is used as is.
       desc - test description
       exp - expected value
       actlambda - parameterless callable which yields the actual value when invoked
       stdout - stream to which the test report is written

       Return value: True if the test passed, False if the test failed or threw an exception.
       The caller is responsible for performing testnum.post(), if any.
    """
    import pprint

    def _pretty(value):
        # Continuation lines of a multi-line rendering are marked with "## ".
        return pprint.pformat(value).replace("\n","\n## ")

    testno = increment_testnum(testnum)
    test_result = False
    diagstr = ""
    try:
        actval = actlambda()
    except Exception as ex:
        diagstr = "# exp: %s\n# exception: %s\n" % (_pretty(exp),_pretty(ex))
    else:
        test_result = (exp == actval)
        if not test_result:
            diagstr = "# exp: %s\n# act: %s\n" % (_pretty(exp),_pretty(actval))

    # A failing test (mismatch or exception) is reported as "not ok".
    if not test_result:
        stdout.write("not ")
    stdout.write("ok %d %s\n" % (testno,desc))
    stdout.write(diagstr)
    return test_result

def run_test_throwing_exception(testnum,desc,expexception,actlambda,stdout=sys.stdout):
    """Same as run_test() above, except that we expect the tested code
       to raise an exception.
       expexception - the expected exception.
           If it is a class, the raised exception must be of exactly that class.
           If it is an exception instance, only its class is compared
           with the class of the raised exception (comparison of the
           exception contents is NOT IMPLEMENTED).
       other arguments - same as for run_test() above.

       The report line is "ok ..." only if the expected exception was
       raised; a wrong exception or no exception at all is reported as
       "not ok ..." followed by diagnostics.

       Return value: True if the test threw the expected exception, False otherwise.
    """
    import pprint
    testno = increment_testnum(testnum)

    diagstr = ""
    test_result = False
    try:
        actlambda()
    except Exception as ex:
        # As a rule, subclasses of Exception do not have a usable __eq__()
        # implementation, so only the exception classes are compared.
        if isinstance(expexception,type):
            expclass = expexception
        else:
            expclass = expexception.__class__
        test_result = (ex.__class__ == expclass)
        if (not test_result):
            diagstr = "# exp: %s\n# act: %s\n" % (pprint.pformat(expexception).replace("\n","\n## "),pprint.pformat(ex).replace("\n","\n## "))
    else:
        # No exception occurred.
        diagstr = "# exp: %s\n# act: (no exception was thrown)\n" % pprint.pformat(expexception).replace("\n","\n## ")
    # Every kind of failure, including a wrong exception, must be reported
    # as "not ok" so that the report agrees with the return value.
    if (not test_result):
        stdout.write("not ")
    stdout.write("ok %d %s\n" % (testno,desc))
    stdout.write(diagstr)
    return test_result

########################################################################

def run_self_tests(args,stdin=sys.stdin,stdout=sys.stdout,stderr=sys.stderr):
    """Run the built-in self tests and write a TAP-style report to stdout.
       args, stdin and stderr are accepted but not used.

       Return value: True if no test was posted as failed, False otherwise.
    """
    testnum = TestNumbering()
    # Each row: (runner, whether the runner is expected to report success,
    #            description, expected value or exception, code under test).
    # Rows which are expected to fail have their outcome inverted before
    # being posted.
    plan = [
        # Meta-tests (testing the test functions themselves)
        (run_test, True, "Meta-test (should pass)", 4, (lambda: 2+2)),
        (run_test, False, "Meta-test (should fail)", 1, (lambda: 0)),
        (run_test, False, "Meta-test (should fail): due to an exception", 2213, (lambda: 1/0)),
        (run_test_throwing_exception, True, "Meta-test (should pass): by raising the correct exception", ZeroDivisionError, (lambda: 1/0)),
        (run_test_throwing_exception, False, "Meta-test (should fail): by raising the wrong exception", OverflowError, (lambda: 1/0)),
        # Standard tests for py3filter.py derived code
        (run_test, True, "Check dependencies", [], check_dependencies),
        # Actual tests
        (run_test, True, "pure LTR string", TYPE_LEFT, (lambda: classify("abc"))),
        (run_test, True, "pure RTL string", TYPE_RIGHT, (lambda: classify("\u05d0\u05d1\u05d2"))),
        (run_test, True, "tagged RTL string", TYPE_RIGHT, (lambda: classify("<tag>\u05d0\u05d1\u05d2</tag>"))),
    ]
    for runner, should_succeed, desc, expected, code in plan:
        outcome = runner(testnum,desc,expected,code,stdout=stdout)
        testnum.post(outcome if should_succeed else (not outcome))
    stdout.write("# Summary: {}\n".format(str(testnum)))
    return (testnum.failed == 0)

########################################################################
# Main Program
########################################################################

def main(stdin,outfname,stderr,args):
    """Process a whole *.po file.

       stdin - readable text stream holding the contents of the *.po file.
       outfname - name of the output file, or None to write the
           processed catalog to sys.stdout.
       stderr - writable text stream for progress and diagnostic messages.
       args - parsed command line arguments; only args.verbose is used.

       Every entry's msgstr is passed through process().  Entries that
       need manual inspection are reported on stderr and left unchanged.

       Return value: None.
    """
    po = polib.pofile(stdin.read())
    for entry in po:
        (flg, newmsgstr) = process(entry.msgstr,stderr=stderr,verbosity=args.verbose)
        if (flg):
            entry.msgstr = newmsgstr
        else:
            stderr.write("Need manual inspection due to: {}\nContext: {}\nOriginal string: {}\n\n".format(newmsgstr, entry.msgctxt, entry.msgstr))
    if outfname is None:
        # No output file name was given.  The catalog was parsed from a
        # string, so it has no file path of its own and po.save(None)
        # cannot succeed; write its text form to standard output, which is
        # the documented default of the --output option.
        # NOTE(review): relies on str(po) being the same text that
        # po.save() would write - confirm against the installed polib.
        sys.stdout.write(str(po))
        stderr.write("Wrote to %s\n" % "standard output")
    else:
        po.save(outfname)
        stderr.write("Wrote to %s\n" % outfname)

########################################################################

if (__name__ == '__main__'):
    # Command line entry point: parse the arguments, then either run the
    # self tests or process a *.po file.
    parser = argparse.ArgumentParser(
        description="Processor of *.po files for RTL languages",
        epilog="TBD")

    parser.add_argument("-i","--input",
                        type=argparse.FileType('r', encoding='UTF-8'),
                        dest="input",
                        default=sys.stdin,
                        help="Name of input file.  Default is sys.stdin.")
    parser.add_argument("-o","--output",
                        dest="outfname",
                        help="Name of output file.  Default is sys.stdout.")
    parser.add_argument("-e","--error",
                        type=argparse.FileType('w', encoding='UTF-8'),
                        dest="error",
                        default=sys.stderr,
                        help="Name of error output file.  Default is sys.stderr.")

    parser.add_argument("-t","--test",action="store_true",
                        dest="test",
                        help="Run self tests.")
    parser.add_argument("-v","--verbose",action="count",default=0,
                        dest="verbose",
                        help="Increase output verbosity.")

    args = parser.parse_args()


    # Did we want to run self tests?

    if (args.test):
        if (args.outfname is None):
            # No -o option: report to sys.stdout, as the help text promises,
            # instead of failing in open(None, ...).
            tests_ok = run_self_tests(args,stdin=args.input,stdout=sys.stdout,stderr=args.error)
        else:
            # The report file is closed (and flushed) before exiting.
            with open(args.outfname, 'w', encoding='UTF-8') as testout:
                tests_ok = run_self_tests(args,stdin=args.input,stdout=testout,stderr=args.error)
        sys.exit(0 if tests_ok else 1)

    ####################################################################
    # ACTUAL WORK
    ####################################################################
    sys.exit(main(stdin=args.input,outfname=args.outfname,stderr=args.error,args=args))

else:
    # I have been imported as a module.
    pass

########################################################################
# End of po_processor.py