File: fix-lupdate5.py

package info (click to toggle)
pyacidobasic 3.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,388 kB
  • sloc: python: 1,569; xml: 71; makefile: 60; sh: 7
file content (44 lines) | stat: -rwxr-xr-x 1,091 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#! /usr/bin/python3

import re,sys

# Matches a hexadecimal character reference carrying exactly two hex digits
# (one byte), e.g. ``&#xc3;``; group 1 is the pair of hex digits.
htmlchar=re.compile(b'&#x([0-9a-fA-F]{2});')

def _utf8_length(lead):
    """Return the sequence length announced by a UTF-8 lead byte, or 0.

    0 means *lead* cannot start a multi-byte UTF-8 sequence (it is ASCII,
    a continuation byte, or a value that is never valid as a lead byte).
    """
    if 0xC2 <= lead <= 0xDF:
        return 2
    if 0xE0 <= lead <= 0xEF:
        return 3
    if 0xF0 <= lead <= 0xF4:
        return 4
    return 0

def spanGroupVal(text):
    """Locate every two-digit ``&#xNN;`` reference in *text* and decode it.

    A run of directly adjacent references whose byte values form one valid
    UTF-8 multi-byte sequence (for instance ``&#xc3;&#xa9;``) is decoded as
    a single character: every reference of the run but the last gets the
    empty string, and the last one gets the decoded character.  Any other
    reference is decoded on its own as the code point ``NN``.

    Args:
        text: the bytes to scan.

    Returns:
        A list with one ``(span, group, val)`` tuple per reference, in
        order of appearance: ``span`` is the ``(start, end)`` position of
        the reference in *text*, ``group`` the two hex digits as bytes,
        and ``val`` the replacement string (possibly empty).
    """
    # NOTE(review): references to markup-significant characters such as
    # ``&#x26;`` or ``&#x3c;`` are decoded to raw characters as well --
    # confirm the input never relies on them staying escaped.
    matches=list(htmlchar.finditer(text))
    result=[]
    index=0
    while index < len(matches):
        first=matches[index]
        lead=int(first.group(1),16)
        size=_utf8_length(lead)
        run=matches[index:index+size]
        decoded=None
        # Only references that touch each other can be the bytes of one
        # character; a lead byte must never swallow a distant reference.
        if size and len(run)==size and all(
                a.end()==b.start() for a, b in zip(run, run[1:])):
            raw=bytes(int(m.group(1),16) for m in run)
            try:
                decoded=raw.decode("utf8")
            except UnicodeDecodeError:
                # Not a valid sequence: fall back to per-reference decoding
                # instead of aborting the whole conversion.
                decoded=None
        if decoded is None:
            result.append((first.span(), first.group(1), chr(lead)))
            index+=1
        else:
            for m in run[:-1]:
                result.append((m.span(), m.group(1), ""))
            last=run[-1]
            result.append((last.span(), last.group(1), decoded))
            index+=size
    return result

def replace(text, spv):
    """Rebuild *text* with each listed span swapped for its replacement.

    Args:
        text: the original bytes.
        spv: iterable of ``(span, group, val)`` tuples in increasing
            position order; ``span`` holds the start and end offsets of
            the slice to drop, and ``val`` is the string whose UTF-8
            encoding is written in its place.  ``group`` is not used.

    Returns:
        The resulting bytes.
    """
    pieces=[]
    cursor=0
    for span, _group, val in spv:
        # Untouched bytes between the previous span and this one.
        pieces.append(text[cursor:span[0]])
        pieces.append(val.encode("utf8"))
        cursor=span[1]
    # Whatever follows the last span (the whole text when spv is empty).
    pieces.append(text[cursor:])
    return b''.join(pieces)
   
if __name__=="__main__":
    # Usage: fix-lupdate5.py INFILE OUTFILE
    # Rewrites INFILE into OUTFILE with its two-digit ``&#xNN;`` character
    # references replaced by UTF-8 encoded characters.
    if len(sys.argv) < 3:
        sys.exit("usage: %s INFILE OUTFILE" % sys.argv[0])
    infileName=sys.argv[1]
    outfileName=sys.argv[2]
    # The input is read completely and closed before the output is opened,
    # so passing the same path for both arguments keeps working.
    with open(infileName,"rb") as infile:
        text=infile.read()
    spv=spanGroupVal(text)
    newText=replace(text,spv)
    # The context manager guarantees the data is flushed and the handle
    # closed even if the write fails part-way.
    with open(outfileName,"wb") as outfile:
        outfile.write(newText)