File: utfdump.py

package info (click to toggle)
dblatex 0.3.12py3-5
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 12,880 kB
  • sloc: xml: 102,889; python: 8,274; makefile: 117; sh: 48
file content (28 lines) | stat: -rw-r--r-- 581 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from __future__ import print_function

import codecs
import sys

def main():
    """Dump every character of a UTF-8 encoded file with its code point.

    The file named by ``sys.argv[1]`` is read line by line and each line is
    decoded as UTF-8.  For every character one ``UXXXX: c`` row is printed,
    where ``XXXX`` is the hexadecimal code point and ``c`` is the character
    itself when it can be encoded in Latin-1, or ``?`` otherwise.  After the
    characters of a line, a ``Line   N: ...`` row shows the whole line with
    the same substitution applied.

    Raises:
        IndexError: if no file name was given on the command line.
        UnicodeDecodeError: if the file content is not valid UTF-8.
    """
    decode = codecs.getdecoder("utf8")
    encode = codecs.getencoder("latin-1")

    # Binary mode: the codec decoder needs the raw bytes.  In text mode the
    # lines would already be decoded (with the locale encoding) and passing
    # them to the decoder raises TypeError on Python 3.
    with open(sys.argv[1], "rb") as f:
        for lineno, raw in enumerate(f, 1):
            line = decode(raw)[0]
            shown = []
            for uchar in line:
                # "replace" makes the encoder emit "?" for anything outside
                # Latin-1; decoding the result back keeps the output as
                # text instead of a bytes object.
                o = encode(uchar, "replace")[0].decode("latin-1")
                print("U%04X: %s" % (ord(uchar), o))
                shown.append(o)
            print("Line %3d: %s" % (lineno, "".join(shown)))


# Run the dump only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()