File: parse.py

package info (click to toggle)
antlr4 4.9.2-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 7,328 kB
  • sloc: java: 45,008; javascript: 1,121; xml: 1,077; python: 73; cs: 71; sh: 29; makefile: 9
file content (21 lines) | stat: -rwxr-xr-x 716 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from __future__ import print_function
import codecs
import re
import sys

def main(input, output):
    """Emit one ``set.add(...)`` statement per ExtendedPictographic entry.

    Each line of *input* is checked against two patterns:

    * ``U+XXXX ; ExtendedPictographic ...`` produces ``set.add(0xXXXX);``
    * ``U+XXXX..U+YYYY ; ExtendedPictographic ...`` produces
      ``set.add(0xXXXX, 0xYYYY);``

    Lines matching neither pattern are skipped without output.

    Args:
        input: Iterable yielding the text lines to scan.
        output: File-like object the generated statements are printed to,
            one per line.
    """
    single_re = re.compile(r'^U\+([0-9a-fA-F]+)\s*;\s*ExtendedPictographic.*$')
    span_re = re.compile(r'^U\+([0-9a-fA-F]+)\.\.U\+([0-9a-fA-F]+)\s*;\s*ExtendedPictographic.*$')

    for entry in input:
        # A lone code point is tried first; a range line cannot match it
        # because '..' follows the first hex run instead of ';'.
        single = single_re.match(entry)
        if single:
            code_point = single.group(1)
            print('set.add(0x' + code_point + ');', file=output)
            continue

        span = span_re.match(entry)
        if not span:
            continue
        first, last = span.group(1), span.group(2)
        print('set.add(0x' + first + ', 0x' + last + ');', file=output)

if __name__ == '__main__':
    # Script entry: decode the file named by the first command-line
    # argument as UTF-8 and print the generated statements to stdout.
    source_path = sys.argv[1]
    with codecs.open(source_path, mode='r', encoding='utf-8') as source:
        main(source, sys.stdout)