File: hfst_commandline.py

package info (click to toggle)
hfst 3.16.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 14,532 kB
  • sloc: cpp: 101,875; sh: 6,717; python: 5,225; yacc: 4,985; lex: 2,900; makefile: 2,017; xml: 6
file content (169 lines) | stat: -rw-r--r-- 6,130 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
from sys import argv, stdin, stdout
from hfst import ImplementationType, HfstInputStream, HfstOutputStream

def _check_option(opt, short_getopts, long_getopts, errmsg='TODO'):
    """Verify that option *opt* is declared in the getopt specifications.

    Leading '-' characters are stripped from *opt* before comparing.  A
    one-character name is accepted when it occurs in *short_getopts* (the
    ':' argument markers are not option names).  Any name is accepted when
    it equals an entry of *long_getopts* with that entry's trailing '='
    marker removed.  As in the getopt module, *long_getopts* may be a
    single string instead of a list.

    *errmsg* is accepted for interface compatibility; it is not used.

    Returns None when the option is recognized, raises RuntimeError
    otherwise.
    """
    if isinstance(long_getopts, str):
        long_getopts = [long_getopts]
    name = opt.lstrip('-')
    # An empty name (e.g. a bare '--') can never be a declared option.
    if name:
        if len(name) == 1 and name != ':' and name in short_getopts:
            return
        # Compare whole names: a substring test would let 'put' pass for
        # the declared option 'input='.
        if any(name == longopt.rstrip('=') for longopt in long_getopts):
            return
    raise RuntimeError('argument not recognized: ' + opt)

def get_implementation_type(val):
    """Translate a transducer type name into an ImplementationType member.

    The recognized names are 'sfst', 'openfst-tropical' and 'foma'.

    Raises RuntimeError for any other value.
    """
    if val == 'sfst':
        return ImplementationType.SFST_TYPE
    if val == 'openfst-tropical':
        return ImplementationType.TROPICAL_OPENFST_TYPE
    if val == 'foma':
        return ImplementationType.FOMA_TYPE
    # None of the known names matched.
    raise RuntimeError('type not recognized: ' + val)

def hfst_getopt(short_getopts, long_getopts, free_params=0, errmsg='TODO'):
    """Parse and validate the command line parameters in argv[1:].

    The parameters are parsed with getopt.gnu_getopt, so option arguments
    may be given as --foo FOO or --foo=FOO.  After parsing it is checked

    - that every option is listed in short_getopts or long_getopts
    - that the number of free parameters is not bigger than free_params

    errmsg is only passed on to _check_option.

    Returns the result of gnu_getopt: a pair of the list of
    (option, value) pairs and the list of free parameters.
    Raises RuntimeError when too many free parameters are given.
    """
    import getopt
    options = getopt.gnu_getopt(argv[1:], short_getopts, long_getopts)
    parsed_opts, free_args = options
    for parsed in parsed_opts:
        _check_option(parsed[0], short_getopts, long_getopts, errmsg)
    given = len(free_args)
    if given > free_params:
        raise RuntimeError('too many free parameters given (' + str(given) + '), maximum is ' + str(free_params))
    return options

def _get_input_stream(filename, stream_type):
    """Create the input stream denoted by *filename*.

    filename    -- '-' selects standard input, None selects no stream at
                   all, any other value is used as the name of the file
                   to read.
    stream_type -- 'hfst' for an HfstInputStream, 'text' for a text
                   stream (stdin or a file opened for reading).

    Returns the stream, or None when *filename* is None.
    Raises RuntimeError when *stream_type* is neither 'hfst' nor 'text'.
    """
    if stream_type not in ('hfst', 'text'):
        raise RuntimeError('stream_type {} not recognized'.format(stream_type))
    if filename is None:
        return None
    wants_hfst = stream_type == 'hfst'
    if filename == '-':
        # An HfstInputStream constructed without a file name is what this
        # module uses for standard input.
        return HfstInputStream() if wants_hfst else stdin
    if wants_hfst:
        return HfstInputStream(filename)
    return open(filename, 'r') # TODO: encoding?
    
def _get_output_stream(filename, stream_type, impl=None):
    """Create the output stream denoted by *filename*.

    filename    -- '-' selects standard output, None selects no stream at
                   all, any other value is used as the name of the file
                   to write.
    stream_type -- 'hfst' for an HfstOutputStream, 'text' for a text
                   stream (stdout or a file opened for writing).
    impl        -- passed as the 'type' argument to HfstOutputStream; not
                   used for text streams.

    Returns the stream, or None when *filename* is None.
    Raises RuntimeError when *stream_type* is neither 'hfst' nor 'text'.
    """
    if stream_type not in ('hfst', 'text'):
        raise RuntimeError('stream_type {} not recognized'.format(stream_type))
    if filename is None:
        return None
    wants_hfst = stream_type == 'hfst'
    if filename == '-':
        # An HfstOutputStream constructed without a file name is what this
        # module uses for standard output.
        return HfstOutputStream(type=impl) if wants_hfst else stdout
    if wants_hfst:
        return HfstOutputStream(filename, type=impl)
    return open(filename, 'w') # TODO: encoding?

def _get_one_output_stream(options, stream_type, impl=None):
    """Select the single output stream from parsed command line options.

    options     -- parse result whose first item is the list of
                   (option, value) pairs
    stream_type -- 'hfst' or 'text', passed on to _get_output_stream
    impl        -- passed on to _get_output_stream

    Returns a (stream, name) pair; name is currently always the
    placeholder 'TODO'.
    """
    name = 'TODO'
    for parsed in options[0]:
        if parsed[0] in ('-o', '--output'):
            # 1) given with -o or --output: the first occurrence decides
            destination = parsed[1]
            break
    else:
        # 2) not given, defaults to standard output
        destination = '-'
    return (_get_output_stream(destination, stream_type, impl), name)

def get_one_hfst_output_stream(options, impl):
    """Return the (stream, name) pair for the one HFST output stream.

    Any number of free arguments may be given but none will be
    interpreted as OFILE.  impl is handed on as the implementation
    argument of the stream.
    """
    return _get_one_output_stream(options, stream_type='hfst', impl=impl)

def get_one_text_output_stream(options):
    """Return the (stream, name) pair for the one text output stream."""
    return _get_one_output_stream(options, stream_type='text')

def _get_one_input_stream(options, stream_type):
    """Select the single input stream from parsed command line options.

    options     -- parse result: the list of (option, value) pairs,
                   normally followed by the list of free arguments
    stream_type -- 'hfst' or 'text', passed on to _get_input_stream

    Returns a (stream, name) pair; name is currently always the
    placeholder 'TODO'.
    """
    name = 'TODO'
    for parsed in options[0]:
        if parsed[0] in ('-i', '--input'):
            # 1) given with -i or --input (overrides free argument)
            source = parsed[1]
            break
    else:
        if len(options) == 2 and len(options[1]) >= 1:
            # 2) given as free argument: the first one is used
            source = options[1][0]
        else:
            # 3) not given, defaults to standard input
            source = '-'
    return (_get_input_stream(source, stream_type), name)

def get_one_text_input_stream(options):
    """Return the (stream, name) pair for the one text input stream."""
    return _get_one_input_stream(options, stream_type='text')

def get_one_hfst_input_stream(options):
    """Return the (stream, name) pair for the one HFST input stream.

    Any number of free arguments may be given but the first one will be
    interpreted as IFILE (unless -i or --input is used).
    """
    return _get_one_input_stream(options, stream_type='hfst')

def get_two_hfst_input_streams(options):
    """Select the two HFST input streams from parsed command line options.

    Explicit files are given with -1/--input1 and -2/--input2; when an
    option is repeated its last occurrence is used.  Free arguments fill
    in whatever was not given explicitly:

    - both explicit: free arguments are ignored
    - one explicit: the other stream is the first free argument, or
      standard input when there is no free argument
    - none explicit, exactly two free arguments: they are IFILE1 and
      IFILE2 in that order
    - none explicit otherwise: the first stream is standard input and the
      second one is the first free argument; without any free argument
      the second stream is None

    Returns ((stream1, name1), (stream2, name2)); both names are
    currently always the placeholder 'TODO'.
    """
    name1 = 'TODO'
    name2 = 'TODO'

    explicit_ifile1 = None
    explicit_ifile2 = None
    for parsed in options[0]:
        if parsed[0] in ('-1', '--input1'):
            explicit_ifile1 = parsed[1]
        elif parsed[0] in ('-2', '--input2'):
            explicit_ifile2 = parsed[1]

    # The second free argument only counts when exactly two were given.
    free_args = options[1] if len(options) == 2 else []
    arg1 = free_args[0] if len(free_args) >= 1 else None
    arg2 = free_args[1] if len(free_args) == 2 else None

    # A stream is None here exactly when its explicit file was not given.
    istr1 = _get_input_stream(explicit_ifile1, 'hfst')
    istr2 = _get_input_stream(explicit_ifile2, 'hfst')

    if istr1 is None and istr2 is None:
        if arg2 is None:
            istr1 = _get_input_stream('-', 'hfst')
            istr2 = _get_input_stream(arg1, 'hfst')
        else:
            istr1 = _get_input_stream(arg1, 'hfst')
            istr2 = _get_input_stream(arg2, 'hfst')
    elif istr1 is None:
        istr1 = _get_input_stream('-' if arg1 is None else arg1, 'hfst')
    elif istr2 is None:
        istr2 = _get_input_stream('-' if arg1 is None else arg1, 'hfst')
    return ((istr1, name1), (istr2, name2))