File: iu_mien_hier.py

package info (click to toggle)
w3af 1.0-rc3svn3489-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd, squeeze, wheezy
  • size: 59,908 kB
  • ctags: 16,916
  • sloc: python: 136,990; xml: 63,472; sh: 153; ruby: 94; makefile: 40; asm: 35; jsp: 32; perl: 18; php: 5
file content (138 lines) | stat: -rw-r--r-- 6,558 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python
# -*- coding: utf8 -*-

# Natural Language Toolkit: Toolbox data file parser
#
# Copyright (C) 2001-2006 NLTK Project
# Author: Greg Aumann <greg_aumann@sil.org>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT

"""Grammar for the Toolbox MDF Alternate Hierarchy."""

# This dictionary lists all the markers that can occur in a given section
# of a shoebox record. The order is not used in parsing but may be used when
# outputting a record.

# The fields in the first tuple mark the start of a nonterminal.
# Each of these fields can occur only once, and all of them must
# occur before any other field in the nonterminal;
# otherwise they are interpreted as marking the start
# of another instance of the same nonterminal.
# Fields in the second tuple can also occur in that nonterminal.
# They must occur after those in the first tuple in
# a given instance of the nonterminal.

# Maps each nonterminal name to a pair of tuples:
#   [0] the markers that open the nonterminal,
#   [1] the markers / nested nonterminals that may follow inside it.
grammar = {
    'toolbox': (
        ('_sh',),
        ('_DateStampHasFourDigitYear', 'entry'),
    ),
    'entry': (
        ('lx',),
        (
            'hm', 'id', 'lc', 'ph', 'sh', 'mr',
            'variant', 'sense', 'bw', 'etym',
            'paradigm', 'st', 'subentry', 'dt',
        ),
    ),
    'subentry': (
        ('se',),
        (
            'hm', 'id', 'lc', 'ph', 'mr',
            'variant', 'sense', 'bw', 'etym',
            'paradigm', 'st',
        ),
    ),
    'variant': (
        ('va',),
        ('vn', 've', 'vr'),
    ),
    'sense': (
        ('sn', 'ps', 'pn'),
        (
            'gv', 'dv',
            'chingloss', 'dn', 'chinrev', 'wn',
            'ge', 'de', 're', 'we',
            'gr', 'dr', 'rr', 'wr',
            'lt', 'sc',
            'example', 'usage', 'encyc', 'only',
            'lexfunc', 'sy', 'an', 'crossref',
            'mn', 'tb', 'sd', 'is', 'th',
            'notes', 'so', 'bb',
        ),
    ),
    'chingloss': (
        ('gn',),
        ('gp',),
    ),
    'chinrev': (
        ('rn',),
        ('rp',),
    ),
    'example': (
        ('rf', 'xv'),
        ('xn', 'xe', 'xr'),
    ),
    'usage': (
        ('uv', 'un', 'ue'),
        ('ur',),
    ),
    'encyc': (
        ('ev', 'en', 'ee'),
        ('er',),
    ),
    'only': (
        ('ov', 'on', 'oe'),
        ('or',),
    ),
    'lexfunc': (
        ('lf',),
        ('lexvalue',),
    ),
    'lexvalue': (
        ('lv',),
        ('ln', 'le', 'lr'),
    ),
    'crossref': (
        ('cf',),
        ('cn', 'ce', 'cr'),
    ),
    # Every notes marker is a start marker; nothing else may follow.
    'notes': (
        ('nt', 'np', 'ng', 'nd', 'na', 'ns', 'nq'),
        (),
    ),
    'etym': (
        ('et',),
        ('eg', 'es', 'ec'),
    ),
    'paradigm': (
        ('pd', 'pdl', 'pdv'),
        (
            'pdn', 'pde', 'pdr',
            'sg', 'pl', 'rd',
            '1s', '2s', '3s', '4s',
            '1d', '2d', '3d', '4d',
            '1p', '1i', '1e', '2p', '3p', '4p',
        ),
    ),
}

# Chunk-rule form of the hierarchy: each line names a nonterminal and gives
# a tag pattern, inside braces, for the sequence of markers that makes it up.
# Rules for inner nonterminals are listed before the rules that refer to
# them (e.g. lexvalue before lexfunc, subentry before entry).
# NOTE(review): the syntax looks like the grammar format of NLTK's
# RegexpParser -- confirm against the code that consumes this string.
chunk_grammar = """
      etym: {<et><eg|es|ec>*}
      notes: {<nt|np|ng|nd|na|ns|nq>+}
      crossref: {<cf><cn|ce|cr>*}
      lexvalue: {<lv><ln|le|lr>*}
      lexfunc: {<lf><lexvalue>*}
      only: {<ov|on|oe>*<or>?}
      encyc: {<ev|en|ee>*<er>?}
      usage: {<uv|un|ue>*<ur>?}
      example: {<rf|xv><xn|xe>*}
      sense:   {<sn><ps|pn>*<gv|dv|gn|gp|dn|rn|wn|ge|de|re|we|lt|sc>*<example>*<usage>?<encyc>?<only>?<lexfunc>*<crossref>*<mn|tb|sd|is|th>*<notes>*<so>*}
      variant:  { <va><vn|ve|vr>*}
      subentry:   {<se><hm><id>?<lc>?<ph>?<mr>?<variant>*<sense>+<bw>?<etym>?<paradigm>?<st>}
      entry:   {<lx><hm>?<id>?<lc>?<ph>?<sh>?<mr>?<variant>*<sense>+<bw>?<etym>?<paradigm>?<st>*<subentry>*<dt>}
"""

# For each nonterminal, its markers and nested nonterminals in sequence.
# NOTE(review): presumably the order used when writing a record back out --
# confirm against the consumer of this table.
field_order = {
    'toolbox': ('_sh', '_DateStampHasFourDigitYear', 'entry'),
    'entry': (
        'lx', 'hm', 'sh', 'id', 'lc', 'ph', 'mr',
        'variant', 'sense', 'bw', 'etym',
        'paradigm', 'st', 'subentry', 'dt',
    ),
    'subentry': (
        'se', 'hm', 'id', 'lc', 'ph', 'mr',
        'variant', 'sense', 'bw', 'etym',
        'paradigm', 'st',
    ),
    'variant': ('va', 'vn', 've', 'vr'),
    'sense': (
        'sn', 'ps', 'pn', 'gv', 'dv',
        'chingloss', 'dn', 'chinrev', 'wn',
        'ge', 'de', 're', 'we',
        'gr', 'dr', 'rr', 'wr',
        'lt', 'sc',
        'example', 'usage', 'encyc', 'only',
        'lexfunc', 'sy', 'an', 'crossref',
        'mn', 'tb', 'sd', 'is', 'th',
        'notes', 'so',
        # 'bb' is allowed in a sense by `grammar`, so it needs a position
        # here as well; it goes last, as it does in `grammar`.
        'bb',
    ),
    'chingloss': ('gn', 'gp'),
    'chinrev': ('rn', 'rp'),
    'example': ('rf', 'xv', 'xn', 'xe', 'xr'),
    'usage': ('uv', 'un', 'ue', 'ur'),
    'encyc': ('ev', 'en', 'ee', 'er'),
    'only': ('ov', 'on', 'oe', 'or'),
    'lexfunc': ('lf', 'lexvalue'),
    'lexvalue': ('lv', 'ln', 'le', 'lr'),
    'crossref': ('cf', 'cn', 'ce', 'cr'),
    'notes': ('nt', 'np', 'ng', 'nd', 'na', 'ns', 'nq'),
    'etym': ('et', 'eg', 'es', 'ec'),
    'paradigm': (
        'pd', 'pdl', 'pdv', 'pdn', 'pde', 'pdr',
        'sg', 'pl', 'rd',
        '1s', '2s', '3s', '4s',
        '1d', '2d', '3d', '4d',
        '1p', '1i', '1e', '2p', '3p', '4p',
    ),
}

# For each nonterminal, a subset of its markers / nested nonterminals.
# NOTE(review): presumably the fields created by default for a new
# element -- confirm against the consumer. There is no 'paradigm' key.
default_fields = {
    'toolbox': ('_sh', '_DateStampHasFourDigitYear'),
    'entry': ('lx', 'hm', 'variant', 'sense', 'bw', 'st'),
    'subentry': ('se', 'hm', 'variant', 'sense', 'bw', 'st'),
    'variant': ('va',),
    'sense': (
        'sn', 'ps', 'dv',
        'chingloss', 'dn',
        'ge', 'de',
        'example', 'lexfunc',
    ),
    'chingloss': ('gn', 'gp'),
    'chinrev': ('rn', 'rp'),
    'example': ('xv', 'xn', 'xe'),
    'usage': ('uv', 'un', 'ue'),
    'encyc': ('ev', 'en', 'ee'),
    'only': ('ov', 'on', 'oe'),
    'lexfunc': ('lf', 'lexvalue'),
    'lexvalue': ('lv',),
    'crossref': ('cf',),
    'notes': ('nt', 'nq'),
    'etym': ('et', 'eg'),
}

# For each listed nonterminal, a tuple of child element names.
# NOTE(review): presumably the children that get a blank line in front of
# them on output -- confirm against the consumer.
# NOTE(review): 'bw' appears twice under 'entry' and 'subentry'; kept as-is,
# but check whether a different marker was intended for one of them.
blanks_before = {
    'toolbox': ('entry',),
    'entry': (
        'variant',
        'sense',
        'bw',
        'paradigm',
        'subentry',
        'bw',
    ),
    'subentry': (
        'variant',
        'sense',
        'bw',
        'paradigm',
        'bw',
    ),
    'sense': (
        'example',
        'usage',
        'encyc',
        'only',
        'lexfunc',
        'crossref',
        'is',
        'notes',
        'so',
    ),
}

# For each listed nonterminal, a tuple of child element names.
# NOTE(review): presumably the children that are separated from a preceding
# sibling of the same kind by a blank line on output -- confirm against the
# consumer.
# NOTE(review): 'bw' appears twice under 'entry' and 'subentry'; kept as-is,
# but check whether a different marker was intended for one of them.
blanks_between = {
    'toolbox': ('entry',),
    'entry': (
        'sense',
        'bw',
        'paradigm',
        'subentry',
        'bw',
    ),
    'subentry': (
        'sense',
        'bw',
        'paradigm',
        'bw',
    ),
    'sense': (
        'example',
        'usage',
        'encyc',
        'only',
        'lexfunc',
        'crossref',
        'so',
    ),
}