File: _regex_unicode.h

package info (click to toggle)
python-regex 0.1.20120613-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 4,612 kB
  • sloc: ansic: 37,981; python: 12,514; makefile: 49
file content (213 lines) | stat: -rw-r--r-- 6,934 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
typedef unsigned char RE_UINT8;
typedef signed char RE_INT8;
typedef unsigned short RE_UINT16;
typedef signed short RE_INT16;
typedef unsigned int RE_UINT32;
typedef signed int RE_INT32;

typedef unsigned char BOOL;
enum {FALSE, TRUE};

#define RE_ASCII_MAX 0x7F
#define RE_LOCALE_MAX 0xFF
#define RE_UNICODE_MAX 0x10FFFF

#define RE_MAX_CASES 4
#define RE_MAX_FOLDED 3

typedef struct RE_Property {
    RE_UINT16 name;
    RE_UINT8 id;
    RE_UINT8 value_set;
} RE_Property;

typedef struct RE_PropertyValue {
    RE_UINT16 name;
    RE_UINT8 value_set;
    RE_UINT8 id;
} RE_PropertyValue;

typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 ch);

#define RE_PROP_GC 0x1C
#define RE_PROP_CASED 0x8
#define RE_PROP_UPPERCASE 0x7
#define RE_PROP_LOWERCASE 0x6

#define RE_PROP_C 30
#define RE_PROP_L 31
#define RE_PROP_M 32
#define RE_PROP_N 33
#define RE_PROP_P 34
#define RE_PROP_S 35
#define RE_PROP_Z 36

#define RE_PROP_CN 0
#define RE_PROP_LU 1
#define RE_PROP_LL 2
#define RE_PROP_LT 3
#define RE_PROP_LM 4
#define RE_PROP_LO 5
#define RE_PROP_MN 6
#define RE_PROP_ME 7
#define RE_PROP_MC 8
#define RE_PROP_ND 9
#define RE_PROP_NL 10
#define RE_PROP_NO 11
#define RE_PROP_ZS 12
#define RE_PROP_ZL 13
#define RE_PROP_ZP 14
#define RE_PROP_CC 15
#define RE_PROP_CF 16
#define RE_PROP_CO 17
#define RE_PROP_CS 18
#define RE_PROP_PD 19
#define RE_PROP_PS 20
#define RE_PROP_PE 21
#define RE_PROP_PC 22
#define RE_PROP_PO 23
#define RE_PROP_SM 24
#define RE_PROP_SC 25
#define RE_PROP_SK 26
#define RE_PROP_SO 27
#define RE_PROP_PI 28
#define RE_PROP_PF 29

#define RE_PROP_C_MASK 0x00078001
#define RE_PROP_L_MASK 0x0000003E
#define RE_PROP_M_MASK 0x000001C0
#define RE_PROP_N_MASK 0x00000E00
#define RE_PROP_P_MASK 0x30F80000
#define RE_PROP_S_MASK 0x0F000000
#define RE_PROP_Z_MASK 0x00007000

#define RE_PROP_ALNUM 0x460001
#define RE_PROP_ALPHA 0x050001
#define RE_PROP_ANY 0x470001
#define RE_PROP_ASSIGNED 0x480001
#define RE_PROP_BLANK 0x490001
#define RE_PROP_CNTRL 0x1C000F
#define RE_PROP_DIGIT 0x1C0009
#define RE_PROP_GRAPH 0x4A0001
#define RE_PROP_LOWER 0x060001
#define RE_PROP_PRINT 0x4B0001
#define RE_PROP_PUNCT 0x1C0022
#define RE_PROP_SPACE 0x250001
#define RE_PROP_UPPER 0x070001
#define RE_PROP_WORD 0x4C0001
#define RE_PROP_XDIGIT 0x2D0001
#define RE_PROP_ASCII 0x030001

#define RE_BREAK_OTHER 0
#define RE_BREAK_CR 1
#define RE_BREAK_LF 2
#define RE_BREAK_NEWLINE 3
#define RE_BREAK_EXTEND 4
#define RE_BREAK_FORMAT 5
#define RE_BREAK_KATAKANA 6
#define RE_BREAK_ALETTER 7
#define RE_BREAK_MIDLETTER 8
#define RE_BREAK_MIDNUM 9
#define RE_BREAK_MIDNUMLET 10
#define RE_BREAK_NUMERIC 11
#define RE_BREAK_EXTENDNUMLET 12

#define RE_GBREAK_OTHER 0
#define RE_GBREAK_CR 1
#define RE_GBREAK_LF 2
#define RE_GBREAK_CONTROL 3
#define RE_GBREAK_EXTEND 4
#define RE_GBREAK_SPACINGMARK 5
#define RE_GBREAK_L 6
#define RE_GBREAK_V 7
#define RE_GBREAK_T 8
#define RE_GBREAK_LV 9
#define RE_GBREAK_LVT 10
#define RE_GBREAK_PREPEND 11

extern char* re_strings[1135];
extern RE_Property re_properties[144];
extern RE_PropertyValue re_property_values[1218];
extern RE_UINT16 re_expand_on_folding[104];
extern RE_GetPropertyFunc re_get_property[77];

RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 ch);
RE_UINT32 re_get_sentence_break(RE_UINT32 ch);
RE_UINT32 re_get_word_break(RE_UINT32 ch);
RE_UINT32 re_get_block(RE_UINT32 ch);
RE_UINT32 re_get_math(RE_UINT32 ch);
RE_UINT32 re_get_alphabetic(RE_UINT32 ch);
RE_UINT32 re_get_lowercase(RE_UINT32 ch);
RE_UINT32 re_get_uppercase(RE_UINT32 ch);
RE_UINT32 re_get_cased(RE_UINT32 ch);
RE_UINT32 re_get_case_ignorable(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 ch);
RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 ch);
RE_UINT32 re_get_id_start(RE_UINT32 ch);
RE_UINT32 re_get_id_continue(RE_UINT32 ch);
RE_UINT32 re_get_xid_start(RE_UINT32 ch);
RE_UINT32 re_get_xid_continue(RE_UINT32 ch);
RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 ch);
RE_UINT32 re_get_grapheme_extend(RE_UINT32 ch);
RE_UINT32 re_get_grapheme_base(RE_UINT32 ch);
RE_UINT32 re_get_grapheme_link(RE_UINT32 ch);
RE_UINT32 re_get_bidi_class(RE_UINT32 ch);
RE_UINT32 re_get_bidi_mirrored(RE_UINT32 ch);
RE_UINT32 re_get_canonical_combining_class(RE_UINT32 ch);
RE_UINT32 re_get_decomposition_type(RE_UINT32 ch);
RE_UINT32 re_get_east_asian_width(RE_UINT32 ch);
RE_UINT32 re_get_general_category(RE_UINT32 ch);
RE_UINT32 re_get_joining_group(RE_UINT32 ch);
RE_UINT32 re_get_joining_type(RE_UINT32 ch);
RE_UINT32 re_get_line_break(RE_UINT32 ch);
RE_UINT32 re_get_numeric_type(RE_UINT32 ch);
RE_UINT32 re_get_numeric_value(RE_UINT32 ch);
RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 ch);
RE_UINT32 re_get_indic_matra_category(RE_UINT32 ch);
RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 ch);
RE_UINT32 re_get_white_space(RE_UINT32 ch);
RE_UINT32 re_get_bidi_control(RE_UINT32 ch);
RE_UINT32 re_get_join_control(RE_UINT32 ch);
RE_UINT32 re_get_dash(RE_UINT32 ch);
RE_UINT32 re_get_hyphen(RE_UINT32 ch);
RE_UINT32 re_get_quotation_mark(RE_UINT32 ch);
RE_UINT32 re_get_terminal_punctuation(RE_UINT32 ch);
RE_UINT32 re_get_other_math(RE_UINT32 ch);
RE_UINT32 re_get_hex_digit(RE_UINT32 ch);
RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 ch);
RE_UINT32 re_get_other_alphabetic(RE_UINT32 ch);
RE_UINT32 re_get_ideographic(RE_UINT32 ch);
RE_UINT32 re_get_diacritic(RE_UINT32 ch);
RE_UINT32 re_get_extender(RE_UINT32 ch);
RE_UINT32 re_get_other_lowercase(RE_UINT32 ch);
RE_UINT32 re_get_other_uppercase(RE_UINT32 ch);
RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 ch);
RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 ch);
RE_UINT32 re_get_ids_binary_operator(RE_UINT32 ch);
RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 ch);
RE_UINT32 re_get_radical(RE_UINT32 ch);
RE_UINT32 re_get_unified_ideograph(RE_UINT32 ch);
RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 ch);
RE_UINT32 re_get_deprecated(RE_UINT32 ch);
RE_UINT32 re_get_soft_dotted(RE_UINT32 ch);
RE_UINT32 re_get_logical_order_exception(RE_UINT32 ch);
RE_UINT32 re_get_other_id_start(RE_UINT32 ch);
RE_UINT32 re_get_other_id_continue(RE_UINT32 ch);
RE_UINT32 re_get_sterm(RE_UINT32 ch);
RE_UINT32 re_get_variation_selector(RE_UINT32 ch);
RE_UINT32 re_get_pattern_white_space(RE_UINT32 ch);
RE_UINT32 re_get_pattern_syntax(RE_UINT32 ch);
RE_UINT32 re_get_script(RE_UINT32 ch);
RE_UINT32 re_get_alphanumeric(RE_UINT32 ch);
RE_UINT32 re_get_any(RE_UINT32 ch);
RE_UINT32 re_get_assigned(RE_UINT32 ch);
RE_UINT32 re_get_blank(RE_UINT32 ch);
RE_UINT32 re_get_graph(RE_UINT32 ch);
RE_UINT32 re_get_print(RE_UINT32 ch);
RE_UINT32 re_get_word(RE_UINT32 ch);
int re_get_all_cases(RE_UINT32 ch, RE_UINT32* codepoints);
RE_UINT32 re_get_simple_case_folding(RE_UINT32 ch);
int re_get_full_case_folding(RE_UINT32 ch, RE_UINT32* codepoints);