File: _regex_unicode.h

package info (click to toggle)
python-regex 0.1.20201113-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,272 kB
  • sloc: ansic: 65,289; python: 15,379; makefile: 39
file content (298 lines) | stat: -rw-r--r-- 10,554 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
/* Integer aliases. The unsigned ones (RE_UINT8, RE_UINT16, RE_UINT32) are the
 * types used by the structs, tables and prototypes declared in this header.
 *
 * NOTE(review): the names suggest exact 8/16/32-bit widths, but standard C
 * only guarantees minimum widths for char/short/int -- confirm for every
 * supported platform before relying on exact sizes. */
typedef unsigned char RE_UINT8;
typedef signed char RE_INT8;
typedef unsigned short RE_UINT16;
typedef signed short RE_INT16;
typedef unsigned int RE_UINT32;
typedef signed int RE_INT32;

/* Boolean type (one unsigned char) and its two values. */
typedef unsigned char BOOL;

/* Each macro is guarded on its own: if the environment already supplies only
 * one of FALSE/TRUE, just the missing one is defined here and the existing
 * one is left alone instead of being redefined. */
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif

/* Largest 7-bit (0x7F) and 8-bit (0xFF) character values.
 * NOTE(review): presumably the upper bounds for ASCII and locale-based
 * matching -- confirm against the callers. */
#define RE_ASCII_MAX 0x7F
#define RE_LOCALE_MAX 0xFF

/* Size limits.
 * NOTE(review): presumably the capacities of the output buffers passed to
 * re_get_all_cases() (RE_MAX_CASES), re_get_full_case_folding()
 * (RE_MAX_FOLDED) and re_get_script_extensions() (RE_MAX_SCX) -- confirm
 * against the implementation. */
#define RE_MAX_CASES 4
#define RE_MAX_FOLDED 3
#define RE_MAX_SCX 21

/* Element type of the re_properties[] table declared in this header. */
typedef struct RE_Property {
    RE_UINT16 name;      /* presumably an index into re_strings[] -- TODO confirm */
    RE_UINT8 id;         /* presumably the property id (cf. RE_PROP_GC etc.) -- TODO confirm */
    RE_UINT8 value_set;  /* presumably matches RE_PropertyValue.value_set -- TODO confirm */
} RE_Property;

/* Element type of the re_property_values[] table declared in this header. */
typedef struct RE_PropertyValue {
    RE_UINT16 name;      /* presumably an index into re_strings[] -- TODO confirm */
    RE_UINT8 value_set;  /* presumably the group of values this entry belongs to -- TODO confirm */
    RE_UINT16 id;        /* presumably the numeric value within that group -- TODO confirm */
} RE_PropertyValue;

/* Pointer to a function that takes a codepoint and returns an RE_UINT32.
 * This is the element type of re_get_property[] and matches the signature of
 * the single-argument re_get_* lookups declared in this header. */
typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);

/* Numeric ids of selected properties. The same numbers appear in the upper
 * 16 bits of some of the packed shorthand constants in this header (for
 * example RE_PROP_DIGIT is 0x1E0009, whose high half is RE_PROP_GC).
 * NOTE(review): presumably these are also indices into re_get_property[] --
 * confirm against the implementation. */
#define RE_PROP_GC 0x1E
#define RE_PROP_CASED 0xA
#define RE_PROP_UPPERCASE 0x56
#define RE_PROP_LOWERCASE 0x33
#define RE_PROP_SCX 0x50

/* Values 30..38, continuing the numbering of the two-letter general category
 * values (0..29) defined in this header. C, L, M, N, P, S and Z each have a
 * corresponding RE_PROP_<letter>_MASK constant; ASSIGNED and CASEDLETTER do
 * not.
 * NOTE(review): presumably these denote grouped general categories -- confirm
 * how the matcher interprets them. */
#define RE_PROP_C 30
#define RE_PROP_L 31
#define RE_PROP_M 32
#define RE_PROP_N 33
#define RE_PROP_P 34
#define RE_PROP_S 35
#define RE_PROP_Z 36
#define RE_PROP_ASSIGNED 37
#define RE_PROP_CASEDLETTER 38

/* Two-letter general category values, numbered 0..29. Each value is also the
 * bit position that stands for the category in the RE_PROP_<letter>_MASK
 * constants (e.g. RE_PROP_ND is 9 and RE_PROP_N_MASK has bit 9 set).
 * NOTE(review): presumably the values returned by re_get_general_category()
 * -- confirm against the implementation. */
#define RE_PROP_CN 0
#define RE_PROP_CC 1
#define RE_PROP_ZS 2
#define RE_PROP_PO 3
#define RE_PROP_SC 4
#define RE_PROP_PS 5
#define RE_PROP_PE 6
#define RE_PROP_SM 7
#define RE_PROP_PD 8
#define RE_PROP_ND 9
#define RE_PROP_LU 10
#define RE_PROP_SK 11
#define RE_PROP_PC 12
#define RE_PROP_LL 13
#define RE_PROP_SO 14
#define RE_PROP_LO 15
#define RE_PROP_PI 16
#define RE_PROP_CF 17
#define RE_PROP_NO 18
#define RE_PROP_PF 19
#define RE_PROP_LT 20
#define RE_PROP_LM 21
#define RE_PROP_MN 22
#define RE_PROP_ME 23
#define RE_PROP_MC 24
#define RE_PROP_NL 25
#define RE_PROP_ZL 26
#define RE_PROP_ZP 27
#define RE_PROP_CS 28
#define RE_PROP_CO 29

/* Bit sets over the two-letter general category values: bit n is set when
 * the category whose value is n belongs to the group. The set bits are:
 *   C: CN(0)  CC(1)  CF(17) CS(28) CO(29)
 *   L: LU(10) LL(13) LO(15) LT(20) LM(21)
 *   M: MN(22) ME(23) MC(24)
 *   N: ND(9)  NO(18) NL(25)
 *   P: PO(3)  PS(5)  PE(6)  PD(8)  PC(12) PI(16) PF(19)
 *   S: SC(4)  SM(7)  SK(11) SO(14)
 *   Z: ZS(2)  ZL(26) ZP(27)
 * Together the seven masks cover bits 0..29 exactly once. */
#define RE_PROP_C_MASK 0x30020003
#define RE_PROP_L_MASK 0x0030A400
#define RE_PROP_M_MASK 0x01C00000
#define RE_PROP_N_MASK 0x02040200
#define RE_PROP_P_MASK 0x00091168
#define RE_PROP_S_MASK 0x00004890
#define RE_PROP_Z_MASK 0x0C000004

/* Packed shorthand constants: two 16-bit halves in one integer.
 * Where this header defines the corresponding property id, the high half
 * matches it: 0x1E is RE_PROP_GC (CNTRL, DIGIT), 0x33 is RE_PROP_LOWERCASE
 * (LOWER) and 0x56 is RE_PROP_UPPERCASE (UPPER). For the two RE_PROP_GC
 * entries the low half is a general category value: 1 (RE_PROP_CC) for CNTRL
 * and 9 (RE_PROP_ND) for DIGIT; every other entry has low half 1.
 * NOTE(review): presumably the encoding is (property id << 16) | value, with
 * value 1 meaning "true" for binary properties -- confirm against the code
 * that decodes these. */
#define RE_PROP_ALNUM 0x010001
#define RE_PROP_ALPHA 0x000001
#define RE_PROP_ANY 0x020001
#define RE_PROP_ASCII 0x080001
#define RE_PROP_BLANK 0x070001
#define RE_PROP_CNTRL 0x1E0001
#define RE_PROP_DIGIT 0x1E0009
#define RE_PROP_GRAPH 0x1F0001
#define RE_PROP_LOWER 0x330001
#define RE_PROP_PRINT 0x4B0001
#define RE_PROP_SPACE 0x580001
#define RE_PROP_UPPER 0x560001
#define RE_PROP_WORD 0x590001
#define RE_PROP_XDIGIT 0x5B0001
#define RE_PROP_POSIX_ALNUM 0x460001
#define RE_PROP_POSIX_DIGIT 0x470001
#define RE_PROP_POSIX_PUNCT 0x480001
#define RE_PROP_POSIX_XDIGIT 0x490001

/* Word-break class values, numbered consecutively 0..22.
 * NOTE(review): presumably the values returned by re_get_word_break() --
 * confirm against the implementation. */
#define RE_WBREAK_OTHER 0
#define RE_WBREAK_LF 1
#define RE_WBREAK_NEWLINE 2
#define RE_WBREAK_CR 3
#define RE_WBREAK_WSEGSPACE 4
#define RE_WBREAK_DOUBLEQUOTE 5
#define RE_WBREAK_SINGLEQUOTE 6
#define RE_WBREAK_MIDNUM 7
#define RE_WBREAK_MIDNUMLET 8
#define RE_WBREAK_NUMERIC 9
#define RE_WBREAK_MIDLETTER 10
#define RE_WBREAK_ALETTER 11
#define RE_WBREAK_EXTENDNUMLET 12
#define RE_WBREAK_FORMAT 13
#define RE_WBREAK_EXTEND 14
#define RE_WBREAK_HEBREWLETTER 15
#define RE_WBREAK_ZWJ 16
#define RE_WBREAK_KATAKANA 17
#define RE_WBREAK_REGIONALINDICATOR 18
#define RE_WBREAK_EBASE 19
#define RE_WBREAK_EBASEGAZ 20
#define RE_WBREAK_EMODIFIER 21
#define RE_WBREAK_GLUEAFTERZWJ 22

/* Grapheme-cluster-break class values, numbered consecutively 0..17.
 * NOTE(review): presumably the values returned by
 * re_get_grapheme_cluster_break() -- confirm against the implementation. */
#define RE_GBREAK_OTHER 0
#define RE_GBREAK_CONTROL 1
#define RE_GBREAK_LF 2
#define RE_GBREAK_CR 3
#define RE_GBREAK_EXTEND 4
#define RE_GBREAK_PREPEND 5
#define RE_GBREAK_SPACINGMARK 6
#define RE_GBREAK_L 7
#define RE_GBREAK_V 8
#define RE_GBREAK_T 9
#define RE_GBREAK_ZWJ 10
#define RE_GBREAK_LV 11
#define RE_GBREAK_LVT 12
#define RE_GBREAK_REGIONALINDICATOR 13
#define RE_GBREAK_EBASE 14
#define RE_GBREAK_EBASEGAZ 15
#define RE_GBREAK_EMODIFIER 16
#define RE_GBREAK_GLUEAFTERZWJ 17

/* Line-break class values, numbered consecutively 0..42.
 * NOTE(review): presumably the values returned by re_get_line_break() --
 * confirm against the implementation. */
#define RE_LBREAK_UNKNOWN 0
#define RE_LBREAK_COMBININGMARK 1
#define RE_LBREAK_BREAKAFTER 2
#define RE_LBREAK_LINEFEED 3
#define RE_LBREAK_MANDATORYBREAK 4
#define RE_LBREAK_CARRIAGERETURN 5
#define RE_LBREAK_SPACE 6
#define RE_LBREAK_EXCLAMATION 7
#define RE_LBREAK_QUOTATION 8
#define RE_LBREAK_ALPHABETIC 9
#define RE_LBREAK_PREFIXNUMERIC 10
#define RE_LBREAK_POSTFIXNUMERIC 11
#define RE_LBREAK_OPENPUNCTUATION 12
#define RE_LBREAK_CLOSEPARENTHESIS 13
#define RE_LBREAK_INFIXNUMERIC 14
#define RE_LBREAK_HYPHEN 15
#define RE_LBREAK_BREAKSYMBOLS 16
#define RE_LBREAK_NUMERIC 17
#define RE_LBREAK_CLOSEPUNCTUATION 18
#define RE_LBREAK_NEXTLINE 19
#define RE_LBREAK_GLUE 20
#define RE_LBREAK_AMBIGUOUS 21
#define RE_LBREAK_BREAKBEFORE 22
#define RE_LBREAK_HEBREWLETTER 23
#define RE_LBREAK_COMPLEXCONTEXT 24
#define RE_LBREAK_JL 25
#define RE_LBREAK_JV 26
#define RE_LBREAK_JT 27
#define RE_LBREAK_NONSTARTER 28
#define RE_LBREAK_ZWSPACE 29
#define RE_LBREAK_ZWJ 30
#define RE_LBREAK_BREAKBOTH 31
#define RE_LBREAK_INSEPARABLE 32
#define RE_LBREAK_WORDJOINER 33
#define RE_LBREAK_IDEOGRAPHIC 34
#define RE_LBREAK_EBASE 35
#define RE_LBREAK_CONDITIONALJAPANESESTARTER 36
#define RE_LBREAK_H2 37
#define RE_LBREAK_H3 38
#define RE_LBREAK_SURROGATE 39
#define RE_LBREAK_CONTINGENTBREAK 40
#define RE_LBREAK_REGIONALINDICATOR 41
#define RE_LBREAK_EMODIFIER 42

/* Data tables defined in another translation unit. The array sizes are part
 * of the declarations, so they must match the definitions exactly.
 * NOTE(review): presumably re_strings[] holds the property and value names
 * referenced by the `name` fields of RE_Property / RE_PropertyValue, and
 * re_get_property[] is indexed by property id -- confirm against the
 * defining source file. */
extern char* re_strings[1447];
extern RE_Property re_properties[173];
extern RE_PropertyValue re_property_values[1589];
extern RE_UINT16 re_expand_on_folding[104];
extern RE_GetPropertyFunc re_get_property[94];

/* Per-property lookups. Every function here except
 * re_get_script_extensions() takes a codepoint and returns an RE_UINT32, so
 * it is compatible with RE_GetPropertyFunc.
 * NOTE(review): presumably the return value is the property's value for the
 * codepoint (0/1 for binary properties, an enumerated value such as
 * RE_WBREAK_* or RE_PROP_* otherwise) -- confirm against the implementation. */
RE_UINT32 re_get_alphabetic(RE_UINT32 codepoint);
RE_UINT32 re_get_alphanumeric(RE_UINT32 codepoint);
RE_UINT32 re_get_any(RE_UINT32 codepoint);
RE_UINT32 re_get_ascii_hex_digit(RE_UINT32 codepoint);
RE_UINT32 re_get_bidi_class(RE_UINT32 codepoint);
RE_UINT32 re_get_bidi_control(RE_UINT32 codepoint);
RE_UINT32 re_get_bidi_mirrored(RE_UINT32 codepoint);
RE_UINT32 re_get_blank(RE_UINT32 codepoint);
RE_UINT32 re_get_block(RE_UINT32 codepoint);
RE_UINT32 re_get_canonical_combining_class(RE_UINT32 codepoint);
RE_UINT32 re_get_cased(RE_UINT32 codepoint);
RE_UINT32 re_get_case_ignorable(RE_UINT32 codepoint);
RE_UINT32 re_get_changes_when_casefolded(RE_UINT32 codepoint);
RE_UINT32 re_get_changes_when_casemapped(RE_UINT32 codepoint);
RE_UINT32 re_get_changes_when_lowercased(RE_UINT32 codepoint);
RE_UINT32 re_get_changes_when_titlecased(RE_UINT32 codepoint);
RE_UINT32 re_get_changes_when_uppercased(RE_UINT32 codepoint);
RE_UINT32 re_get_dash(RE_UINT32 codepoint);
RE_UINT32 re_get_decomposition_type(RE_UINT32 codepoint);
RE_UINT32 re_get_default_ignorable_code_point(RE_UINT32 codepoint);
RE_UINT32 re_get_deprecated(RE_UINT32 codepoint);
RE_UINT32 re_get_diacritic(RE_UINT32 codepoint);
RE_UINT32 re_get_east_asian_width(RE_UINT32 codepoint);
RE_UINT32 re_get_emoji(RE_UINT32 codepoint);
RE_UINT32 re_get_emoji_component(RE_UINT32 codepoint);
RE_UINT32 re_get_emoji_modifier(RE_UINT32 codepoint);
RE_UINT32 re_get_emoji_modifier_base(RE_UINT32 codepoint);
RE_UINT32 re_get_emoji_presentation(RE_UINT32 codepoint);
RE_UINT32 re_get_extended_pictographic(RE_UINT32 codepoint);
RE_UINT32 re_get_extender(RE_UINT32 codepoint);
RE_UINT32 re_get_general_category(RE_UINT32 codepoint);
RE_UINT32 re_get_graph(RE_UINT32 codepoint);
RE_UINT32 re_get_grapheme_base(RE_UINT32 codepoint);
RE_UINT32 re_get_grapheme_cluster_break(RE_UINT32 codepoint);
RE_UINT32 re_get_grapheme_extend(RE_UINT32 codepoint);
RE_UINT32 re_get_grapheme_link(RE_UINT32 codepoint);
RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 codepoint);
RE_UINT32 re_get_hex_digit(RE_UINT32 codepoint);
RE_UINT32 re_get_hyphen(RE_UINT32 codepoint);
RE_UINT32 re_get_id_continue(RE_UINT32 codepoint);
RE_UINT32 re_get_ideographic(RE_UINT32 codepoint);
RE_UINT32 re_get_ids_binary_operator(RE_UINT32 codepoint);
RE_UINT32 re_get_id_start(RE_UINT32 codepoint);
RE_UINT32 re_get_ids_trinary_operator(RE_UINT32 codepoint);
RE_UINT32 re_get_indic_positional_category(RE_UINT32 codepoint);
RE_UINT32 re_get_indic_syllabic_category(RE_UINT32 codepoint);
RE_UINT32 re_get_join_control(RE_UINT32 codepoint);
RE_UINT32 re_get_joining_group(RE_UINT32 codepoint);
RE_UINT32 re_get_joining_type(RE_UINT32 codepoint);
RE_UINT32 re_get_line_break(RE_UINT32 codepoint);
RE_UINT32 re_get_logical_order_exception(RE_UINT32 codepoint);
RE_UINT32 re_get_lowercase(RE_UINT32 codepoint);
RE_UINT32 re_get_math(RE_UINT32 codepoint);
RE_UINT32 re_get_nfc_quick_check(RE_UINT32 codepoint);
RE_UINT32 re_get_nfd_quick_check(RE_UINT32 codepoint);
RE_UINT32 re_get_nfkc_quick_check(RE_UINT32 codepoint);
RE_UINT32 re_get_nfkd_quick_check(RE_UINT32 codepoint);
RE_UINT32 re_get_noncharacter_code_point(RE_UINT32 codepoint);
RE_UINT32 re_get_numeric_type(RE_UINT32 codepoint);
RE_UINT32 re_get_numeric_value(RE_UINT32 codepoint);
RE_UINT32 re_get_other_alphabetic(RE_UINT32 codepoint);
RE_UINT32 re_get_other_default_ignorable_code_point(RE_UINT32 codepoint);
RE_UINT32 re_get_other_grapheme_extend(RE_UINT32 codepoint);
RE_UINT32 re_get_other_id_continue(RE_UINT32 codepoint);
RE_UINT32 re_get_other_id_start(RE_UINT32 codepoint);
RE_UINT32 re_get_other_lowercase(RE_UINT32 codepoint);
RE_UINT32 re_get_other_math(RE_UINT32 codepoint);
RE_UINT32 re_get_other_uppercase(RE_UINT32 codepoint);
RE_UINT32 re_get_pattern_syntax(RE_UINT32 codepoint);
RE_UINT32 re_get_pattern_white_space(RE_UINT32 codepoint);
RE_UINT32 re_get_posix_alnum(RE_UINT32 codepoint);
RE_UINT32 re_get_posix_digit(RE_UINT32 codepoint);
RE_UINT32 re_get_posix_punct(RE_UINT32 codepoint);
RE_UINT32 re_get_posix_xdigit(RE_UINT32 codepoint);
RE_UINT32 re_get_prepended_concatenation_mark(RE_UINT32 codepoint);
RE_UINT32 re_get_print(RE_UINT32 codepoint);
RE_UINT32 re_get_quotation_mark(RE_UINT32 codepoint);
RE_UINT32 re_get_radical(RE_UINT32 codepoint);
RE_UINT32 re_get_regional_indicator(RE_UINT32 codepoint);
RE_UINT32 re_get_script(RE_UINT32 codepoint);
/* Unlike its neighbours, this lookup writes through `scripts` and returns an
 * int, so it does not fit RE_GetPropertyFunc.
 * NOTE(review): presumably it stores up to RE_MAX_SCX script values in
 * `scripts` and returns how many were written -- confirm against the
 * implementation. */
int re_get_script_extensions(RE_UINT32 codepoint, RE_UINT8* scripts);
RE_UINT32 re_get_sentence_break(RE_UINT32 codepoint);
RE_UINT32 re_get_sentence_terminal(RE_UINT32 codepoint);
RE_UINT32 re_get_soft_dotted(RE_UINT32 codepoint);
RE_UINT32 re_get_terminal_punctuation(RE_UINT32 codepoint);
RE_UINT32 re_get_unified_ideograph(RE_UINT32 codepoint);
RE_UINT32 re_get_uppercase(RE_UINT32 codepoint);
RE_UINT32 re_get_variation_selector(RE_UINT32 codepoint);
RE_UINT32 re_get_white_space(RE_UINT32 codepoint);
RE_UINT32 re_get_word(RE_UINT32 codepoint);
RE_UINT32 re_get_word_break(RE_UINT32 codepoint);
RE_UINT32 re_get_xdigit(RE_UINT32 codepoint);
RE_UINT32 re_get_xid_continue(RE_UINT32 codepoint);
RE_UINT32 re_get_xid_start(RE_UINT32 codepoint);
/* Case-mapping helpers.
 * re_get_all_cases() and re_get_full_case_folding() write through an
 * RE_UINT32 output pointer and return an int; re_get_simple_case_folding()
 * returns a single RE_UINT32.
 * NOTE(review): presumably the int results are the number of codepoints
 * written, bounded by RE_MAX_CASES and RE_MAX_FOLDED respectively, and the
 * caller must supply buffers of at least that size -- confirm against the
 * implementation. */
int re_get_all_cases(RE_UINT32 codepoint, RE_UINT32* cases);
RE_UINT32 re_get_simple_case_folding(RE_UINT32 codepoint);
int re_get_full_case_folding(RE_UINT32 codepoint, RE_UINT32* folded);