File: uscript.rl

package info (click to toggle)
golang-github-blevesearch-segment 0.0~git20160915.0.762005e-7
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 4,288 kB
  • sloc: ruby: 202; makefile: 2
file content (101 lines) | stat: -rw-r--r-- 5,030 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# The following Ragel file was autogenerated with ragel/unicode2ragel.rb 
# from: http://www.unicode.org/Public/8.0.0/ucd/Scripts.txt
#
# It defines ["Hangul", "Han", "Hiragana"].
#
# To use this, make sure that your alphtype is set to unsigned char,
# and that your input is in utf8.

%%{
    machine SCRIPTS;
    
    # Hangul: UTF-8 byte sequences for the code points that Scripts.txt
    # assigns to the Hangul script.  Each alternative is one contiguous code
    # point range; a range that crosses a lead/second-byte boundary is split
    # over several lines (continuation lines carry an empty trailing comment).
    #
    # Every trailing byte is restricted to the UTF-8 continuation range
    # 0x80..0xBF.  Wider bounds (0x00.. or ..0xFF) would accept ill-formed
    # input, e.g. a lead byte followed by ASCII, as a Hangul character.
    Hangul = 
        0xE1 0x84 0x80..0xBF        #Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL...
      | 0xE1 0x85..0x86 0x80..0xBF  #
      | 0xE1 0x87 0x80..0xBF        #
      | 0xE3 0x80 0xAE..0xAF    #Mc   [2] HANGUL SINGLE DOT TONE MARK..HANGU...
      | 0xE3 0x84 0xB1..0xBF        #Lo  [94] HANGUL LETTER KIYEOK..HANGUL L...
      | 0xE3 0x85 0x80..0xBF        #
      | 0xE3 0x86 0x80..0x8E        #
      | 0xE3 0x88 0x80..0x9E    #So  [31] PARENTHESIZED HANGUL KIYEOK..PAREN...
      | 0xE3 0x89 0xA0..0xBE    #So  [31] CIRCLED HANGUL KIYEOK..CIRCLED HAN...
      | 0xEA 0xA5 0xA0..0xBC    #Lo  [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANG...
      | 0xEA 0xB0 0x80..0xBF              #Lo [11172] HANGUL SYLLABLE GA..HA...
      | 0xEA 0xB1..0xBF 0x80..0xBF        #
      | 0xEB..0xEC 0x80..0xBF 0x80..0xBF  #
      | 0xED 0x80..0x9D 0x80..0xBF        #
      | 0xED 0x9E 0x80..0xA3              #
      | 0xED 0x9E 0xB0..0xBF    #Lo  [23] HANGUL JUNGSEONG O-YEO..HANGUL JUN...
      | 0xED 0x9F 0x80..0x86    #
      | 0xED 0x9F 0x8B..0xBB    #Lo  [49] HANGUL JONGSEONG NIEUN-RIEUL..HANG...
      | 0xEF 0xBE 0xA0..0xBE    #Lo  [31] HALFWIDTH HANGUL FILLER..HALFWIDTH...
      | 0xEF 0xBF 0x82..0x87    #Lo   [6] HALFWIDTH HANGUL LETTER A..HALFWID...
      | 0xEF 0xBF 0x8A..0x8F    #Lo   [6] HALFWIDTH HANGUL LETTER YEO..HALFW...
      | 0xEF 0xBF 0x92..0x97    #Lo   [6] HALFWIDTH HANGUL LETTER YO..HALFWI...
      | 0xEF 0xBF 0x9A..0x9C    #Lo   [3] HALFWIDTH HANGUL LETTER EU..HALFWI...
      ;

    # Han: UTF-8 byte sequences for the code points that Scripts.txt assigns
    # to the Han script.  Each alternative is one contiguous code point range;
    # a range that crosses a lead/second/third-byte boundary is split over
    # several lines (continuation lines carry an empty trailing comment).
    #
    # Every trailing byte is restricted to the UTF-8 continuation range
    # 0x80..0xBF.  Wider bounds (0x00.. or ..0xFF) would accept ill-formed
    # input, e.g. a lead byte followed by ASCII, as a Han character.
    Han = 
        0xE2 0xBA 0x80..0x99    #So  [26] CJK RADICAL REPEAT..CJK RADICAL RAP
      | 0xE2 0xBA 0x9B..0xBF    #So  [89] CJK RADICAL CHOKE..CJK RADICAL C-S...
      | 0xE2 0xBB 0x80..0xB3    #
      | 0xE2 0xBC 0x80..0xBF        #So [214] KANGXI RADICAL ONE..KANGXI RAD...
      | 0xE2 0xBD..0xBE 0x80..0xBF  #
      | 0xE2 0xBF 0x80..0x95        #
      | 0xE3 0x80 0x85          #Lm       IDEOGRAPHIC ITERATION MARK
      | 0xE3 0x80 0x87          #Nl       IDEOGRAPHIC NUMBER ZERO
      | 0xE3 0x80 0xA1..0xA9    #Nl   [9] HANGZHOU NUMERAL ONE..HANGZHOU NUM...
      | 0xE3 0x80 0xB8..0xBA    #Nl   [3] HANGZHOU NUMERAL TEN..HANGZHOU NUM...
      | 0xE3 0x80 0xBB          #Lm       VERTICAL IDEOGRAPHIC ITERATION MARK
      | 0xE3 0x90 0x80..0xBF        #Lo [6582] CJK UNIFIED IDEOGRAPH-3400..C...
      | 0xE3 0x91..0xBF 0x80..0xBF  #
      | 0xE4 0x80..0xB5 0x80..0xBF  #
      | 0xE4 0xB6 0x80..0xB5        #
      | 0xE4 0xB8 0x80..0xBF              #Lo [20950] CJK UNIFIED IDEOGRAPH-...
      | 0xE4 0xB9..0xBF 0x80..0xBF        #
      | 0xE5..0xE8 0x80..0xBF 0x80..0xBF  #
      | 0xE9 0x80..0xBE 0x80..0xBF        #
      | 0xE9 0xBF 0x80..0x95              #
      | 0xEF 0xA4 0x80..0xBF        #Lo [366] CJK COMPATIBILITY IDEOGRAPH-F9...
      | 0xEF 0xA5..0xA8 0x80..0xBF  #
      | 0xEF 0xA9 0x80..0xAD        #
      | 0xEF 0xA9 0xB0..0xBF        #Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA...
      | 0xEF 0xAA 0x80..0xBF        #
      | 0xEF 0xAB 0x80..0x99        #
      | 0xF0 0xA0 0x80 0x80..0xBF              #Lo [42711] CJK UNIFIED IDEOG...
      | 0xF0 0xA0 0x81..0xBF 0x80..0xBF        #
      | 0xF0 0xA1..0xA9 0x80..0xBF 0x80..0xBF  #
      | 0xF0 0xAA 0x80..0x9A 0x80..0xBF        #
      | 0xF0 0xAA 0x9B 0x80..0x96              #
      | 0xF0 0xAA 0x9C 0x80..0xBF        #Lo [4149] CJK UNIFIED IDEOGRAPH-2A...
      | 0xF0 0xAA 0x9D..0xBF 0x80..0xBF  #
      | 0xF0 0xAB 0x80..0x9B 0x80..0xBF  #
      | 0xF0 0xAB 0x9C 0x80..0xB4        #
      | 0xF0 0xAB 0x9D 0x80..0xBF        #Lo [222] CJK UNIFIED IDEOGRAPH-2B7...
      | 0xF0 0xAB 0x9E..0x9F 0x80..0xBF  #
      | 0xF0 0xAB 0xA0 0x80..0x9D        #
      | 0xF0 0xAB 0xA0 0xA0..0xBF        #Lo [5762] CJK UNIFIED IDEOGRAPH-2B...
      | 0xF0 0xAB 0xA1..0xBF 0x80..0xBF  #
      | 0xF0 0xAC 0x80..0xB9 0x80..0xBF  #
      | 0xF0 0xAC 0xBA 0x80..0xA1        #
      | 0xF0 0xAF 0xA0 0x80..0xBF        #Lo [542] CJK COMPATIBILITY IDEOGRA...
      | 0xF0 0xAF 0xA1..0xA7 0x80..0xBF  #
      | 0xF0 0xAF 0xA8 0x80..0x9D        #
      ;

    # Hiragana: UTF-8 byte sequences for the code points that Scripts.txt
    # assigns to the Hiragana script.  Each alternative is one contiguous code
    # point range; a range that crosses a second-byte boundary is split over
    # two lines (the continuation line carries an empty trailing comment).
    #
    # Every trailing byte is restricted to the UTF-8 continuation range
    # 0x80..0xBF.  Wider bounds (0x00.. or ..0xFF) would accept ill-formed
    # input, e.g. a lead byte followed by ASCII, as a Hiragana character.
    Hiragana = 
        0xE3 0x81 0x81..0xBF    #Lo  [86] HIRAGANA LETTER SMALL A..HIRAGANA ...
      | 0xE3 0x82 0x80..0x96    #
      | 0xE3 0x82 0x9D..0x9E    #Lm   [2] HIRAGANA ITERATION MARK..HIRAGANA ...
      | 0xE3 0x82 0x9F          #Lo       HIRAGANA DIGRAPH YORI
      | 0xF0 0x9B 0x80 0x81     #Lo       HIRAGANA LETTER ARCHAIC YE
      | 0xF0 0x9F 0x88 0x80     #So       SQUARE HIRAGANA HOKA
      ;

}%%