1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
package mahonia
import (
"sync"
)
// Converters for GB18030 encoding.
func init() {
RegisterCharset(&Charset{
Name: "GB18030",
NewDecoder: func() Decoder {
gb18030Once.Do(buildGB18030Tables)
return decodeGB18030Rune
},
NewEncoder: func() Encoder {
gb18030Once.Do(buildGB18030Tables)
return encodeGB18030Rune
},
})
}
// decodeGB18030Rune decodes the GB18030 character at the start of p.
//
// It returns the decoded rune, the number of bytes consumed and a status:
//   - SUCCESS: r is the decoded character and size is 1, 2 or 4.
//   - NO_ROOM: p ends before the character is complete; r and size are 0.
//   - INVALID_CHAR: p does not begin with a valid, mapped sequence; r is
//     U+FFFD and size is 1, so the caller can retry from the next byte.
func decodeGB18030Rune(p []byte) (r rune, size int, status Status) {
	if len(p) == 0 {
		return 0, 0, NO_ROOM
	}

	lead := p[0]
	if lead < 0x80 {
		// Single-byte (ASCII) character.
		return rune(lead), 1, SUCCESS
	}
	// Only 0x81-0xfe may start a multi-byte character. Reject anything else
	// right away instead of asking the caller for more input first.
	if lead < 0x81 || lead > 0xfe {
		return 0xfffd, 1, INVALID_CHAR
	}
	if len(p) < 2 {
		return 0, 0, NO_ROOM
	}

	second := p[1]
	switch {
	case second >= 0x40:
		// Two-byte character: look it up in the main GBK table, then in the
		// supplementary one. A zero entry means "unmapped".
		c := uint16(lead)<<8 | uint16(second)
		r = rune(gbkToUnicode[c])
		if r == 0 {
			r = gbkToUnicodeExtra[c]
		}
		if r != 0 {
			return r, 2, SUCCESS
		}

	case second >= 0x30 && second <= 0x39:
		// Four-byte character: lead, digit, 0x81-0xfe, digit. The second byte
		// must be a digit just like the fourth; bytes 0x3a-0x3f are not valid
		// here and must not be passed on to gb18030Linear.
		if len(p) < 4 {
			return 0, 0, NO_ROOM
		}
		if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 {
			return 0xfffd, 1, INVALID_CHAR
		}

		code := uint32(lead)<<24 | uint32(second)<<16 | uint32(p[2])<<8 | uint32(p[3])
		lin := gb18030Linear(code)

		// Individually mapped characters live in the lookup table.
		if lin <= maxGB18030Linear {
			if r = rune(gb18030LinearToUnicode[lin]); r != 0 {
				return r, 4, SUCCESS
			}
		}
		// Everything else is mapped by offset within a contiguous range.
		for _, rng := range gb18030Ranges {
			if lin >= rng.firstGB && lin <= rng.lastGB {
				return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS
			}
		}
	}

	return 0xfffd, 1, INVALID_CHAR
}
// encodeGB18030Rune writes the GB18030 encoding of r to the start of p.
//
// It returns the number of bytes written and a status:
//   - SUCCESS: 1, 2 or 4 bytes were written.
//   - NO_ROOM: p is too short for the encoding; nothing is written, size is 0.
//   - INVALID_CHAR: r has no encoding; the single byte 0x1a (ASCII SUB) is
//     written in its place and size is 1.
func encodeGB18030Rune(p []byte, r rune) (size int, status Status) {
	if len(p) == 0 {
		return 0, NO_ROOM
	}

	if r < 0 {
		// A negative value is not a code point. Without this guard it would
		// pass the ASCII test below and be emitted as an arbitrary byte.
		p[0] = 0x1a
		return 1, INVALID_CHAR
	}
	if r < 0x80 {
		// ASCII maps to itself.
		p[0] = byte(r)
		return 1, SUCCESS
	}

	if len(p) < 2 {
		return 0, NO_ROOM
	}

	// Two-byte encodings: the table covers the BMP, the supplementary lookup
	// covers everything above it. Zero means "no two-byte encoding".
	var c uint16
	if r < 0x10000 {
		c = unicodeToGBK[r]
	} else {
		c = unicodeToGBKExtra[r]
	}
	if c != 0 {
		p[0] = byte(c >> 8)
		p[1] = byte(c)
		return 2, SUCCESS
	}

	if len(p) < 4 {
		return 0, NO_ROOM
	}

	// Four-byte encodings stored individually (BMP only), big-endian packed.
	if r < 0x10000 {
		if f := unicodeToGB18030[r]; f != 0 {
			p[0] = byte(f >> 24)
			p[1] = byte(f >> 16)
			p[2] = byte(f >> 8)
			p[3] = byte(f)
			return 4, SUCCESS
		}
	}

	// Four-byte encodings mapped by offset within a contiguous range. The
	// linear value is split into mixed-radix digits: bytes 1 and 3 count in
	// base 126 from 0x81, bytes 2 and 4 count in base 10 from 0x30.
	for _, rng := range gb18030Ranges {
		if r < rng.firstRune || r > rng.lastRune {
			continue
		}
		lin := rng.firstGB + uint32(r) - uint32(rng.firstRune)
		p[0] = byte(lin/(10*126*10)) + 0x81
		p[1] = byte(lin/(126*10)%10) + 0x30
		p[2] = byte(lin/10%126) + 0x81
		p[3] = byte(lin%10) + 0x30
		return 4, SUCCESS
	}

	// No mapping at all: emit the substitute byte.
	p[0] = 0x1a
	return 1, INVALID_CHAR
}
// Lazily built state shared by the GB18030 decoder and encoder.
var (
	// gb18030Once ensures buildGB18030Tables runs at most once.
	gb18030Once sync.Once

	// gb18030LinearToUnicode maps gb18030Linear values to Unicode.
	// A zero entry means no character is stored for that value.
	gb18030LinearToUnicode []uint16

	// unicodeToGB18030 is the reverse mapping: indexed by code point, it
	// holds the four-byte code packed into a uint32 (zero when absent).
	unicodeToGB18030 []uint32
)
// buildGB18030Tables allocates fresh lookup tables and fills them from
// gb18030Data: gb18030LinearToUnicode is indexed by the gb18030Linear value of
// each entry's code, unicodeToGB18030 by each entry's Unicode value.
func buildGB18030Tables() {
	// unicodeToGB18030 gets one slot for every 16-bit value.
	const unicodeSlots = 1 << 16

	gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1)
	unicodeToGB18030 = make([]uint32, unicodeSlots)

	for _, entry := range gb18030Data {
		code, cp := entry.gb18030, entry.unicode
		gb18030LinearToUnicode[gb18030Linear(code)] = cp
		unicodeToGB18030[cp] = code
	}
}
|