1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
|
// License: GPLv3 Copyright: 2023, Kovid Goyal, <kovid at kovidgoyal.net>
package unicode_names
import (
"bytes"
_ "embed"
"encoding/binary"
"fmt"
"strings"
"sync"
"time"
"github.com/kovidgoyal/kitty/tools/utils"
"github.com/kovidgoyal/kitty/tools/utils/images"
)
// mark_set is a set of marks. A mark is a uint16 index into the marks slice,
// used in place of the full rune when indexing and intersecting results.
type mark_set = *utils.Set[uint16]

// unicode_name_data is the embedded contents of data_generated.bin. It is
// decoded by utils.ReadCompressedEmbeddedData in parse_data.
//
//go:embed data_generated.bin
var unicode_name_data string

// Blank reference to fmt so the import stays used.
var _ = fmt.Print

// names maps a code point to its name; filled in by parse_record.
var names map[rune]string

// marks maps a mark (record index) to the code point of that record.
var marks []rune

// word_map maps a word to the marks of every record that contains the word.
var word_map map[string][]uint16
func add_word(codepoint uint16, word []byte) {
if codepoint <= 32 || codepoint == 127 || (128 <= codepoint && codepoint <= 159) || len(word) < 2 {
return
}
w := utils.UnsafeBytesToString(word)
word_map[w] = append(word_map[w], codepoint)
}
// add_words splits raw on single ASCII spaces and hands every non-empty
// piece to add_word together with codepoint. Runs of spaces and
// leading/trailing spaces produce no calls.
func add_words(codepoint uint16, raw []byte) {
	space := []byte{' '}
	for len(raw) > 0 {
		var word []byte
		word, raw, _ = bytes.Cut(raw, space)
		if len(word) > 0 {
			add_word(codepoint, word)
		}
	}
}
// parse_record decodes one record and stores it under the given mark.
//
// Layout read from record (all integers little-endian):
//
//	bytes 0-3  code point (uint32)
//	bytes 4-5  length of the name in bytes (uint16)
//	then       the name, followed by any remaining bytes
//
// The name is stored in names, and both the name and any trailing bytes are
// split into words and indexed against mark.
func parse_record(record []byte, mark uint16) {
	cp := rune(binary.LittleEndian.Uint32(record))
	// Must be set before any words are indexed for this mark.
	marks[mark] = cp
	name_len := int(binary.LittleEndian.Uint16(record[4:]))
	payload := record[6:]
	name_bytes := payload[:name_len]
	names[cp] = utils.UnsafeBytesToString(name_bytes)
	add_words(mark, name_bytes)
	// Anything after the name is extra search words for the same record.
	if len(payload) > name_len {
		add_words(mark, payload[name_len:])
	}
}
// parse_once guards parse_data so the embedded data is decoded only once.
var parse_once sync.Once

// parse_data decodes the embedded name data and fills names, marks and
// word_map.
//
// The decoded buffer starts with two little-endian uint32 values, the number
// of records and the number of words, which are used to size the tables.
// They are followed by the records, each prefixed by a little-endian uint16
// byte length. Records are assigned marks 0, 1, 2, ... in the order they
// appear.
func parse_data() {
	data := utils.ReadCompressedEmbeddedData(unicode_name_data)
	num_of_lines := binary.LittleEndian.Uint32(data)
	num_of_words := binary.LittleEndian.Uint32(data[4:])
	data = data[8:]

	names = make(map[rune]string, num_of_lines)
	word_map = make(map[string][]uint16, num_of_words)
	marks = make([]rune, num_of_lines)

	// NOTE(review): mark is a uint16 and would wrap past 65535 records.
	for mark := uint16(0); len(data) > 0; mark++ {
		end := 2 + int(binary.LittleEndian.Uint16(data))
		parse_record(data[2:end], mark)
		data = data[end:]
	}
}
// Initialize loads the embedded Unicode name data into the package tables.
// It may be called any number of times; parse_data runs only on the first
// call because it is dispatched through parse_once.
func Initialize() {
	parse_once.Do(parse_data)
}
// NameForCodePoint returns the name recorded for cp, or the empty string
// when cp has no entry. The name data is loaded on first use.
func NameForCodePoint(cp rune) string {
	Initialize()
	if name, found := names[cp]; found {
		return name
	}
	return ""
}
// find_matching_codepoints collects the marks of every word in word_map
// that starts with prefix. The result is nil when no word matches; the set
// is only allocated on the first hit.
func find_matching_codepoints(prefix string) (ans mark_set) {
	for word, word_marks := range word_map {
		if !strings.HasPrefix(word, prefix) {
			continue
		}
		if ans == nil {
			ans = utils.NewSet[uint16](2 * len(word_marks))
		}
		ans.AddItems(word_marks...)
	}
	return ans
}
// marks_for_query returns the marks of the records that match every
// space-separated term of query. The query is lower-cased, each term is
// treated as a word prefix, and the per-term results are intersected.
// The returned set is never nil; it is empty when nothing matches.
func marks_for_query(query string) (ans mark_set) {
	Initialize()
	prefixes := strings.Split(strings.ToLower(query), " ")
	// One slot per prefix, so workers never block on send.
	results := make(chan mark_set, len(prefixes))
	ctx := images.Context{}
	ctx.Parallel(0, len(prefixes), func(nums <-chan int) {
		for i := range nums {
			results <- find_matching_codepoints(prefixes[i])
		}
	})
	close(results)

	// find_matching_codepoints returns nil when a prefix matches nothing.
	// That has to make the whole intersection empty. It must not be
	// confused with "no result seen yet", and it must never reach Intersect.
	unmatched := false
	for x := range results {
		switch {
		case x == nil:
			unmatched = true
		case ans == nil:
			ans = x
		default:
			ans = ans.Intersect(x)
		}
	}
	if unmatched || ans == nil {
		ans = utils.NewSet[uint16](0)
	}
	return ans
}
// CodePointsForQuery returns the code points whose records match query (see
// marks_for_query). The order of the result is whatever order the mark set
// yields. The returned slice is non-nil even when empty.
func CodePointsForQuery(query string) (ans []rune) {
	matched := marks_for_query(query)
	ans = make([]rune, 0, matched.Len())
	for m := range matched.Iterable() {
		ans = append(ans, marks[m])
	}
	return ans
}
// Develop is a development aid: it prints how long loading the name data
// takes, then how long two sample queries take and how many code points
// each one finds.
func Develop() {
	began := time.Now()
	Initialize()
	fmt.Println("Parsing unicode name data took:", time.Since(began))

	report := func(label, query string) {
		began := time.Now()
		found := CodePointsForQuery(query)
		fmt.Println(label, time.Since(began), "and found:", len(found))
	}
	report("Querying arr took:", "arr")
	report("Querying arr right took:", "arr right")
}
|