File: generate-unicode-tables

package info (click to toggle)
rust-ucd-generate 0.3.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,528 kB
  • sloc: sh: 36; makefile: 2
file content (61 lines) | stat: -rwxr-xr-x 2,116 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/sh

# This script is responsible for generating some of the Unicode tables used
# in this project. It's a little weird here since ucd-generate is itself
# used to build the tables used by some of its dependencies. However, most
# tables are generated only for use in tests and benchmarks.
#
# Usage is simple. First, download the Unicode data:
#
#   $ mkdir ucd
#   $ cd ucd
#   $ curl -LO https://www.unicode.org/Public/zipped/14.0.0/UCD.zip
#   $ unzip UCD.zip
#
# And then run this script from the root of this repository by pointing it at
# the data directory downloaded above:
#
#   $ ./scripts/generate-unicode-tables path/to/ucd

# Abort on the first failing command (-e) and on any use of an unset
# variable (-u). Without this, a failed ucd-generate run is ignored: the
# output redirections below truncate their target files before the command
# starts, so a failure would leave an empty or partial table behind while the
# script carries on as if nothing happened.
set -eu

# Exactly one argument is required: the directory holding the unpacked UCD.
if [ "$#" -ne 1 ]; then
    echo "Usage: $(basename "$0") <ucd-data-directory>" >&2
    exit 1
fi
ucddir="$1"

# Fail early with one clear message instead of letting every generator
# invocation below trip over a bad path (and clobber existing tables).
if [ ! -d "$ucddir" ]; then
    echo "$(basename "$0"): not a directory: $ucddir" >&2
    exit 1
fi

# Benchmark inputs, FST flavour. Every invocation in this section is pointed
# at the same target directory through --fst-dir.
echo "generating FSTs for benchmarks"
fst_dir="benches/tables/fst"
ucd-generate general-category "$ucddir" \
    --exclude unassigned \
    --enum \
    --fst-dir "$fst_dir"
ucd-generate jamo-short-name "$ucddir" --fst-dir "$fst_dir"
ucd-generate names "$ucddir" \
    --no-aliases \
    --no-hangul \
    --no-ideograph \
    --fst-dir "$fst_dir"

# Benchmark inputs, slice flavour. Here the generator's stdout is captured
# into one Rust source file per table.
echo "generating sorted slices for benchmarks"
slice_dir="benches/tables/slice"
ucd-generate general-category "$ucddir" \
    --exclude unassigned \
    > "$slice_dir/general_categories.rs"
ucd-generate general-category "$ucddir" \
    --exclude unassigned \
    --enum \
    > "$slice_dir/general_category.rs"
ucd-generate jamo-short-name "$ucddir" \
    > "$slice_dir/jamo_short_name.rs"
ucd-generate names "$ucddir" \
    --no-aliases \
    --no-hangul \
    --no-ideograph \
    > "$slice_dir/names.rs"

# Benchmark input for ucd-trie: the general-category table requested with
# --trie-set, captured from stdout.
echo "generating tables for ucd-trie benchmarks"
trie_dir="benches/tables/trie"
ucd-generate general-category "$ucddir" \
    --exclude unassigned \
    --trie-set \
    > "$trie_dir/general_categories.rs"

# Test fixture for ucd-trie: general-category output with default options,
# written straight into the crate's src directory.
echo "generating tables for ucd-trie tests"
trie_src="ucd-trie/src"
ucd-generate general-category "$ucddir" \
    > "$trie_src/general_category.rs"

# Test fixtures for ucd-util: three tables, each generated with default
# options and stored as its own file under unicode_tables/.
echo "generating tables for ucd-util tests"
util_tables="ucd-util/src/unicode_tables"
ucd-generate property-names "$ucddir" \
    > "$util_tables/property_names.rs"
ucd-generate property-values "$ucddir" \
    > "$util_tables/property_values.rs"
ucd-generate jamo-short-name "$ucddir" \
    > "$util_tables/jamo_short_name.rs"

# Final step: run `cargo fmt` with the stable toolchain selected via
# `+stable` -- presumably so the freshly generated .rs files above end up
# formatted like the rest of the repository.
cargo +stable fmt