#!/bin/sh
# This script is responsible for generating some of the Unicode tables used
# in this project. It's a little weird here since ucd-generate is itself
# used to build the tables used by some of its dependencies. However, most
# tables are generated only for use in tests and benchmarks.
#
# Usage is simple. First, download the Unicode data:
#
# $ mkdir ucd
# $ cd ucd
# $ curl -LO https://www.unicode.org/Public/zipped/14.0.0/UCD.zip
# $ unzip UCD.zip
#
# And then run this script from the root of this repository by pointing it at
# the data directory downloaded above:
#
# $ ./scripts/generate-unicode-tables path/to/ucd
# Abort on the first failing command or on use of an unset variable. Without
# this, a failed ucd-generate invocation further down would be ignored: its
# output redirection has already truncated the target file by then, so the
# script would leave an empty table behind and simply carry on.
set -eu

# Exactly one argument is expected: the directory holding the unpacked UCD.
if [ "$#" -ne 1 ]; then
  echo "Usage: $(basename "$0") <ucd-data-directory>" >&2
  exit 1
fi
ucddir="$1"

# Fail early with one clear message instead of letting every ucd-generate
# invocation that follows trip over a bad path.
if [ ! -d "$ucddir" ]; then
  echo "$(basename "$0"): not a directory: $ucddir" >&2
  exit 1
fi
# FST tables for the benchmarks. All three invocations are pointed at the same
# --fst-dir, which is where ucd-generate is asked to place its output.
printf '%s\n' "generating FSTs for benchmarks"
out="benches/tables/fst"
ucd-generate general-category "$ucddir" \
  --exclude unassigned \
  --enum \
  --fst-dir "$out"
ucd-generate jamo-short-name "$ucddir" --fst-dir "$out"
ucd-generate names "$ucddir" \
  --no-aliases \
  --no-hangul \
  --no-ideograph \
  --fst-dir "$out"
# Slice tables for the benchmarks. Here ucd-generate writes to stdout, and each
# invocation's output is captured into its own .rs file under $out.
printf '%s\n' "generating sorted slices for benchmarks"
out="benches/tables/slice"
ucd-generate general-category "$ucddir" \
  --exclude unassigned \
  > "$out/general_categories.rs"
ucd-generate general-category "$ucddir" \
  --exclude unassigned \
  --enum \
  > "$out/general_category.rs"
ucd-generate jamo-short-name "$ucddir" > "$out/jamo_short_name.rs"
ucd-generate names "$ucddir" \
  --no-aliases \
  --no-hangul \
  --no-ideograph \
  > "$out/names.rs"
# Trie-set form of the general-category table, used by the ucd-trie benchmarks.
printf '%s\n' "generating tables for ucd-trie benchmarks"
out="benches/tables/trie"
ucd-generate general-category "$ucddir" \
  --exclude unassigned \
  --trie-set \
  > "$out/general_categories.rs"
# General-category table (default output format, nothing excluded) written
# into the ucd-trie crate's source directory for its tests.
printf '%s\n' "generating tables for ucd-trie tests"
out="ucd-trie/src"
ucd-generate general-category "$ucddir" \
  > "$out/general_category.rs"
# Tables for the ucd-util tests. Each one lands in a file named after its
# subcommand with dashes turned into underscores
# (property-names -> property_names.rs, and so on).
printf '%s\n' "generating tables for ucd-util tests"
out="ucd-util/src/unicode_tables"
for table in property-names property-values jamo-short-name; do
  ucd-generate "$table" "$ucddir" \
    > "$out/$(printf '%s' "$table" | tr '-' '_').rs"
done
# Run rustfmt through cargo, pinned to the stable toolchain via the `+stable`
# override. Presumably this is here so the freshly generated tables end up in
# the project's canonical formatting -- confirm if that matters to you.
cargo +stable fmt