File: update-unicode-data.sh

package info (click to toggle)
glib2.0 2.84.1-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 66,144 kB
  • sloc: ansic: 538,877; python: 9,624; sh: 1,572; xml: 1,482; perl: 1,222; cpp: 535; makefile: 316; javascript: 11
file content (45 lines) | stat: -rwxr-xr-x 1,236 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env bash
#
# Copyright 2022 Canonical Limited
#
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Original author: Marco Trevisan

# The first argument must be an existing directory (the UCD directory);
# otherwise print the usage line and fail.
if [ ! -d "$1" ]; then
    # A usage error is a diagnostic, so it belongs on stderr, not stdout.
    echo "Usage $(basename "$0") UCD-directory [version]" >&2
    exit 1
fi

# Absolute path of the UCD directory given as first argument; it is made
# absolute because the script changes its working directory further down.
ucd=$(realpath "$1") || exit 1
# Optional Unicode version; when empty it is read from the UCD ReadMe file.
version=$2
# Top-level directory of the git checkout that contains this script.  Abort
# if git cannot determine it (it prints its own error) rather than carrying
# on with an empty path.
glib_dir=$(git -C "$(dirname "$0")" rev-parse --show-toplevel) || exit 1

# Sanity-check that the directory holds the UCD files that the generators
# further down are fed with, using the same globs as those invocations.
# The globs are expanded into arrays so the test keeps working when more
# than one file matches; an unmatched glob yields no existing path, so the
# -f test on the first element fails.
unicode_data=("$ucd"/UnicodeData*.txt)
case_folding=("$ucd"/CaseFolding*.txt)
if ! [ -f "${unicode_data[0]}" ] || ! [ -f "${case_folding[0]}" ]; then
    echo "'$ucd' does not look like an Unicode Database directory" >&2
    # Stop here: continuing would run the generators without their inputs
    # and truncate the files they redirect their output into.
    exit 1
fi

# When no version was passed on the command line, take it from the
# "Version <digits and dots> of the Unicode Standard" sentence in the
# ReadMe file(s) of the UCD directory.
if [ -z "$version" ]; then
    readme=("$ucd"/ReadMe*.txt)
    # NOTE: "\+" in a basic regular expression is a GNU sed extension.
    version=$(sed -n "s,.*Version \([0-9.]\+\) of the Unicode Standard.*,\1,p" \
        "${readme[@]}")

    # Nothing matched (or no ReadMe was found): report on stderr and fail.
    if [ -z "$version" ]; then
        echo "Invalid version found" >&2
        exit 1
    fi
fi

# Everything below uses paths relative to the top of the source tree.
if ! cd "$glib_dir"; then
    exit 1
fi

# Directory that receives the regenerated test data.
tests_dir=glib/tests

echo "Updating generated code to Unicode version $version"

# From here on: stop at the first failing command and trace every command.
set -e
set -x

# The table generator is run from inside glib/; the subshell keeps the
# directory change local to this one command.
(cd glib && ./gen-unicode-tables.pl -both "$version" "$ucd")

# Case-folding test data, built from the CaseFolding file.
"$tests_dir"/gen-casefold-txt.py "$version" "$ucd"/CaseFolding*.txt \
    > "$tests_dir"/casefold.txt

# Case-mapping test data, built from UnicodeData plus SpecialCasing.
"$tests_dir"/gen-casemap-txt.py "$version" \
    "$ucd"/UnicodeData*.txt "$ucd"/SpecialCasing*.txt \
    > "$tests_dir"/casemap.txt

# The normalization test file is copied into the tests directory as is.
cp "$ucd"/NormalizationTest.txt "$tests_dir"/