File: find_language_for_character.py

package info (click to toggle)
python-glyphsets 1.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,396 kB
  • sloc: python: 973; xml: 432; sh: 11; makefile: 3
file content (74 lines) | stat: -rw-r--r-- 2,465 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""
Browse through all gflanguages and find the input character.

Syntax:
python scripts/find_language_for_character.py <character>
(<character> can be either a hexadecimal code point written as 0x0000, e.g. 0x0041, or a single literal Unicode character.)

Please manually install tabulate if not present: pip install tabulate
"""

import argparse
import gflanguages
import sys
import tabulate
import unicodedata

# Load the gflanguages data once, at import time. Both objects are used further
# down as mappings: `regions` is indexed by country code and `languages` by
# language code.
regions = gflanguages.LoadRegions()
languages = gflanguages.LoadLanguages()

# Exemplar character categories that are searched for the input character.
EXEMPLAR_CATEGORIES = ("base", "index", "marks", "numerals", "punctuation")


def parse_character(argument: str) -> str:
    """Turn the command-line argument into a single character.

    Args:
        argument: Either a hexadecimal code point prefixed with ``0x`` (the
            prefix is matched case-insensitively) or one literal character.

    Returns:
        The one-character string to search for.

    Raises:
        ValueError: If the hexadecimal value is malformed or outside the
            Unicode range, or if a literal argument is not exactly one
            character long.
    """
    if argument[:2].lower() == "0x":
        try:
            return chr(int(argument, 16))
        except (ValueError, OverflowError) as err:
            raise ValueError(
                f"{argument!r} is not a valid hexadecimal Unicode code point"
            ) from err
    # ord() only accepts exactly one character; reject anything else here with
    # a readable message instead of letting it fail later with a TypeError.
    if len(argument) != 1:
        raise ValueError(
            f"expected a single character or a 0x0000 string, got {argument!r}"
        )
    return argument


def format_codepoint(character: str) -> str:
    """Return the code point of *character* as ``0x`` + at least 4 hex digits."""
    return f"0x{ord(character):04X}"


def find_languages(character, languages, regions):
    """Collect one table row per (language, category) containing *character*.

    Args:
        character: The single character to look for.
        languages: Mapping of language code to a language record exposing
            ``exemplar_chars``, ``name``, ``population``, ``script`` and
            ``region``.
        regions: Mapping of country code to a record exposing
            ``region_group``.

    Returns:
        A list of ``(code, name, category, population, script, regions)``
        tuples sorted by population, largest first. A language appears once
        for every exemplar category in which the character (or its presence
        in the upper-cased exemplar string) is found.
    """
    rows = []
    for lang_code in languages:
        lang = languages[lang_code]
        chars = lang.exemplar_chars
        if not chars:
            continue

        # The region string depends only on the language, so build it lazily
        # and at most once instead of once per matching category.
        region_names = None
        for category in EXEMPLAR_CATEGORIES:
            exemplars = getattr(chars, category)
            if character not in exemplars and character not in exemplars.upper():
                continue

            if region_names is None:
                groups = set()
                for country_code in regions:
                    if country_code in lang.region:
                        groups.update(regions[country_code].region_group)
                # Sorted so the output is stable between runs.
                region_names = ", ".join(sorted(groups))

            rows.append(
                (
                    lang_code,
                    lang.name,
                    category,
                    lang.population,
                    lang.script,
                    region_names,
                )
            )

    rows.sort(key=lambda row: row[3], reverse=True)
    return rows


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Find languages using a given character."
    )
    parser.add_argument(
        "character", metavar="CHAR", help="Unicode character or 0x0000 string"
    )
    args = parser.parse_args()

    try:
        input_character = parse_character(args.character)
    except ValueError as err:
        # Report bad input as a normal usage error rather than a traceback.
        parser.error(str(err))

    unicode_string = format_codepoint(input_character)
    # Some code points (e.g. control characters) have no Unicode name; supply
    # a default so unicodedata.name() does not raise ValueError for them.
    character_name = unicodedata.name(input_character, "<unnamed>")
    print(f"Character: [{input_character}]  ({unicode_string} {character_name})")

    found_languages = find_languages(input_character, languages, regions)
    print(
        tabulate.tabulate(
            found_languages,
            headers=["Language", "Name", "Category", "Speakers", "Script", "Regions"],
        )
    )