File: synonyms.py

package info (click to toggle)
python-languagecodes 1.1.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 640 kB
  • sloc: python: 16,549; makefile: 7; sh: 5
file content (41 lines) | stat: -rw-r--r-- 992 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# This is a set of synonyms for pragmatic usage in NLP. It is based on
# working with Tesseract 3.04, but should be applicable elsewhere.

from typing import Iterable


# Groups of language codes that are treated as interchangeable. Each tuple is
# one group. expand_synonyms() returns the first group containing the code it
# is given, so the order of the groups matters whenever a code is listed in
# more than one of them.
LANG_SYNONYMS = [
    ("srp", "hbs", "hrv", "bos"),
    # "sqi" (ISO 639-2/T) and "alb" (ISO 639-2/B) are the Albanian codes.
    # NOTE(review): "sli" is presumably a typo for "sqi"; it is kept so that
    # existing lookups keep resolving, and "sqi" is appended at the end so
    # the positions of the previous members do not change.
    ("sli", "alb", "sqi"),
    ("slk", "slo"),
    ("ron", "rum"),
    ("nor", "non"),
    ("nld", "dut"),
    ("mya", "bur"),
    ("msa", "may"),
    ("mkd", "mac"),
    ("kat", "geo"),
    ("isl", "ice"),
    ("fre", "fra"),
    ("fas", "per"),
    ("eus", "baq"),
    ("ell", "gre"),
    ("ger", "deu"),
    ("wel", "cym"),
    ("chi_sim", "chi_tra", "chi", "zho"),
    ("ces", "cze"),
    ("bod", "tib"),
    ("aze_cyrl", "aze"),
    ("fil", "tgl"),
    ("nep", "npi"),
    # NOTE(review): "bur" and "mya" are also members of the ("mya", "bur")
    # group above, which is matched first for those two codes. This wider
    # group is therefore only returned for "int", "tvn", "tco", "rki" and
    # "rmz" -- confirm that this asymmetry is intended.
    ("bur", "mya", "int", "tvn", "tco", "rki", "rmz"),
]


def expand_synonyms(language: str) -> Iterable[str]:
    """Return the codes considered interchangeable with ``language``.

    The groups in ``LANG_SYNONYMS`` are scanned in order and the first one
    that contains ``language`` is returned unchanged (the stored tuple).
    A code that belongs to no group is returned as a one-element list.
    """
    matching_groups = (group for group in LANG_SYNONYMS if language in group)
    # next() yields the first hit, or the fallback when nothing matched.
    return next(matching_groups, [language])