File: categorize_non_idempotent.py

package info (click to toggle)
python-precis-i18n 1.1.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,836 kB
  • sloc: python: 1,825; sh: 28; makefile: 3
file content (33 lines) | stat: -rw-r--r-- 822 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import unicodedata
from collections import Counter

import precis_i18n as precis


def _escape(s):
    """Return *s* as an ASCII-only string produced by the unicode-escape codec."""
    escaped_bytes = s.encode("unicode-escape")
    # The codec emits only ASCII bytes, so decoding them as ASCII is lossless.
    return str(escaped_bytes, "ascii")


def _idempotent_ignoring_space(profile, value):
    """Tell whether enforcing *value* a second time leaves it unchanged.

    *profile* is any object exposing ``enforce(str)``. The output of the
    first call is fed back into ``enforce``; the two outputs are compared
    after stripping leading and trailing whitespace from each, so a
    difference that is only surrounding whitespace still counts as
    idempotent. Exceptions raised by ``enforce`` propagate to the caller.
    """
    first_pass = profile.enforce(value)
    second_pass = profile.enforce(first_pass)
    return second_pass.strip() == first_pass.strip()


# Tally of decomposition tags (the "<...>" prefix reported by
# unicodedata.decomposition) for code points whose enforced form changes
# when it is enforced a second time.
results = Counter()
profile = precis.get_profile("NicknameCaseMapped:ToLower")

# Scan every Unicode code point, U+0000 through U+10FFFF.
for cp in range(0x0110000):
    char = chr(cp)
    # Keep the try body down to the enforce calls: they are the only step
    # expected to raise UnicodeEncodeError (precis_i18n's documented signal
    # for a string the profile does not allow).
    try:
        stable = _idempotent_ignoring_space(profile, char)
    except UnicodeEncodeError:
        continue
    if stable:
        continue

    # decomposition() returns "" when the character has no mapping; guard
    # the lookup so indexing the split result cannot raise IndexError.
    decomp = unicodedata.decomposition(char)
    kind = decomp.split()[0] if decomp else ""
    if kind.startswith("<"):
        results[kind] += 1
    else:
        # No tagged decomposition: report the character individually.
        # name() is given a default so an unnamed code point is printed
        # instead of raising ValueError.
        print(_escape(char), unicodedata.name(char, "<unnamed>"))

print(results)