File: slice_cpdiff.py

package info (click to toggle)
nam-files 2024.09.25-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 8,384 kB
  • sloc: python: 510; makefile: 5
file content (73 lines) | stat: -rw-r--r-- 1,884 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""Diff the codepoints supported by two slice files.

Usage:

    python3 scripts/slice_cpdiff.py file1.nam file2.nam
"""

from collections import defaultdict
from pathlib import Path
import sys
from typing import Set
import unicodeblock.blocks


def codepoints(slice_file: Path) -> Set[int]:
    """Return the set of codepoints declared in a slice file.

    A line contributes a codepoint only if, after removing any trailing
    ``#`` comment and surrounding whitespace, it starts with
    ``codepoints: ``. Every other line is skipped.

    Args:
        slice_file: Path of the slice file to read.

    Returns:
        The integer value of every ``codepoints:`` entry in the file.

    Raises:
        ValueError: If the text after ``codepoints: `` is not an integer.
    """
    prefix = "codepoints: "
    found = set()
    # Decode explicitly instead of relying on the platform default encoding,
    # which may fail on non-ASCII text in comments.
    # NOTE(review): assumes slice files are UTF-8 - confirm.
    with open(slice_file, encoding="utf-8") as f:
        for line in f:
            # Drop a trailing comment, then surrounding whitespace.
            line = line.partition("#")[0].strip()
            if not line.startswith(prefix):
                continue
            value = line[len(prefix):]
            try:
                found.add(int(value))
            except ValueError:
                # Not plain decimal: retry with base 0 so prefixed literals
                # such as 0x4e00 are accepted. Still raises ValueError if
                # the value is not an integer at all.
                found.add(int(value, 0))
    return found


def path_to_file(maybe: str) -> Path:
    """Convert a string to a Path, requiring that it names an existing file.

    Args:
        maybe: Candidate filesystem path.

    Returns:
        ``maybe`` as a ``Path``.

    Raises:
        AssertionError: If the path is not an existing regular file.
    """
    path = Path(maybe)
    # Raise explicitly rather than using ``assert``: assert statements are
    # removed under ``python -O``, which would silently skip this check.
    # The exception type and message are unchanged.
    if not path.is_file():
        raise AssertionError(f"{path} must be a file")
    return path


def list(tag: str, source: str, codepoints: Set[int]):
    """Print one ``<tag> <source> 0x<hex>`` line per codepoint, ascending.

    Note that this function shadows the builtin ``list`` for the rest of
    the module.

    Args:
        tag: Label printed first on every line.
        source: Name printed second on every line.
        codepoints: Codepoints to print; each is shown as lowercase hex,
            zero-padded to at least four digits.
    """
    ascending = sorted(codepoints)
    for cp in ascending:
        print(f"{tag} {source} 0x{cp:04x}")


def list_blocks(prefix: str, codepoints: Set[int]):
    """Print how many of the codepoints fall in each Unicode block.

    One ``<prefix><block> <count>`` line is printed per block, ordered by
    descending count and, for equal counts, by descending block name.

    Args:
        prefix: Text printed at the start of every line.
        codepoints: Codepoints to classify with ``unicodeblock.blocks.of``.
    """
    counts = defaultdict(int)
    for cp in codepoints:
        counts[unicodeblock.blocks.of(chr(cp))] += 1

    def sort_key(item):
        block, count = item
        # NOTE(review): unicodeblock.blocks.of presumably returns None for
        # codepoints outside its block table - confirm. Comparing None with
        # a block name raises TypeError when two counts tie, so None is
        # mapped to "" for ordering only. It is still printed as "None".
        return (count, "" if block is None else block)

    for block, count in sorted(counts.items(), key=sort_key, reverse=True):
        print(f"{prefix}{block} {count}")


def main(argv):
    """Print the codepoint differences between two slice files.

    Output is, in order: one ``only <file> 0x<hex>`` line for each codepoint
    present in just one file, a blank line, the number of codepoints common
    to both files, and for each file the number of codepoints only it has,
    followed by a per-Unicode-block breakdown of those codepoints.

    Args:
        argv: Full argument vector. ``argv[0]`` is ignored; ``argv[1]`` and
            ``argv[2]`` are the paths of the two files to compare.

    Raises:
        AssertionError: If ``argv`` does not hold exactly two arguments
            after the program name.
    """
    # Raise explicitly rather than using ``assert``: assert statements are
    # removed under ``python -O``, which would turn a wrong argument count
    # into an IndexError below. Exception type and message are unchanged.
    if len(argv) != 3:
        raise AssertionError("Must have exactly two arguments")

    nam1 = path_to_file(argv[1])
    nam2 = path_to_file(argv[2])
    nam1cp = codepoints(nam1)
    nam2cp = codepoints(nam2)

    identical = nam1cp & nam2cp
    only1 = nam1cp - nam2cp
    only2 = nam2cp - nam1cp

    # ``list`` is this module's printing helper, not the builtin.
    list("only", nam1.name, only1)
    list("only", nam2.name, only2)

    print()
    print(f"{len(identical)} match")
    print(f"{len(only1)} only in {nam1.name}")
    list_blocks("  ", only1)
    print(f"{len(only2)} only in {nam2.name}")
    list_blocks("  ", only2)


# Run the comparison only when executed as a script, passing the full
# argument vector (program name included) to main.
if __name__ == "__main__":
    main(sys.argv)