File: compare.py

package info (click to toggle)
textdistance 4.6.3-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 448 kB
  • sloc: python: 2,728; sh: 4; makefile: 3
file content (33 lines) | stat: -rw-r--r-- 828 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# built-in
from itertools import islice
from pathlib import Path
from sys import argv

# project
from textdistance import EntropyNCD


# Usage: compare.py [QVAL LICENSE]
#
# With no arguments, print the sorted names of all license files found and
# exit with status 1. With both arguments, score every license against
# LICENSE using EntropyNCD and print the five entries with the smallest
# score. QVAL is converted with int() and passed to EntropyNCD as ``qval``;
# an empty string ('') selects ``qval=None``.

# read files: map each license name (file stem) to its full text
licenses = {
    path.stem: path.read_text()
    for path in Path('choosealicense.com', '_licenses').iterdir()
    if path.is_file()  # skip sub-directories instead of crashing on them
}

# show licenses list if no arguments passed
if len(argv) == 1:
    print(*sorted(licenses), sep='\n')
    # SystemExit does not depend on the site-provided ``exit`` helper
    raise SystemExit(1)

# both positional arguments are required from here on; a string passed to
# SystemExit is printed to stderr and the process exits with status 1
if len(argv) < 3:
    raise SystemExit('usage: {} [QVAL LICENSE]'.format(argv[0]))

qval = int(argv[1]) if argv[1] else None
compare_with = argv[2]
if compare_with not in licenses:
    raise SystemExit(
        'unknown license {!r}; run without arguments to list known names'
        .format(compare_with),
    )

# compare all with one; the metric object and the reference text do not
# change between iterations, so build them once
metric = EntropyNCD(qval=qval)
reference = licenses[compare_with]
distances = {
    name: metric(reference, content)
    for name, content in licenses.items()
}

# show 5 most similar (smallest score first)
sorted_distances = sorted(distances.items(), key=lambda d: d[1])
for name, distance in islice(sorted_distances, 5):
    print('{:20} {:.4f}'.format(name, distance))