File: cmapshare.py

package info (click to toggle)
mupdf 1.25.1%2Bds1-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 21,620 kB
  • sloc: ansic: 270,929; python: 20,709; java: 6,916; javascript: 1,865; makefile: 1,130; xml: 550; sh: 430; cpp: 325; cs: 313; awk: 10; sed: 7; lisp: 3
file content (59 lines) | stat: -rw-r--r-- 1,489 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python3

# Find and extract common CMap subsets.
# Takes flattened CMaps as input, using only the 'cidchar' sections.
# The outputs are truncated, so use 'cmapflatten.py' to clean them up.
#
# Usage: cmapshare.py <common-output> <input-cmap> [<input-cmap> ...]

import sys, os

def load_cmap_set(filename):
	"""Return the set of 'cidchar' entries found in a CMap file.

	Every line that lies between a line ending in 'begincidchar' and the
	next line ending in 'endcidchar' is collected, with surrounding
	whitespace stripped. Lines outside such sections, and the marker lines
	themselves, are ignored.

	filename -- path of the CMap file to read.
	"""
	cmap = set()
	active = False
	# Use a context manager so the file is closed deterministically instead
	# of being left open until the garbage collector gets to it.
	with open(filename) as f:
		for line in f:
			line = line.strip()
			# The end marker is tested before collecting and the begin
			# marker after, so neither marker line lands in the set.
			if line.endswith("endcidchar"):
				active = False
			if active:
				cmap.add(line)
			if line.endswith("begincidchar"):
				active = True
	return cmap

def load_cmap_prologue(filename):
	"""Return the lines of a CMap file that precede its first cidchar section.

	Lines are stripped of surrounding whitespace and returned in file order.
	Reading stops at the first line ending in 'begincidchar', which is not
	included. If no such line exists, every line of the file is returned.

	filename -- path of the CMap file to read.
	"""
	prologue = []
	# The context manager guarantees the file is closed even though the loop
	# normally exits early via 'break'.
	with open(filename) as f:
		for line in f:
			line = line.strip()
			if line.endswith("begincidchar"):
				break
			prologue.append(line)
	return prologue

# Trailer lines written after the cidchar entries of every generated file.
epilogue = ['endcidchar']

# The shared CMap is named after the output path (first argument),
# with any directory part removed.
common_name = os.path.basename(sys.argv[1])

# Common subset: the entries present in every input CMap
# (second argument onwards).
common = load_cmap_set(sys.argv[2]).intersection(
	*[load_cmap_set(name) for name in sys.argv[3:]]
)

def print_cmap(filename, prologue, cmap):
	"""Write a CMap file made of a prologue and a single cidchar section.

	filename -- path of the file to create; it is opened in "w" mode, so an
	            existing file is overwritten.
	prologue -- lines to emit before the cidchar section. Lines ending in
	            'usecmap' are dropped, and a '/<common_name> usecmap' line
	            (using the module-level 'common_name') is written right
	            after a line equal to 'begincmap'.
	cmap     -- collection of cidchar entry lines. They are written in sorted
	            order after a '<count> begincidchar' header and followed by
	            the module-level 'epilogue' lines.
	"""
	# Use a context manager so the output is flushed and closed as soon as
	# writing finishes, rather than whenever the file object is collected.
	with open(filename, "w") as out:
		for line in prologue:
			# Any existing 'usecmap' reference is replaced by one that
			# points at the common subset.
			if not line.endswith("usecmap"):
				print(line, file=out)
			if line == 'begincmap':
				print("/"+common_name, "usecmap", file=out)
		print(len(cmap), "begincidchar", file=out)
		for line in sorted(cmap):
			print(line, file=out)
		for line in epilogue:
			print(line, file=out)

# Emit the shared entries into the output file given as the first argument.
print_cmap(sys.argv[1], ["/CMapName /%s" % common_name], common)

# For each input CMap, write "<input>.shared" holding its original prologue
# and only the entries that are not part of the common subset.
for path in sys.argv[2:]:
	leftover = load_cmap_set(path).difference(common)
	print_cmap(path + ".shared", load_cmap_prologue(path), leftover)