File: _can_cmap_data.py

package info (click to toggle)
python-reportlab 2.0dfsg-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 9,140 kB
  • ctags: 6,455
  • sloc: python: 58,703; xml: 1,494; makefile: 88
file content (58 lines) | stat: -rw-r--r-- 1,810 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#
"""
This is a utility to 'can' the widths data for certain CID fonts.
Now that we're using Unicode, we don't need 20 CMAP files for each Asian
language, nor the widths of the non-normal characters encoded in each
font.  We just want a dictionary of the character widths in a given
font which are NOT 1000 units (i.e. one full em) wide, keyed on Unicode
character (not CID).

Running off CMAP files we get the following widths...
>>> font.stringWidth(unicode(','), 10)
2.5
>>> font.stringWidth(unicode('m'), 10)
7.7800000000000002
>>> font.stringWidth(u'\u6771\u4EAC', 10)
20.0
>>> 

"""

from pprint import pprint as pp

from reportlab.pdfbase._cidfontdata import defaultUnicodeEncodings
from reportlab.pdfbase.cidfonts import UnicodeCIDFont


def run():
    """Generate canned_widths.py with the non-default glyph widths per font.

    For every font name in defaultUnicodeEncodings a UnicodeCIDFont is
    built, and each (code point, CID) pair in its encoding's cmap is looked
    up in the face's explicit widths table.  Entries whose width differs
    from 1000 (the value assumed when a CID has no explicit width) are
    collected in a dictionary keyed on the Unicode character.

    The result is written to 'canned_widths.py' in the current working
    directory as Python source defining widthsByUnichar, a dictionary
    mapping each font name to its per-character width dictionary.
    Progress messages are printed to standard output.  Returns None.
    """
    buf = ['widthsByUnichar = {}']
    for (fontName, (language, encName)) in defaultUnicodeEncodings.items():
        # Single-argument print with parentheses emits the same text under
        # the Python 2 print statement this script was written for.
        print('handling %s : %s : %s' % (fontName, language, encName))

        # this does just about all of it for us, as all the info
        # we need is present.
        font = UnicodeCIDFont(fontName)

        widthsByCID = font.face._explicitWidths
        cmap = font.encoding._cmap
        nonStandardWidthsByUnichar = {}
        for (codePoint, cid) in cmap.items():
            # CIDs without an explicit width count as 1000 and are skipped,
            # so only the exceptions end up in the canned data.
            width = widthsByCID.get(cid, 1000)
            if width != 1000:
                nonStandardWidthsByUnichar[unichr(codePoint)] = width

        print('created font width map (%d items).  ' % len(nonStandardWidthsByUnichar))

        buf.append('widthsByUnichar["%s"] = %s' % (fontName, repr(nonStandardWidthsByUnichar)))

    src = '\n'.join(buf) + '\n'

    # Close the file explicitly so the data is flushed to disk before we
    # report success, rather than relying on garbage collection.
    outFile = open('canned_widths.py', 'w')
    try:
        outFile.write(src)
    finally:
        outFile.close()
    print('wrote canned_widths.py')

# Regenerate canned_widths.py only when this file is executed as a script;
# importing the module has no side effects.
if __name__ == '__main__':
    run()