File: cmap.lisp

package info (click to toggle)
zpb-ttf 1.0.7-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 272 kB
  • sloc: lisp: 2,341; makefile: 2
file content (317 lines) | stat: -rw-r--r-- 14,460 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
;;; Copyright (c) 2006 Zachary Beane, All Rights Reserved
;;;
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;;
;;;   * Redistributions of source code must retain the above copyright
;;;     notice, this list of conditions and the following disclaimer.
;;;
;;;   * Redistributions in binary form must reproduce the above
;;;     copyright notice, this list of conditions and the following
;;;     disclaimer in the documentation and/or other materials
;;;     provided with the distribution.
;;;
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;
;;; Loading data from the "cmap" table.
;;;
;;;  https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
;;;  http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html
;;;
;;; $Id: cmap.lisp,v 1.15 2006/03/23 22:23:32 xach Exp $

(in-package #:zpb-ttf)

(deftype cmap-value-table ()
  `(array (unsigned-byte 16) (*)))

;;; FIXME: "unicode-cmap" is actually a format 4 character map that
;;; happens to currently be loaded from a Unicode-compatible
;;; subtable. However, other character maps (like Microsoft's Symbol
;;; encoding) also use format 4 and could be loaded with these
;;; "unicode" objects and functions.

(defclass unicode-cmap ()
  ((segment-count :initarg :segment-count :reader segment-count)
   (end-codes :initarg :end-codes :reader end-codes)
   (start-codes :initarg :start-codes :reader start-codes)
   (id-deltas :initarg :id-deltas :reader id-deltas)
   (id-range-offsets :initarg :id-range-offsets :reader id-range-offsets)
   (glyph-indexes :initarg :glyph-indexes :accessor glyph-indexes)))

(defclass format-12-cmap ()
  ((group-count :initarg :group-count :reader group-count)
   (start-codes :initarg :start-codes :reader start-codes)
   (end-codes :initarg :end-codes :reader end-codes)
   (glyph-starts :initarg :glyph-starts :accessor glyph-starts)))

(defun load-unicode-cmap-format12 (stream)
  "Load a Unicode character map of type 12 from STREAM starting at the
current offset. Assumes format is already read and checked."
  (let* ((reserved (read-uint16 stream))
         (subtable-length (read-uint32 stream))
         (language-code (read-uint32 stream))
         (group-count (read-uint32 stream))
         (start-codes (make-array group-count
                                  :element-type '(unsigned-byte 32)
                                  :initial-element 0))
         (end-codes (make-array group-count
                                :element-type '(unsigned-byte 32)
                                :initial-element 0))
         (glyph-starts (make-array group-count
                                   :element-type '(unsigned-byte 32)
                                   :initial-element 0)))
    (declare (ignore reserved language-code subtable-length))
    (loop for i below group-count
          do (setf (aref start-codes i) (read-uint32 stream)
                   (aref end-codes i) (read-uint32 stream)
                   (aref glyph-starts i) (read-uint32 stream)))
    (make-instance 'format-12-cmap
                   :group-count group-count
                   :start-codes start-codes
                   :end-codes end-codes
                   :glyph-starts glyph-starts)))

(defun load-unicode-cmap (stream)
  "Load a Unicode character map of type 4 or 12 from STREAM starting at
the current offset."
  (let ((format (read-uint16 stream)))
    (when (= format 12)
      (return-from load-unicode-cmap (load-unicode-cmap-format12 stream)))
    (when (/= format 4)
      (error 'unsupported-format
             :location "\"cmap\" subtable"
             :actual-value format
             :expected-values (list 4))))
  (let ((table-start (- (file-position stream) 2))
        (subtable-length (read-uint16 stream))
        (language-code (read-uint16 stream))
        (segment-count (/ (read-uint16 stream) 2))
        (search-range (read-uint16 stream))
        (entry-selector (read-uint16 stream))
        (range-shift (read-uint16 stream)))
    (declare (ignore language-code search-range entry-selector range-shift))
    (flet ((make-and-load-array (&optional (size segment-count))
             (loop with array = (make-array size
                                            :element-type '(unsigned-byte 16)
                                            :initial-element 0)
                   for i below size
                   do (setf (aref array i) (read-uint16 stream))
                   finally (return array)))
           (make-signed (i)
             (if (logbitp 15 i)
                 (1- (- (logandc2 #xFFFF i)))
                 i)))
      (let ((end-codes (make-and-load-array))
            (pad (read-uint16 stream))
            (start-codes (make-and-load-array))
            (id-deltas (make-and-load-array))
            (id-range-offsets (make-and-load-array))
            (glyph-index-array-size (/ (- subtable-length
                                          (- (file-position stream)
                                             table-start))
                                       2)))
        (declare (ignore pad))
        (make-instance 'unicode-cmap
                       :segment-count segment-count
                       :end-codes end-codes
                       :start-codes start-codes
                       ;; these are really signed, so sign them
                       :id-deltas (map 'vector #'make-signed id-deltas)
                       :id-range-offsets id-range-offsets
                       :glyph-indexes (make-and-load-array glyph-index-array-size))))))


(defun %decode-format-4-cmap-code-point-index (code-point cmap index)
  "Return the index of the Unicode CODE-POINT in a format 4 CMAP, if
present, otherwise NIL. Assumes INDEX points to the element of the
CMAP arrays (END-CODES etc) corresponding to code-point."
  (with-slots (end-codes start-codes
               id-deltas id-range-offsets
               glyph-indexes)
      cmap
    (declare (type cmap-value-table
                   end-codes start-codes
                   id-range-offsets
                   glyph-indexes))
    (let ((start-code (aref start-codes index))
          (end-code (aref end-codes index))
          (id-range-offset (aref id-range-offsets index))
          (id-delta (aref id-deltas index)))
      (cond
        ((< code-point start-code)
         0)
        ;; ignore empty final segment
        ((and (= 65535 start-code end-code))
         0)
        ((zerop id-range-offset)
         (logand #xFFFF (+ code-point id-delta)))
        (t
         (let* ((glyph-index-offset (- (+ index
                                          (ash id-range-offset -1)
                                          (- code-point start-code))
                                       (segment-count cmap)))
                (glyph-index (aref (glyph-indexes cmap)
                                   glyph-index-offset)))
           (logand #xFFFF
                   (+ glyph-index id-delta))))))))

(defun %decode-format-12-cmap-code-point-index (code-point cmap index)
  "Return the index of the Unicode CODE-POINT in a format 12 CMAP, if
present, otherwise NIL. Assumes INDEX points to the element of the
CMAP arrays (END-CODES etc) corresponding to code-point."
  (with-slots (end-codes start-codes glyph-starts)
      cmap
    (declare (type (simple-array (unsigned-byte 32))
                   end-codes start-codes glyph-starts))
    (let ((start-code (aref start-codes index))
          (start-glyph-id (aref glyph-starts index)))
      (if (< code-point start-code)
          0
          (+ start-glyph-id (- code-point start-code))))))

(defgeneric code-point-font-index-from-cmap (code-point cmap)
  (:documentation "Return the index of the Unicode CODE-POINT in
CMAP, if present, otherwise NIL.")
  (:method (code-point (cmap unicode-cmap))
    (with-slots (end-codes)
        cmap
      (declare (type cmap-value-table end-codes))
      (dotimes (i (segment-count cmap) 1)
        (when (<= code-point (aref end-codes i))
          (return (%decode-format-4-cmap-code-point-index code-point cmap i))))))
  (:method (code-point (cmap format-12-cmap))
    (with-slots (end-codes)
        cmap
      (declare (type (simple-array (unsigned-byte 32)) end-codes))
      (dotimes (i (group-count cmap) 1)
        (when (<= code-point (aref end-codes i))
          (return
            (%decode-format-12-cmap-code-point-index code-point cmap i)))))))

(defmethod invert-character-map (font-loader)
  "Return a vector mapping font indexes to code points."
  (with-slots (start-codes end-codes)
      (character-map font-loader)
    (let ((points (make-array (glyph-count font-loader) :initial-element -1))
          (cmap (character-map font-loader)))
      (dotimes (i (length end-codes) points)
        (loop for j from (aref start-codes i) to (aref end-codes i)
              for font-index
                = (typecase cmap
                    (unicode-cmap
                     (%decode-format-4-cmap-code-point-index j cmap i))
                    (format-12-cmap
                     (%decode-format-12-cmap-code-point-index j cmap i))
                    (t
                     (code-point-font-index-from-cmap j cmap)))
              when (minusp (svref points font-index))
                do (setf (svref points font-index) j))))))


(defgeneric code-point-font-index (code-point font-loader)
  (:documentation "Return the index of the Unicode CODE-POINT in
FONT-LOADER, if present, otherwise NIL.")
  (:method (code-point font-loader)
    (code-point-font-index-from-cmap code-point (character-map font-loader))))

(defgeneric font-index-code-point (glyph-index font-loader)
  (:documentation "Return the code-point for a given glyph index.")
  (:method (glyph-index font-loader)
    (let ((point (aref (inverse-character-map font-loader) glyph-index)))
      (if (plusp point)
          point
          0))))

(defun %load-cmap-info (font-loader platform specific)
  (seek-to-table "cmap" font-loader)
  (with-slots (input-stream)
      font-loader
    (let ((start-pos (file-position input-stream))
          (version-number (read-uint16 input-stream))
          (subtable-count (read-uint16 input-stream))
          (foundp nil))
      (declare (ignore version-number))
      (loop repeat subtable-count
            for platform-id = (read-uint16 input-stream)
            for platform-specific-id = (read-uint16 input-stream)
            for offset = (+ start-pos (read-uint32 input-stream))
            when (and (= platform-id platform)
                      (or (eql platform-specific-id specific)
                          (and (consp specific)
                               (member platform-specific-id specific))))
            do
            (file-position input-stream offset)
            (setf (character-map font-loader) (load-unicode-cmap input-stream))
            (setf (inverse-character-map font-loader)
                  (invert-character-map font-loader)
                  foundp t)
            (return))
      foundp)))

(defun %unknown-cmap-error (font-loader)
  (seek-to-table "cmap" font-loader)
  (with-slots (input-stream)
      font-loader
    (let ((start-pos (file-position input-stream))
          (version-number (read-uint16 input-stream))
          (subtable-count (read-uint16 input-stream))
          (cmaps nil))
      (declare (ignore version-number))
      (loop repeat subtable-count
            for platform-id = (read-uint16 input-stream)
            for platform-specific-id = (read-uint16 input-stream)
            for offset = (+ start-pos (read-uint32 input-stream))
            for pos = (file-position input-stream)
            do (file-position input-stream offset)
               (push (list (platform-id-name platform-id)
                           (encoding-id-name platform-id platform-specific-id)
                           :type (read-uint16 input-stream))
                     cmaps)
               (file-position input-stream pos))
      (error "Could not find supported character map in font file~% available cmap tables = ~s"
             cmaps))))

(defmethod load-cmap-info ((font-loader font-loader))
  (or (%load-cmap-info font-loader +unicode-platform-id+
                       +unicode-2.0-full-encoding-id+) ;; full unicode
      (%load-cmap-info font-loader +microsoft-platform-id+
                       +microsoft-unicode-ucs4-encoding-id+) ;; full unicode
      (%load-cmap-info font-loader +microsoft-platform-id+
                       +microsoft-unicode-bmp-encoding-id+) ;; bmp
      (%load-cmap-info font-loader +unicode-platform-id+
                       +unicode-2.0-encoding-id+) ;; bmp
      (%load-cmap-info font-loader +unicode-platform-id+
                       '(0 1 2 3 4)) ;; all except variation and last-resort
      (%load-cmap-info font-loader +microsoft-platform-id+
                       +microsoft-symbol-encoding-id+) ;; ms symbol
      (%unknown-cmap-error font-loader)))

(defun available-character-maps (loader)
  (seek-to-table "cmap" loader)
  (let ((stream (input-stream loader)))
    (let ((start-pos (file-position stream))
          (version-number (read-uint16 stream))
          (subtable-count (read-uint16 stream)))
      (declare (ignore start-pos))
      (assert (zerop version-number))
      (dotimes (i subtable-count)
        (let ((platform-id (read-uint16 stream))
              (encoding-id (read-uint16 stream))
              (offset (read-uint32 stream)))
          (declare (ignore offset))
          (format t "~D (~A) - ~D (~A)~%"
                  platform-id (platform-id-name platform-id)
                  encoding-id (encoding-id-name platform-id encoding-id)))))))