File: unicode-normalization.pure.lisp

package info (click to toggle)
sbcl 2%3A2.6.2-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 52,008 kB
  • sloc: lisp: 535,135; ansic: 42,629; sh: 5,737; asm: 2,406; pascal: 717; makefile: 432; python: 56; cpp: 27
file content (129 lines) | stat: -rw-r--r-- 4,667 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
;;;; This software is part of the SBCL system. See the README file for
;;;; more information.
;;;;
;;;; While most of SBCL is derived from the CMU CL system, the test
;;;; files (like this one) were written from scratch after the fork
;;;; from CMU CL.
;;;;
;;;; This software is in the public domain and is provided with
;;;; absolutely no warranty. See the COPYING and CREDITS files for
;;;; more information.

(use-package :sb-unicode)

(defun parse-one-line (line)
  (do* ((i 0 (1+ i))
        (start 0 (1+ end))
        (end (position #\; line :start start) (position #\; line :start start))
        result)
       ((= i 5) (nreverse result))
    (with-input-from-string (s (subseq line start (1+ end)))
      (let ((*read-base* 16.))
        (push (map 'string 'code-char (read-delimited-list #\; s)) result)))))

(defmacro assert-all-string= (base &body others)
  `(progn
     ,@(loop for test in others
          collect `(assert (string= ,base ,test)))))

(defun test-line (c1 c2 c3 c4 c5)
  ;; NFC
  (assert-all-string= c2
    (normalize-string c1 :nfc)
    (normalize-string c2 :nfc)
    (normalize-string c3 :nfc))
  (assert-all-string= c4
    (normalize-string c4 :nfc)
    (normalize-string c5 :nfc))

  ;; NFD
  (assert-all-string= c3
    (normalize-string c1 :nfd)
    (normalize-string c2 :nfd)
    (normalize-string c3 :nfd))
  (assert-all-string= c5
    (normalize-string c4 :nfd)
    (normalize-string c5 :nfd))

  ;; NFKC
  (assert-all-string= c4
    (normalize-string c1 :nfkc)
    (normalize-string c2 :nfkc)
    (normalize-string c3 :nfkc)
    (normalize-string c4 :nfkc)
    (normalize-string c5 :nfkc))

  ;; NFKD
  (assert-all-string= c5
    (normalize-string c1 :nfkd)
    (normalize-string c2 :nfkd)
    (normalize-string c3 :nfkd)
    (normalize-string c4 :nfkd)
    (normalize-string c5 :nfkd)))

(defun test-no-normalization (string)
  (assert-all-string= string
    (normalize-string string :nfc)
    (normalize-string string :nfd)
    (normalize-string string :nfkc)
    (normalize-string string :nfkd)))

(defun test-normalization ()
  (declare (optimize (debug 2)))
  (with-open-file (s "data/NormalizationTest.txt" :external-format :latin1)
    (do ((line (read-line s) (read-line s)))
        ((char/= #\# (char line 0))
         (assert (string= "@Part0" line :end2 6))
         (assert (char= #\# (char (read-line s) 0)))))
    ;; Part0: specific cases
    (with-test (:name (:unicode-normalization :part0)
                      :skipped-on (not :sb-unicode))
      (do ((line (read-line s) (read-line s)))
          ((char= #\# (char line 0))
           (assert (string= "@Part1" (read-line s) :end2 6))
           (assert (char= #\# (char (read-line s) 0)))
           (assert (char= #\# (char (read-line s) 0))))
        (destructuring-bind (c1 c2 c3 c4 c5)
            (parse-one-line line)
          (test-line c1 c2 c3 c4 c5))))
    ;; Part1: single characters.  (Extra work to check for conformance
    ;; on unlisted entries)
    (with-test (:name (:unicode-normalization :part1)
                      :skipped-on (not :sb-unicode))
      (do ((line (read-line s) (read-line s))
           (code 0))
          ((char= #\# (char line 0))
           (do ((code code (1+ code)))
               ((= code #x110000))
             (test-no-normalization (string (code-char code))))
           (assert (string= "@Part2" (read-line s) :end2 6))
           (assert (char= #\# (char (read-line s) 0))))
        (destructuring-bind (c1 c2 c3 c4 c5)
            (parse-one-line line)
          (do ((c code (1+ c)))
              ((= c (char-code (char c1 0)))
               (test-line c1 c2 c3 c4 c5)
               (setf code (1+ c)))
            (test-no-normalization (string (code-char code)))))))
    ;; Part2: Canonical Order Test
    (with-test (:name (:unicode-normalization :part2)
                      :skipped-on (not :sb-unicode))
      (do ((line (read-line s) (read-line s)))
          ((char= #\# (char line 0))
           (assert (string= "@Part3" (read-line s) :end2 6))
           (assert (char= #\# (char (read-line s) 0))))
        (destructuring-bind (c1 c2 c3 c4 c5)
            (parse-one-line line)
          (test-line c1 c2 c3 c4 c5))))
    ;; Part3: PRI #29 Test
    (with-test (:name (:unicode-normalization :part3)
                      :skipped-on (not :sb-unicode))
      (do ((line (read-line s) (read-line s)))
          ((char= #\# (char line 0))
           (assert (char= #\# (char (read-line s) 0)))
           (assert (null (read-line s nil nil))))
        (destructuring-bind (c1 c2 c3 c4 c5)
            (parse-one-line line)
          (test-line c1 c2 c3 c4 c5))))))

(test-normalization)