File: external-format.impure.lisp

package info (click to toggle)
sbcl 1%3A0.9.16.0-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 19,960 kB
  • ctags: 16,537
  • sloc: lisp: 231,164; ansic: 19,558; asm: 2,539; sh: 1,925; makefile: 308
file content (283 lines) | stat: -rw-r--r-- 12,850 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
;;;; This file is for testing external-format functionality, using
;;;; test machinery which might have side effects (e.g.  executing
;;;; DEFUN, writing files).  Note that the tests here reach into
;;;; unexported functionality, and should not be used as a guide for
;;;; users.

;;;; This software is part of the SBCL system. See the README file for
;;;; more information.
;;;;
;;;; While most of SBCL is derived from the CMU CL system, the test
;;;; files (like this one) were written from scratch after the fork
;;;; from CMU CL.
;;;;
;;;; This software is in the public domain and is provided with
;;;; absolutely no warranty. See the COPYING and CREDITS files for
;;;; more information.

;;; Evaluate BODY once per entry of SB-IMPL::*EXTERNAL-FORMATS*, with XF
;;; bound to the first element of the first element of that entry.
;;; RESULT, when supplied, is the result form of the underlying DOLIST.
(defmacro do-external-formats ((xf &optional result) &body body)
  (let ((entry (gensym "ENTRY")))
    `(dolist (,entry sb-impl::*external-formats* ,result)
       (let ((,xf (caar ,entry)))
         ,@body))))

;;; Opening the null device for input must work in every external
;;; format, and the first READ-CHAR must already report end of file.
(do-external-formats (xf)
  (with-open-file (null-stream #-win32 "/dev/null" #+win32 "nul"
                               :direction :input
                               :external-format xf)
    (let ((first-read (read-char null-stream nil null-stream)))
      (assert (eq first-read null-stream)))))

;;; Every standard character written through an external format must be
;;; read back unchanged through that same format.
(let ((standard-characters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!$\"'(),_-./:;?+<=>#%&*@[\\]{|}`^~"))
  (do-external-formats (xf)
    ;; Write phase: one WRITE-CHAR per character.
    (with-open-file (out "external-format-test.txt"
                         :direction :output
                         :if-exists :supersede
                         :external-format xf)
      (map nil
           (lambda (character) (write-char character out))
           standard-characters))
    ;; Read phase: the characters must come back in the same order.
    (with-open-file (in "external-format-test.txt"
                        :direction :input
                        :external-format xf)
      (map nil
           (lambda (expected) (assert (eql (read-char in) expected)))
           standard-characters))))

(delete-file "external-format-test.txt")
;;; On builds without the :SB-UNICODE feature, stop here: report the test
;;; status and leave the process with unix status 104, so none of the
;;; forms below are run.
;;; NOTE(review): 104 is presumably the exit code the test harness treats
;;; as success -- confirm against the test runner.
#-sb-unicode
(progn
  (test-util:report-test-status)
  (sb-ext:quit :unix-status 104))

;;; Round-trip UTF-8 characters whose encodings are 1, 2, 3 and 4 octets
;;; long.  For a character that is N octets wide the data is shifted by
;;; 0 .. N-1 leading #\a characters, and 4097 copies are written so that
;;; the data runs past a 4096-wide fd-stream buffer.  Tests for buffer
;;; edge bugs.
(loop for code in '(1 #x81 #x801 #x10001)
      for last-offset from 0
      do (let ((character (code-char code)))
           (loop for offset from 0 to last-offset
                 do (with-open-file (out "external-format-test.txt"
                                         :direction :output
                                         :if-exists :supersede
                                         :external-format :utf-8)
                      (loop repeat offset
                            do (write-char #\a out))
                      (loop repeat 4097
                            do (write-char character out)))
                    (with-open-file (in "external-format-test.txt"
                                        :direction :input
                                        :external-format :utf-8)
                      (loop repeat offset
                            do (assert (eql (read-char in) #\a)))
                      (loop repeat 4097
                            do (assert (eql (read-char in) character)))
                      ;; Nothing may be left over after the last character.
                      (assert (eql (read-char in nil in) in))))))

;;; Test character decode restarts.
;;; The file holds the octets 65 66 #xE0 67.  It is read back as UTF-8
;;; twice, each time answering SB-INT:CHARACTER-DECODING-ERROR with a
;;; different restart: ATTEMPT-RESYNC must yield the line "ABC",
;;; FORCE-END-OF-FILE the line "AB".  In both cases the following
;;; READ-LINE must report end of file.
(with-open-file (out "external-format-test.txt" :direction :output
                     :if-exists :supersede :element-type '(unsigned-byte 8))
  (dolist (octet '(65 66 #xe0 67))
    (write-byte octet out)))
(loop for (restart-name expected-line) in '((sb-int:attempt-resync "ABC")
                                            (sb-int:force-end-of-file "AB"))
      do (with-open-file (in "external-format-test.txt" :direction :input
                             :external-format :utf-8)
           (handler-bind
               ((sb-int:character-decoding-error
                 (lambda (decoding-error)
                   (declare (ignore decoding-error))
                   (invoke-restart restart-name))))
             (assert (equal (read-line in nil in) expected-line))
             (assert (equal (read-line in nil in) in)))))

;;; And again with more data to account for buffering (this was briefly
;;; broken in early 0.9.6).  The file consists of 40 lines of 49 digits,
;;; a single #xE0 octet, and then the same 40 lines again.
(with-open-file (out "external-format-test.txt" :direction :output
                     :if-exists :supersede :element-type '(unsigned-byte 8))
  (let ((line (make-array 50
                          :element-type '(unsigned-byte 64)
                          :initial-contents (map 'list #'char-code
                                                 "1234567890123456789012345678901234567890123456789."))))
    ;; The trailing "." is only a placeholder for the line terminator.
    (setf (aref line 49) (char-code #\Newline))
    (flet ((write-forty-lines ()
             (loop repeat 40
                   do (write-sequence line out))))
      (write-forty-lines)
      (write-byte #xe0 out)
      (write-forty-lines))))
;;; Read "external-format-test.txt" back as UTF-8, answering every
;;; decoding error with the ATTEMPT-RESYNC restart, and check that 80
;;; lines of 49 digits are returned.
(with-test (:name (:character-decode-large :attempt-resync))
  (with-open-file (s "external-format-test.txt" :direction :input
                     :external-format :utf-8)
    (handler-bind
        ((sb-int:character-decoding-error #'(lambda (decoding-error)
                                              (declare (ignore decoding-error))
                                              (invoke-restart
                                               'sb-int:attempt-resync)))
         ;; The failure mode is an infinite loop, add a timeout to detect it.
         ;; A handler function is called with the condition as its
         ;; argument, so the lambda must accept (and ignore) it; a
         ;; zero-argument lambda would fail with an argument-count error
         ;; instead of the intended "Timeout" error.
         (sb-ext:timeout #'(lambda (timeout)
                             (declare (ignore timeout))
                             (error "Timeout"))))
      (sb-ext:with-timeout 5
        (dotimes (i 80)
          (assert (equal (read-line s nil s)
                         "1234567890123456789012345678901234567890123456789")))))))

;;; Counterpart of the ATTEMPT-RESYNC test using the FORCE-END-OF-FILE
;;; restart.  The test is marked as failing on :SBCL and its real body is
;;; disabled with #+nil; only the unconditional ERROR form is run.
(with-test (:name (:character-decode-large :force-end-of-file)
            :fails-on :sbcl)
  (error "We can't reliably test this due to WITH-TIMEOUT race condition")
  ;; This test will currently fail. But sometimes it will fail
  ;; ungracefully due to the WITH-TIMEOUT race mentioned above. This
  ;; rightfully confuses some people, so we'll skip running the code
  ;; for now. -- JES, 2006-01-27
  #+nil
  (with-open-file (s "external-format-test.txt" :direction :input
                     :external-format :utf-8)
    (handler-bind
        ((sb-int:character-decoding-error #'(lambda (decoding-error)
                                              (declare (ignore decoding-error))
                                              (invoke-restart
                                               'sb-int:force-end-of-file)))
         ;; The failure mode is an infinite loop, add a timeout to detect it.
         ;; The handler receives the condition as its argument, so the
         ;; lambda has to accept (and ignore) it.
         (sb-ext:timeout #'(lambda (timeout)
                             (declare (ignore timeout))
                             (error "Timeout"))))
      (sb-ext:with-timeout 5
        (dotimes (i 80)
          (assert (equal (read-line s nil s)
                         "1234567890123456789012345678901234567890123456789")))
        (assert (equal (read-line s nil s) s))))))

;;; Test character encode restarts.
;;; Four characters are written one at a time to a :LATIN-1 stream; any
;;; SB-INT:CHARACTER-ENCODING-ERROR is answered with the OUTPUT-NOTHING
;;; restart.  Reading the file back must give "ABC", i.e. the character
;;; with code 322 leaves no trace in the output.
(with-open-file (out "external-format-test.txt" :direction :output
                     :if-exists :supersede :external-format :latin-1)
  (handler-bind
      ((sb-int:character-encoding-error
        (lambda (encoding-error)
          (declare (ignore encoding-error))
          (invoke-restart 'sb-impl::output-nothing))))
    (dolist (character (list #\A #\B (code-char 322) #\C))
      (write-char character out))))
(with-open-file (in "external-format-test.txt" :direction :input
                    :external-format :latin-1)
  (assert (equal (read-line in nil in) "ABC"))
  (assert (equal (read-line in nil in) in)))

;;; The same encode-restart check, this time pushing all four characters
;;; through a single WRITE-STRING call instead of individual WRITE-CHARs.
(with-open-file (out "external-format-test.txt" :direction :output
                     :if-exists :supersede :external-format :latin-1)
  (let ((text (make-array 4 :element-type 'character
                            :initial-contents (list #\A #\B (code-char 322) #\C))))
    (handler-bind
        ((sb-int:character-encoding-error
          (lambda (encoding-error)
            (declare (ignore encoding-error))
            (invoke-restart 'sb-impl::output-nothing))))
      (write-string text out))))
(with-open-file (in "external-format-test.txt" :direction :input
                    :external-format :latin-1)
  (assert (equal (read-line in nil in) "ABC"))
  (assert (equal (read-line in nil in) in)))

;;; Test skipping character-decode-errors in comments.
;;; A source file is written in :LATIN-1 with the character of code 233
;;; inside a comment and is then compiled as :UTF-8; the test passes when
;;; COMPILE-FILE returns without an error escaping.
(let ((source "external-format-test.lisp"))
  (unwind-protect
       (progn
         ;; WITH-OPEN-FILE closes the stream even if one of the writes
         ;; signals, so no open stream is left behind on failure.
         (with-open-file (s source :direction :output
                            :if-exists :supersede :external-format :latin-1)
           (write-string ";;; ABCD" s)
           (write-char (code-char 233) s)
           (terpri s))
         (compile-file source :external-format :utf-8))
    ;; Remove whatever was actually created.  Probing first means a
    ;; missing source file cannot prevent the compiled file from being
    ;; removed as well.
    (let ((written (probe-file source)))
      (when written
        (delete-file written)))
    (let ((p (probe-file (compile-file-pathname source))))
      (when p
        (delete-file p)))))


;;;; KOI8-R external format
;;; Through a :KOI8-R stream, (code-char #xB0) must be written as the
;;; single octet #x9C and must read back as a character of code #xB0,
;;; while writing (code-char #xBAAD) must signal
;;; SB-INT:CHARACTER-ENCODING-ERROR.
(with-open-file (s "external-format-test.txt" :direction :output
                 :if-exists :supersede :external-format :koi8-r)
  (write-char (code-char #xB0) s)
  (assert (eq
           (handler-case
               (progn
                 (write-char (code-char #xBAAD) s)
                 :bad)
             (sb-int:character-encoding-error ()
               :good))
           :good)))
;; Check the raw octet that ended up in the file.  BYTE is an integer,
;; so it is compared directly; no EVAL is needed.
(with-open-file (s "external-format-test.txt" :direction :input
                 :element-type '(unsigned-byte 8))
  (let ((byte (read-byte s)))
    (assert (= byte #x9C))))
;; Check that decoding gives the original character back.
(with-open-file (s "external-format-test.txt" :direction :input
                 :external-format :koi8-r)
  (let ((char (read-char s)))
    (assert (= (char-code char) #xB0))))
(delete-file "external-format-test.txt")

;;; KOI8-R conversion through OCTETS-TO-STRING / STRING-TO-OCTETS:
;;; decoding KOI8-R-CODES must give the character codes in UNI-CODES,
;;; and encoding those characters must give KOI8-R-CODES back.
(let* ((koi8-r-codes (coerce '(240 210 201 215 197 212 33) '(vector (unsigned-byte 8))))
       (uni-codes #(1055 1088 1080 1074 1077 1090 33))
       (string (octets-to-string koi8-r-codes :external-format :koi8-r))
       (uni-decoded (map 'vector #'char-code string)))
  ;; Decoding direction: compare the codes computed above rather than
  ;; converting the octets a second time.
  (assert (equalp uni-decoded uni-codes))
  ;; Encoding direction.
  (assert (equalp (string-to-octets (map 'string #'code-char uni-codes) :external-format :koi8-r)
                  koi8-r-codes)))

;;; tests of FILE-STRING-LENGTH
;;; For every external format, the value FILE-STRING-LENGTH returns for a
;;; character (and then for a whole string) must equal the amount by
;;; which FILE-POSITION advances when that object is actually written.
(let ((standard-characters "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!$\"'(),_-./:;?+<=>#%&*@[\\]{|}`^~"))
  (do-external-formats (xf)
    (with-open-file (s "external-format-test.txt" :direction :output
                       ;; Supersede any stale copy of the file rather than
                       ;; relying on an earlier DELETE-FILE having run.
                       :if-exists :supersede
                       :external-format xf)
      ;; One character at a time.
      (loop for x across standard-characters
            for position = (file-position s)
            for char-length = (file-string-length s x)
            do (write-char x s)
            do (assert (= (file-position s) (+ position char-length))))
      ;; The whole string in one call.
      (let ((position (file-position s))
            (string-length (file-string-length s standard-characters)))
        (write-string standard-characters s)
        (assert (= (file-position s) (+ position string-length)))))
    (delete-file "external-format-test.txt")))

;;; FILE-STRING-LENGTH on a :UTF-8 stream, using character codes that sit
;;; on either side of each power of two from 2^8 to 2^18 (plus 0 and 1).
;;; As above, the predicted length must match the FILE-POSITION advance,
;;; both per character and for the string of all of them.
(let ((char-codes '(0 1 255 256 511 512 1023 1024 2047 2048 4095 4096
                    8191 8192 16383 16384 32767 32768 65535 65536 131071
                    131072 262143 262144)))
  (with-open-file (s "external-format-test.txt" :direction :output
                     ;; Supersede any stale copy of the file rather than
                     ;; relying on an earlier DELETE-FILE having run.
                     :if-exists :supersede
                     :external-format :utf-8)
    (dolist (code char-codes)
      (let* ((char (code-char code))
             (position (file-position s))
             (char-length (file-string-length s char)))
        (write-char char s)
        (assert (= (file-position s) (+ position char-length)))))
    (let* ((string (map 'string #'code-char char-codes))
           (position (file-position s))
           (string-length (file-string-length s string)))
      (write-string string s)
      (assert (= (file-position s) (+ position string-length))))))


;;; See sbcl-devel "Subject: Bug in FILE-POSITION on UTF-8-encoded files"
;;; by Lutz Euler on 2006-03-05 for more details.
;;;
;;; Write a one-octet character followed by a two-octet character, read
;;; the first, remember the position, read the second, seek back to the
;;; remembered position and check that the second character is read
;;; again.
(with-test (:name (:file-position :utf-8))
  (let ((path "external-format-test.txt"))
    (with-open-file (out path
                         :direction :output
                         :if-exists :supersede
                         :element-type '(unsigned-byte 8))
      ;; #\* encoded in UTF-8.
      (write-byte 42 out)
      ;; #\adiaeresis encoded in UTF-8, appended after it.
      (write-sequence '(195 164) out))
    (with-open-file (in path :external-format :utf-8)
      ;; Skip the first character; its value is not interesting.
      (read-char in)
      (let* ((saved-position (file-position in))
             (first-read (read-char in)))
        (format t "read character with code ~a successfully from file position ~a~%"
                (char-code first-read) saved-position)
        (file-position in saved-position)
        (format t "set file position back to ~a, trying to read-char again~%" saved-position)
        (assert (char= first-read (read-char in)))))
    (values)))

;;;; success