File: read-utf8.lisp

package info (click to toggle)
acl2 3.1-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 36,712 kB
  • ctags: 38,396
  • sloc: lisp: 464,023; makefile: 5,470; sh: 86; csh: 47; cpp: 25; ansic: 22
file content (607 lines) | stat: -rw-r--r-- 26,547 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
;; Processing Unicode Files with ACL2
;; Copyright (C) 2005-2006 by Jared Davis <jared@cs.utexas.edu>
;;
;; This program is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 2 of the License, or (at your option)
;; any later version.
;;
;; This program is distributed in the hope that it will be useful but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
;; more details.
;;
;; You should have received a copy of the GNU General Public License along with
;; this program; if not, write to the Free Software Foundation, Inc., 59 Temple
;; Place - Suite 330, Boston, MA 02111-1307, USA.


(in-package "ACL2")
(include-book "utf8-decode")
(include-book "take-bytes")
(local (include-book "open-input-channel"))
(local (include-book "close-input-channel"))
(set-state-ok t)


;; We now want to recreate our utf8=>ustring function, but reading directly
;; from a file with the file reading operations.  Along the way we rely on
;; take-bytes and nthcdr-bytes, which play the roles of "take" and "nthcdr"
;; for byte streams.  We will use (read-byte$-all ...) effectively as the
;; file's contents, and relate all of these operations to it.

;; read-utf8-fast reads bytes from CHANNEL until read-byte$ returns nil,
;; decoding each UTF-8 sequence it finds and consing the decoded value onto
;; ACC.  It returns (mv result state), where result is
;;
;;   - (reverse acc) once read-byte$ returns nil, or
;;   - the symbol FAIL as soon as a sequence cannot be decoded.
;;
;; The function is an MBE:
;;
;;   :logic  reads a lead byte, asks utf8-table35-expected-length how long the
;;           sequence should be, fetches the remaining bytes with take-bytes,
;;           and decodes the sequence with utf8-char=>uchar.
;;
;;   :exec   dispatches on the numeric range of the lead byte and performs the
;;           range checks and bit manipulation inline with fixnum
;;           declarations.
;;
;; Because both definitions thread STATE, the :exec branch must consume exactly
;; as many bytes as the :logic branch does in every case, including the failing
;; ones; this is why some branches below read bytes only to ignore them.
;;
;; Guards are not verified here (:verify-guards nil); a separate verify-guards
;; event later in this file discharges them.
(defund read-utf8-fast (channel state acc)
  (declare (xargs :guard (and (state-p state)
                              (symbolp channel)
                              (open-input-channel-p channel :byte state)
                              (ustring? acc))
                  :measure (file-measure channel state)
                  :verify-guards nil))
  (mbe 
   :logic
   ;; The guard conjuncts about STATE and CHANNEL are re-tested here; when they
   ;; fail the logical definition does not recur and simply returns FAIL.
   (if (and (state-p state)
            (symbolp channel)
            (open-input-channel-p channel :byte state))
       (mv-let (x1 state)
               (read-byte$ channel state)
               (if (not x1)
                   ;; No byte was read: return what we have accumulated, in
                   ;; reading order.
                   (mv (reverse acc) state)
                 (let ((len1 (utf8-table35-expected-length x1)))
                   (if (not len1)
                       ;; x1 has no expected length, so it cannot start a
                       ;; sequence.
                       (mv 'fail state)
                     (mv-let (x2-x4 state)
                             ;; Fetch the len1 - 1 bytes that should follow x1.
                             (take-bytes (1- len1) channel state)
                             (let* ((x1-x4 (cons x1 x2-x4))
                                    (first (utf8-char=>uchar x1-x4)))
                               (if (not first)
                                   (mv 'fail state)
                                 (read-utf8-fast channel state (cons first acc)))))))))
     (mv 'fail state))
   :exec
   (mv-let 
    (x1 state) 
    (read-byte$ channel state)
    (if (not x1)
        (mv (reverse acc) state)
      (cond 
       
       ((<= (the-fixnum x1) 127)
        ;; Expected length 1.  We don't need to do any further checking; we can
        ;; just recur very quickly.  Note that this will give us very good
        ;; performance for English text, where characters are typically only a
        ;; single byte.
        (read-utf8-fast channel state (cons x1 acc)))

       ((in-range? (the-fixnum x1) 194 223)
        ;; Expected length 2.  (We excluded 192,193 because they are not
        ;; permitted under Table 3-6.)
        (mv-let (x2 state) (read-byte$ channel state)
           (if (and x2 (in-range? (the-fixnum x2) 128 191))
               ;; Manually-inlined utf8-combine2 operation.
               (read-utf8-fast 
                channel state
                (cons 
                 (the-fixnum
                  (logior 
                   (the-fixnum (ash (the-fixnum (logand (the-fixnum x1) 31)) 6))
                   (the-fixnum (logand (the-fixnum x2) 63))))
                 acc))
             (mv 'fail state))))
       
       ((in-range? (the-fixnum x1) 224 239)
        ;; Expected length 3.  (We cover all options here.)  Both trailing
        ;; bytes are read before any checking is done.  The permitted range
        ;; for x2 is narrower when x1 is 224 or 237.
        (mv-let (x2 state) (read-byte$ channel state)
         (mv-let (x3 state) (read-byte$ channel state)
           (if (and x2 x3
                    (cond ((= (the-fixnum x1) 224)  
                           (in-range? (the-fixnum x2) 160 191))
                          ((= (the-fixnum x1) 237)  
                           (in-range? (the-fixnum x2) 128 159))
                          (t 
                           (in-range? (the-fixnum x2) 128 191)))
                    (in-range? (the-fixnum x3) 128 191))
               ;; Inlined three-byte combination: low 4 bits of x1, then the
               ;; low 6 bits of x2 and of x3.
               (read-utf8-fast
                channel state
                (cons 
                 (the-fixnum
                  (logior 
                   (the-fixnum 
                    (ash (the-fixnum (logand (the-fixnum x1) 15)) 12))
                   (the-fixnum 
                    (logior
                     (the-fixnum 
                      (ash (the-fixnum (logand (the-fixnum x2) 63)) 6))
                     (the-fixnum (logand (the-fixnum x3) 63))))))
                 acc))
             (mv 'fail state)))))
                       
       ((in-range? (the-fixnum x1) 240 244)
        ;; Expected length 4.  (We only accept 240-244 because of Table 3-6;
        ;; i.e., we exclude 245, 246, and 247.)  All three trailing bytes are
        ;; read before any checking is done.  The permitted range for x2 is
        ;; narrower when x1 is 240 or 244.
        (mv-let (x2 state) (read-byte$ channel state)
         (mv-let (x3 state) (read-byte$ channel state)
          (mv-let (x4 state) (read-byte$ channel state)
            (if (and x2 x3 x4
                     (cond ((= (the-fixnum x1) 240)
                            (in-range? (the-fixnum x2) 144 191))
                           ((= (the-fixnum x1) 244)
                            (in-range? (the-fixnum x2) 128 143))
                           (t 
                            (in-range? (the-fixnum x2) 128 191)))
                     (in-range? (the-fixnum x3) 128 191)
                     (in-range? (the-fixnum x4) 128 191))
                ;; Inlined four-byte combination: low 3 bits of x1, then the
                ;; low 6 bits of each of x2, x3 and x4.
                (read-utf8-fast 
                 channel state
                 (cons 
                  (the-fixnum
                   (logior 
                    (the-fixnum 
                     (ash (the-fixnum (logand (the-fixnum x1) 7)) 18))
                    (the-fixnum 
                     (logior 
                      (the-fixnum
                       (ash (the-fixnum (logand (the-fixnum x2) 63)) 12))
                      (the-fixnum
                       (logior 
                        (the-fixnum 
                         (ash (the-fixnum (logand (the-fixnum x3) 63)) 6))
                        (the-fixnum 
                         (logand (the-fixnum x4) 63))))))))
                  acc))
              (mv 'fail state))))))       
       
       ;; This is a little obscure.  As an optimization above, we did not
       ;; consider cases for first byte = 192, 193, 245, 246, and 247, because
       ;; these are not allowed under Table 3-6.
       ;;
       ;; However, utf8-table35-expected-length predicts the lengths of these
       ;; as 2, 2, 4, 4, and 4, respectively.  So, for our MBE equivalence, we 
       ;; need to make sure to advance the stream just like we do in the
       ;; :logic mode.
       ((or (= (the-fixnum x1) 192)
            (= (the-fixnum x1) 193))
        ;; Consume the one trailing byte the :logic branch would take, then
        ;; fail.
        (mv-let (x2 state)
                (read-byte$ channel state)
                (declare (ignore x2))
                (mv 'fail state)))

       ((or (= (the-fixnum x1) 245)
            (= (the-fixnum x1) 246)
            (= (the-fixnum x1) 247))
        ;; Consume the three trailing bytes the :logic branch would take, then
        ;; fail.
        (mv-let (x2 state)
                (read-byte$ channel state)
                (declare (ignore x2))
                (mv-let (x3 state)
                        (read-byte$ channel state)
                        (declare (ignore x3))
                        (mv-let (x4 state)
                                (read-byte$ channel state)
                                (declare (ignore x4))
                                (mv 'fail state)))))

       ;; Any other lead byte fails without reading further.
       (t 
        (mv 'fail state)))))))

;; The state returned by read-utf8-fast still satisfies state-p1, provided we
;; started from a state-p1 state and CHANNEL is a symbol naming an open :byte
;; input channel.  All three hypotheses are forced.
(defthm state-p1-of-mv-nth-1-of-read-utf8-fast
  (implies (and (force (state-p1 state))
                (force (open-input-channel-p1 channel :byte state))
                (force (symbolp channel)))
           (state-p1 (mv-nth 1 (read-utf8-fast channel state acc))))
  :hints(("Goal" :in-theory (enable read-utf8-fast))))

;; read-utf8-fast does not close the channel: CHANNEL is still an open :byte
;; input channel in the state that read-utf8-fast returns.  The hypotheses are
;; the same forced hypotheses as in the state-p1 theorem for read-utf8-fast.
(defthm open-input-channel-p1-of-mv-nth-1-of-read-utf8-fast
  (implies (and (force (state-p1 state))
                (force (open-input-channel-p1 channel :byte state))
                (force (symbolp channel)))
           (open-input-channel-p1 channel :byte 
                                  (mv-nth 1 (read-utf8-fast channel state acc))))
  :hints(("Goal" :in-theory (enable read-utf8-fast))))


;; Correctness of read-utf8-fast
;; 
;; We think of (read-byte$-all channel state) as returning the file's contents.
;; We will show that under the appropriate hypotheses, the data returned by
;; (read-utf8-fast channel state acc) is exactly the same as what we would get
;; if we were to first read the entire file's contents, and then apply our UTF8
;; decoding function, utf8=>ustring, to the result.
;;
;; The proof is sort of cute.  We rewrite everything to be in terms of
;; read-byte$-all.  For example, we first show that the byte returned by
;; read-byte$ is nothing more than the first byte of the contents returned by
;; read-byte$-all.  Similarly, in take-bytes.lisp, we have shown that
;; take-bytes is just the simpler-take of read-byte$-all.

;; The value returned by read-byte$ is the first element of the byte list
;; returned by read-byte$-all.  This rewrites (car (read-byte$ ...)) terms into
;; terms about read-byte$-all.  The proof opens the definition of
;; read-byte$-all.
(local (defthm car-of-read-byte$
         (implies (and (force (state-p state))
                       (force (symbolp channel))
                       (force (open-input-channel-p channel :byte state)))
                  (equal (car (read-byte$ channel state))
                         (caar (read-byte$-all channel state))))
         :hints(("Goal" :in-theory (enable read-byte$-all)))))

;; Declare that the rewrite rule car-of-read-byte$ and the definition of
;; read-byte$-all are incompatible, i.e., they should not both be enabled in
;; the same theory.
;; NOTE(review): presumably this guards against a rewriting loop, since the
;; rule introduces read-byte$-all terms and opening read-byte$-all would bring
;; read-byte$ terms back -- confirm against the definition of read-byte$-all.
(local (theory-invariant
        (incompatible (:rewrite car-of-read-byte$)
                      (:definition read-byte$-all))))

;; Reading one byte with read-byte$ removes the first element from the
;; contents: the contents seen from the resulting state are the cdr of the
;; contents seen from the original state.  The hint enables read-byte$-all and
;; disables car-of-read-byte$ together, as the theory invariant declared in
;; this file requires.
(local (defthm read-byte$-all-of-mv-nth-1-of-read-byte$
         (implies (and (force (state-p state))
                       (force (symbolp channel))
                       (force (open-input-channel-p channel :byte state)))
                  (equal 
                   (car (read-byte$-all channel (mv-nth 1 (read-byte$ channel state))))
                   (cdr (car (read-byte$-all channel state)))))
         :hints(("Goal" :in-theory (e/d (read-byte$-all)
                                        (car-of-read-byte$))))))

;; If the first element of the contents is nil, then the contents are nil
;; altogether.  Unlike the neighbouring lemmas, the hypotheses here are not
;; forced, so the rule only fires when they can be relieved directly.
(local (defthm car-of-read-byte$-all-when-not-caar
         (implies (and (state-p1 state)
                       (symbolp channel)
                       (open-input-channel-p1 channel :byte state)
                       (not (caar (read-byte$-all channel state))))
                  (equal (car (read-byte$-all channel state))
                         nil))
         :hints(("goal" :in-theory (e/d (read-byte$-all)
                                        (car-of-read-byte$))))))

;; Main correctness theorem for read-utf8-fast.  The value it returns is
;; exactly what utf8=>ustring-fast returns when applied to the byte list from
;; read-byte$-all and the same accumulator.  ACC must be a true-listp; the
;; hypotheses about STATE and CHANNEL are forced.
;;
;; The proof inducts the way read-utf8-fast recurs, with both function
;; definitions enabled and the rules nthcdr-bytes-2, nthcdr-bytes-3 and
;; nthcdr-bytes-4 disabled.
(defthm car-of-read-utf8-fast-is-utf8=>ustring-fast-of-read-byte$-all
  (implies (and (force (state-p1 state))
                (force (open-input-channel-p1 channel :byte state))
                (force (symbolp channel))
                (true-listp acc))
           (equal (car (read-utf8-fast channel state acc))
                  (utf8=>ustring-fast (car (read-byte$-all channel state))
                                      acc)))
  :hints(("Goal" 
          :in-theory (e/d (read-utf8-fast utf8=>ustring-fast)
                          (nthcdr-bytes-2
                           nthcdr-bytes-3
                           nthcdr-bytes-4))
          :induct (read-utf8-fast channel state acc))))





;; Guard verification for read-utf8-fast.
;;
;; This is really messy, because the MBE equivalence is so dramatic.  Note that
;; a lot of this is the same as what we needed for utf8=>ustring-fast.

(encapsulate
 ()

 ;; One-byte case: every integer from 0 through 127 satisfies uchar?.  This
 ;; matches the value consed onto the accumulator by the (<= x1 127) branch of
 ;; read-utf8-fast's :exec code.
 (local (defthm terrible-lemma-1
          (implies (and (integerp x)
                        (<= 0 x)
                        (<= x 127))
                   (uchar? x))
          :hints(("Goal" :in-theory (enable uchar?)))))

 ;; Two-byte case: for a lead byte x1 in 194..223 and a trailing byte x2 in
 ;; 128..191, the inlined combination (low 5 bits of x1, then low 6 bits of x2)
 ;; is a uchar?.  Proved by appeal to uchar?-of-utf8-combine2.  The hypothesis
 ;; (< 127 x1) follows from (<= 194 x1); it is kept so that the rule is exactly
 ;; the one the guard proof was developed with.
 (local
  (defthm terrible-lemma-2
    (implies (and (force (integerp x1))
                  (force (integerp x2))
                  (< 127 x1)
                  (<= 194 x1)
                  (<= x1 223)
                  (<= 128 x2)
                  (<= x2 191))
             (uchar? (logior (ash (logand x1 31) 6)
                             (logand x2 63))))
    :hints (("Goal"
             :in-theory (enable utf8-combine2-guard
                                utf8-combine2
                                utf8-table35-bytes
                                utf8-table36-bytes)
             :use ((:instance uchar?-of-utf8-combine2))))))

 ;; Three-byte case with lead byte 224, where x2 is restricted to 160..191.
 ;; The leading 0 in the logior is the lead byte's contribution,
 ;; (ash (logand 224 15) 12), already evaluated.  Proved by instantiating
 ;; uchar?-of-utf8-combine3 with x1 = 224.
 (local
  (defthm terrible-lemma-3
    (implies (and (force (integerp x2))
                  (force (integerp x3))
                  (<= 160 x2)
                  (<= x2 191)
                  (<= 128 x3)
                  (<= x3 191))
             (uchar? (logior 0
                             (ash (logand x2 63) 6)
                             (logand x3 63))))
    :hints (("Goal"
             :in-theory (enable utf8-combine3-guard
                                utf8-combine3
                                utf8-table35-bytes
                                utf8-table36-bytes)
             :use ((:instance uchar?-of-utf8-combine3
                              (x1 224)))))))

 ;; Three-byte case for the "ordinary" lead bytes: x1 in 224..239 other than
 ;; 224 and 237, with both trailing bytes in 128..191.  The inlined combination
 ;; (low 4 bits of x1, then low 6 bits of x2 and of x3) is a uchar?.  Proved by
 ;; appeal to uchar?-of-utf8-combine3.
 (local
  (defthm terrible-lemma-4
    (implies (and (force (integerp x1))
                  (force (integerp x2))
                  (force (integerp x3))
                  (<= 224 x1)
                  (<= x1 239)
                  (not (equal x1 224))
                  (not (equal x1 237))
                  (<= 128 x2)
                  (<= x2 191)
                  (<= 128 x3)
                  (<= x3 191))
             (uchar? (logior (ash (logand x1 15) 12)
                             (ash (logand x2 63) 6)
                             (logand x3 63))))
    :hints (("Goal"
             :in-theory (enable utf8-combine3-guard
                                utf8-combine3
                                utf8-table35-bytes
                                utf8-table36-bytes)
             :use ((:instance uchar?-of-utf8-combine3))))))

 ;; Three-byte case with lead byte 237, where x2 is restricted to 128..159.
 ;; The constant 53248 is the lead byte's contribution,
 ;; (ash (logand 237 15) 12) = 13 * 4096, already evaluated.  Proved by
 ;; instantiating uchar?-of-utf8-combine3 with x1 = 237.
 (local
  (defthm terrible-lemma-5
    (implies (and (force (integerp x2))
                  (force (integerp x3))
                  (<= 128 x2)
                  (<= x2 159)
                  (<= 128 x3)
                  (<= x3 191))
             (uchar? (logior 53248
                             (ash (logand x2 63) 6)
                             (logand x3 63))))
    :hints (("Goal"
             :in-theory (enable utf8-combine3-guard
                                utf8-combine3
                                utf8-table35-bytes
                                utf8-table36-bytes)
             :use ((:instance uchar?-of-utf8-combine3
                              (x1 237)))))))

 ;; Four-byte case with lead byte 240, where x2 is restricted to 144..191.
 ;; The leading 0 in the logior is the lead byte's contribution,
 ;; (ash (logand 240 7) 18), already evaluated.  Proved by instantiating
 ;; uchar?-of-utf8-combine4 with x1 = 240.
 (local
  (defthm terrible-lemma-6
    (implies (and (force (integerp x2))
                  (force (integerp x3))
                  (force (integerp x4))
                  (<= 144 x2)
                  (<= x2 191)
                  (<= 128 x3)
                  (<= x3 191)
                  (<= 128 x4)
                  (<= x4 191))
             (uchar? (logior 0
                             (ash (logand x2 63) 12)
                             (ash (logand x3 63) 6)
                             (logand x4 63))))
    :hints (("Goal"
             :in-theory (enable utf8-combine4-guard
                                utf8-combine4
                                utf8-table35-bytes
                                utf8-table36-bytes)
             :use ((:instance uchar?-of-utf8-combine4
                              (x1 240)))))))
          
 ;; Four-byte case for the "ordinary" lead bytes: x1 in 240..244 other than
 ;; 240 and 244 (so 241, 242 or 243), with all three trailing bytes in
 ;; 128..191.  The inlined combination (low 3 bits of x1, then low 6 bits of
 ;; each trailing byte) is a uchar?.  Proved by appeal to
 ;; uchar?-of-utf8-combine4.
 (local
  (defthm terrible-lemma-7
    (implies (and (force (integerp x1))
                  (force (integerp x2))
                  (force (integerp x3))
                  (force (integerp x4))
                  (<= 240 x1)
                  (<= x1 244)
                  (not (equal x1 240))
                  (not (equal x1 244))
                  (<= 128 x2)
                  (<= x2 191)
                  (<= 128 x3)
                  (<= x3 191)
                  (<= 128 x4)
                  (<= x4 191))
             (uchar? (logior (ash (logand x1 7) 18)
                             (ash (logand x2 63) 12)
                             (ash (logand x3 63) 6)
                             (logand x4 63))))
    :hints (("Goal"
             :in-theory (enable utf8-combine4-guard
                                utf8-combine4
                                utf8-table35-bytes
                                utf8-table36-bytes)
             :use ((:instance uchar?-of-utf8-combine4))))))
                 
 ;; Four-byte case with lead byte 244, where x2 is restricted to 128..143.
 ;; The constant 1048576 is the lead byte's contribution,
 ;; (ash (logand 244 7) 18) = 4 * 2^18, already evaluated.  Proved by
 ;; instantiating uchar?-of-utf8-combine4 with x1 = 244.
 (local
  (defthm terrible-lemma-8
    (implies (and (force (integerp x2))
                  (force (integerp x3))
                  (force (integerp x4))
                  (<= 128 x2)
                  (<= x2 143)
                  (<= 128 x3)
                  (<= x3 191)
                  (<= 128 x4)
                  (<= x4 191))
             (uchar? (logior 1048576
                             (ash (logand x2 63) 12)
                             (ash (logand x3 63) 6)
                             (logand x4 63))))
    :hints (("Goal"
             :in-theory (enable utf8-combine4-guard
                                utf8-combine4
                                utf8-table35-bytes
                                utf8-table36-bytes)
             :use ((:instance uchar?-of-utf8-combine4
                              (x1 244)))))))

 (local (include-book "signed-byte-listp"))

 ;; The contents returned by read-byte$-all form a list of 8-bit unsigned
 ;; bytes.  Stored only as a forward-chaining rule, triggered whenever a
 ;; (read-byte$-all channel state) term appears.  No hints are given, so the
 ;; proof relies on rules already available from the included books.
 (local (defthm unsigned-byte-listp-8-of-car-of-read-byte$-all-forward
          (implies (and (force (state-p1 state))
                        (force (open-input-channel-p1 channel :byte state))
                        (force (symbolp channel)))
                   (unsigned-byte-listp 8 (car (read-byte$-all channel state))))
          :rule-classes ((:forward-chaining :trigger-terms ((read-byte$-all channel state))))))

 ;; The cdr of a list of 8-bit unsigned bytes is again such a list.  Stored as
 ;; a forward-chaining rule.
 (local (defthm unsigned-byte-listp-8-of-cdr-when-unsigned-byte-listp-8
          (implies (unsigned-byte-listp 8 x)
                   (unsigned-byte-listp 8 (cdr x)))
          :rule-classes ((:forward-chaining))))

 ;; For a list of unsigned bytes (of any width BYTES), being a cons is the same
 ;; as being non-nil.  This lets tests of the form (consp x) and x be used
 ;; interchangeably in propositional contexts.
 (local (defthm crock
          (implies (unsigned-byte-listp bytes x)
                   (iff (consp x) 
                        x))))

 ;; If the contents are non-empty, their first element is an 8-bit unsigned
 ;; byte.  Stored both as a rewrite rule and as a forward-chaining rule
 ;; triggered by (car (read-byte$-all channel state)).
 (local (defthm hideous-lemma-1
          (implies (and (force (state-p1 state))
                        (force (open-input-channel-p1 channel :byte state))
                        (force (symbolp channel))
                        (car (read-byte$-all channel state)))
                   (unsigned-byte-p 8 (caar (read-byte$-all channel state))))
          :rule-classes ((:rewrite)
                         (:forward-chaining 
                          :trigger-terms ((car (read-byte$-all channel state)))))))

 ;; If the contents have something after the first element, their second
 ;; element is an 8-bit unsigned byte.  Stored both as a rewrite rule and as a
 ;; forward-chaining rule triggered by (cdar (read-byte$-all channel state)).
 (local (defthm hideous-lemma-2
          (implies (and (force (state-p1 state))
                        (force (open-input-channel-p1 channel :byte state))
                        (force (symbolp channel))
                        (cdar (read-byte$-all channel state)))
                   (unsigned-byte-p 8 (cadar (read-byte$-all channel state))))
          :rule-classes ((:rewrite)
                         (:forward-chaining 
                          :trigger-terms ((cdar (read-byte$-all channel state)))))))

 ;; If the contents have something after the second element, their third
 ;; element is an 8-bit unsigned byte.  Stored both as a rewrite rule and as a
 ;; forward-chaining rule triggered by (cddar (read-byte$-all channel state)).
 ;;
 ;; The hint opens read-byte$-all (with car-of-read-byte$ disabled, as the
 ;; theory invariant requires) and expands it explicitly at the original state
 ;; and at the states reached after one and two calls of read-byte$.
 (local (defthm hideous-lemma-3
          (implies (and (force (state-p1 state))
                        (force (open-input-channel-p1 channel :byte state))
                        (force (symbolp channel))
                        (cddar (read-byte$-all channel state)))
                   (unsigned-byte-p 8 (caddar (read-byte$-all channel state))))
          :rule-classes ((:rewrite)
                         (:forward-chaining 
                          :trigger-terms ((cddar (read-byte$-all channel state)))))
          :hints(("Goal"
                  :in-theory (e/d (read-byte$-all)
                                  (car-of-read-byte$))
                  :expand ((read-byte$-all channel state)
                           (read-byte$-all channel (mv-nth 1 (read-byte$ channel state)))
                           (read-byte$-all 
                            channel 
                            (mv-nth 1 (read-byte$ channel 
                                                  (mv-nth 1 (read-byte$ channel
                                                                        state))))))))))

 ;; If the contents have something after the third element, their fourth
 ;; element is an 8-bit unsigned byte.  Stored both as a rewrite rule and as a
 ;; forward-chaining rule triggered by (cdddar (read-byte$-all channel state)).
 ;;
 ;; The hint opens read-byte$-all (with car-of-read-byte$ disabled, as the
 ;; theory invariant requires) and expands it explicitly at the original state
 ;; and at the states reached after one, two and three calls of read-byte$.
 (local (defthm hideous-lemma-4
          (implies (and (force (state-p1 state))
                        (force (open-input-channel-p1 channel :byte state))
                        (force (symbolp channel))
                        (cdddar (read-byte$-all channel state)))
                   (unsigned-byte-p 8 (car (cdddar (read-byte$-all channel state)))))
          :rule-classes ((:rewrite)
                         (:forward-chaining 
                          :trigger-terms ((cdddar (read-byte$-all channel state)))))
          :hints(("Goal"
                  :in-theory (e/d (read-byte$-all)
                                  (car-of-read-byte$))
                  :expand ((read-byte$-all channel state)
                           (read-byte$-all channel 
                                           (mv-nth 1 (read-byte$ channel state)))
                           (read-byte$-all 
                            channel 
                            (mv-nth 1 (read-byte$ channel 
                                                  (mv-nth 1 (read-byte$ channel state)))))
                           (read-byte$-all 
                            channel 
                            (mv-nth 1 (read-byte$ 
                                       channel
                                       (mv-nth 1 (read-byte$ 
                                                  channel 
                                                  (mv-nth 1 (read-byte$ channel
                                                                        state))))))))))))

 ;; An 8-bit unsigned byte is an integer.  As a rewrite rule this lets
 ;; (integerp x) goals be settled by showing (unsigned-byte-p 8 x).
 (local (defthm integerp-when-unsigned-byte-p-8
          (implies (unsigned-byte-p 8 x)
                   (integerp x))))

 ;; An 8-bit unsigned byte fits in any signed width N greater than 8.
 ;; NOTE(review): presumably needed for the signed-byte-p obligations that the
 ;; the-fixnum declarations in read-utf8-fast generate -- confirm.
 (local (defthm signed-byte-p-from-unsigned-byte-p-8
          (implies (and (unsigned-byte-p 8 x)
                        (< 8 (nfix n)))
                   (signed-byte-p n x))))

 ;; For a true list, having length zero is the same as being nil.
 (local (defthm len-zero-when-true-listp
          (implies (true-listp x)
                   (equal (equal (len x) 0)
                          (not x)))))

 ;; If x lies strictly between n-1 and n+1 for an integer n, then x equals n
 ;; exactly when x is an integer.  The syntaxp test restricts the rule to
 ;; terms where n is a quoted constant, and the backchain limit of 1 keeps the
 ;; effort spent relieving the hypotheses small.
 (local (defthm integer-squeeze-lemma
          (implies (and (syntaxp (quotep n))
                        (integerp n)
                        (< (1- n) x)
                        (< x (1+ n)))
                   (equal (equal x n) 
                          (integerp x)))
          :rule-classes ((:rewrite :backchain-limit-lst 1))))

 ;; When x is known to lie in the byte range 0 <= x < 256, the question of
 ;; whether x is an 8-bit unsigned byte reduces to whether x is an integer.
 ;; The upper bound is the strict bound 256, so the largest byte value, 255,
 ;; is covered by the rule.  The backchain limit of 1 keeps the effort spent
 ;; relieving the two range hypotheses small.
 (local (defthm unsigned-byte-p-8-when-valid-integer
          (implies (and (<= 0 x)
                        (< x 256))
                   (equal (unsigned-byte-p 8 x)
                          (integerp x)))
          :rule-classes ((:rewrite :backchain-limit-lst 1))
          :hints(("Goal" :in-theory (enable unsigned-byte-p)))))

 (local (include-book "arithmetic-3/bind-free/top" :dir :system))

 ;; Skipping n bytes from the state reached after one read-byte$ is the same
 ;; as skipping n+1 bytes from the original state.  The proof expands the
 ;; (nthcdr-bytes (+ 1 n) ...) call once, with nthcdr-bytes enabled, and is
 ;; told not to use induction.
 (local (defthm nthcdr-bytes-hack
          (implies (and (force (state-p1 state))
                        (force (open-input-channel-p1 channel :byte state))
                        (force (symbolp channel))
                        (force (natp n)))
                   (equal (nthcdr-bytes n channel (mv-nth 1 (read-byte$ channel state)))
                          (nthcdr-bytes (+ 1 n) channel state)))
          :hints(("Goal" 
                  :expand (nthcdr-bytes (+ 1 n) channel state)
                  :in-theory (enable nthcdr-bytes)
                  :do-not-induct t))))

 ;; Locally enable the definitions that the guard proof below needs to see
 ;; through: the byte-list recognizer, the single-character decoder, the two
 ;; table predicates, and the combine functions together with their guards.
 ;; Being local to the encapsulate, this does not change the theory outside
 ;; it.
 (local (in-theory (enable unsigned-byte-listp
                           utf8-char=>uchar
                           utf8-table35-bytes
                           utf8-table36-bytes
                           utf8-combine2
                           utf8-combine3
                           utf8-combine4
                           utf8-combine2-guard
                           utf8-combine3-guard
                           utf8-combine4-guard)))

 ;; Verify the guards of read-utf8-fast, which includes showing that the :exec
 ;; branch of its MBE agrees with the :logic branch.  For "Subgoal 2" only, two
 ;; of the local rules above are disabled.
 ;; NOTE(review): the hint is keyed to a subgoal name, so it depends on how the
 ;; prover happens to split the guard conjecture -- re-check it if the
 ;; definition or the supporting lemmas change.
 (verify-guards read-utf8-fast
                :hints(("Subgoal 2"
                        :in-theory (disable unsigned-byte-p-8-when-valid-integer
                                            nthcdr-bytes-hack))))

 )




;; read-utf8 is the top-level entry point for reading a UTF-8 file.  It opens
;; FILENAME as a :byte input channel, decodes the whole channel with
;; read-utf8-fast starting from an empty accumulator, closes the channel, and
;; returns (mv result state).
;;
;; RESULT is the string "Error opening file." when the channel cannot be
;; opened; otherwise it is whatever read-utf8-fast returned.  The channel is
;; closed whether or not decoding succeeded.
(defun read-utf8 (filename state)
  (declare (xargs :guard (and (state-p state)
                              (stringp filename))
                  :stobjs state))
  (mv-let (channel state)
    (open-input-channel filename :byte state)
    (cond ((not channel)
           ;; open-input-channel gave us no channel, so there is nothing to
           ;; read and nothing to close.
           (mv "Error opening file." state))
          (t
           (mv-let (decoded state)
             (read-utf8-fast channel state nil)
             (let ((state (close-input-channel channel state)))
               (mv decoded state)))))))

;; The state returned by read-utf8 satisfies state-p1, provided we started
;; from a state-p1 state and FILENAME is a string.  Both hypotheses are
;; forced.
(defthm state-p1-of-mv-nth-1-of-read-utf8
  (implies (and (force (state-p1 state))
                (force (stringp filename)))
           (state-p1 (mv-nth 1 (read-utf8 filename state))))
  :hints(("Goal" :in-theory (enable read-utf8))))

;; Failure case.  When read-file-bytes returns a string for FILENAME, read-utf8
;; returns that same value.  (As the theorem's name indicates, a string result
;; from read-file-bytes is how "the file cannot be opened" is recognized.)
(defthm car-of-read-utf8-when-file-cannot-be-opened
  (implies (and (force (state-p1 state))
                (force (stringp filename))
                (stringp (car (read-file-bytes filename state))))
           (equal (car (read-utf8 filename state))
                  (car (read-file-bytes filename state))))
  :hints(("Goal" :in-theory (enable read-file-bytes read-utf8))))
           
;; Success case, and the top-level correctness statement for read-utf8.  When
;; read-file-bytes does not return a string for FILENAME, the value returned
;; by read-utf8 is exactly utf8=>ustring applied to the value returned by
;; read-file-bytes.
(defthm car-of-read-utf8-when-file-can-be-opened
  (implies (and (force (state-p1 state))
                (force (stringp filename))
                (not (stringp (car (read-file-bytes filename state)))))
           (equal (car (read-utf8 filename state))
                  (utf8=>ustring (car (read-file-bytes filename state)))))
  :hints(("Goal" :in-theory (enable read-utf8 read-file-bytes))))