File: text-codec-util.scm

package info (click to toggle)
scheme48 1.8%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 14,980 kB
  • ctags: 14,127
  • sloc: lisp: 76,272; ansic: 71,514; sh: 3,026; makefile: 637
file content (43 lines) | stat: -rw-r--r-- 1,271 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.
; Copyright (c) 2005-2006 by Basis Technology Corporation.  See file COPYING.

; Utilities for setting and constructing text codecs

; This follows Richard Gillam: Unicode Demystified, Chapter 6
; The BOM is U+FEFF---we look for at the beginning of the stream.
; Note if this fails, you better re-open the port from the start

; Note that the UTF-32 detection suggested in Gillam's book is not
; practical, as it may confuse valid UTF-16 with UTF-32.

(define (guess-port-text-codec-according-to-bom port)
  (let ((first (peek-byte port)))
    (case first
      ((#xfe)
       (read-byte port)
       (if (eqv? #xff (read-byte port))
	   utf-16be-codec
	   #f))
      ((#xff)
       (read-byte port)
       (if (eqv? #xfe (read-byte port))
	   utf-16le-codec
	   #f))
      ((#xef)
       (read-byte port)
       (if (and (eqv? #xbb (read-byte port))
		(eqv? #xbf (read-byte port)))
	   utf-8-codec
	   #f))
      (else
       #f))))

; The caller really should check the return code
(define (set-port-text-codec-according-to-bom! port)
  (cond
   ((guess-port-text-codec-according-to-bom port)
    => (lambda (text-codec)
	 (set-port-text-codec! port text-codec)
	 #t))
   (else
    #f)))