1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
; Part of Scheme 48 1.9. See file COPYING for notices and license.
; Authors: Mike Sperber, Robert Ransom
; You may think that file names / environment variables / user names
; etc. are just text, but on most platforms, that assumption is wrong:
; They are usually NUL-terminated byte strings in some format. The
; bytes are invariant, but the corresponding text may depend on the
; locale. Also, byte sequences without a textual representation are
; possible.
; We assume that OS strings are encoded in some conservative extension
; of NUL-terminated ASCII. On Unix, this assumption pretty much has
; to hold true because of the various constraints of locale handling
; there. The Windows API uses an extension of UTF-16 that includes
; unpaired surrogates. For this, we use a synthetic extension of
; UTF-8 called UTF-8of16 that also deals with unpaired surrogates.
; #### lossiness
(define-record-type os-string :os-string
(really-make-os-string text-codec string byte-vector)
os-string?
(text-codec os-string-text-codec)
; may be #f, will get cached value
(string os-string-string set-os-string-string!)
; may be #f, will get cached value
(byte-vector os-string-byte-vector set-os-string-byte-vector!))
(define-record-discloser :os-string
(lambda (oss)
(list "OS-string"
(text-codec-names (os-string-text-codec oss))
(os-string->string oss))))
(define *initial-os-string-text-codec* #f)
(define (initialize-os-string-text-codec!)
(set! *initial-os-string-text-codec*
(or (find-text-codec
(system-parameter (enum system-parameter-option os-string-encoding)))
us-ascii-codec)))
(define $os-string-text-codec
(make-fluid
(lambda () *initial-os-string-text-codec*)))
(define (current-os-string-text-codec)
((fluid $os-string-text-codec)))
(define (call-with-os-string-text-codec codec thunk)
(let-fluid $os-string-text-codec (lambda () codec)
thunk))
(define (make-os-string codec thing)
(call-with-values
(lambda ()
(cond
((string? thing)
(values (make-immutable! thing) #f))
((byte-vector? thing)
(values #f (make-immutable! (byte-vector-copy-z thing))))
(else
(assertion-violation 'make-os-string "invalid argument" thing))))
(lambda (str bv)
(really-make-os-string codec str bv))))
(define (string->os-string s)
(let ((c (string-copy s)))
(make-immutable! c)
(really-make-os-string (current-os-string-text-codec)
c #f)))
(define (byte-vector->os-string b)
(let ((c (byte-vector-copy-z b)))
(make-immutable! b)
(really-make-os-string (current-os-string-text-codec)
#f c)))
(define (os-string->byte-vector oss)
(or (os-string-byte-vector oss)
(let* ((string (os-string-string oss))
(codec (os-string-text-codec oss))
(size (string-encoding-length codec
string
0
(string-length string)))
(bytes (make-byte-vector (+ size 1) 0))) ; NUL termination
(encode-string codec
string 0 (string-length string)
bytes 0 size)
(set-os-string-byte-vector! oss bytes)
(make-immutable! bytes)
bytes)))
(define (os-string->string oss)
(or (os-string-string oss)
(let* ((bytes (os-string-byte-vector oss))
(size (- (byte-vector-length bytes) 1))
(codec (os-string-text-codec oss)))
(call-with-values
(lambda ()
(bytes-string-size codec bytes 0 size #f))
(lambda (status consumed-count decoded-count)
(let ((string (make-string decoded-count)))
(decode-string codec bytes 0 size
string 0 decoded-count
#\?)
(set-os-string-string! oss string)
(make-immutable! string)
string))))))
(define (x->os-string x)
(cond
((os-string? x) x)
((string? x) (string->os-string x))
((byte-vector? x) (byte-vector->os-string x))))
(define (os-string=? os1 os2)
(byte-vector=? (os-string->byte-vector os1) (os-string->byte-vector os2)))
; frequent idioms
(define (string->os-byte-vector s)
(os-string->byte-vector (string->os-string s)))
(define (x->os-byte-vector x)
(os-string->byte-vector (x->os-string x)))
; Utilities
(define (byte-vector-copy-z b)
(let* ((size-old (byte-vector-length b))
(nul? (and (positive? size-old)
(zero? (byte-vector-ref b (- size-old 1)))))
(size (if nul? size-old (+ 1 size-old)))
(result (make-byte-vector size 0)))
(copy-bytes! b 0 result 0 size-old)
result))
; Initialization
(initialize-os-string-text-codec!)
|