1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
package mahonia
// This file is based on bufio.Reader in the Go standard library,
// which has the following copyright notice:
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
import (
"io"
"unicode/utf8"
)
const (
defaultBufSize = 4096
)
// Reader implements character-set decoding for an io.Reader object.
type Reader struct {
buf []byte
rd io.Reader
decode Decoder
r, w int
err error
}
// NewReader creates a new Reader that uses the receiver to decode text.
func (d Decoder) NewReader(rd io.Reader) *Reader {
b := new(Reader)
b.buf = make([]byte, defaultBufSize)
b.rd = rd
b.decode = d
return b
}
// fill reads a new chunk into the buffer.
func (b *Reader) fill() {
// Slide existing data to beginning.
if b.r > 0 {
copy(b.buf, b.buf[b.r:b.w])
b.w -= b.r
b.r = 0
}
// Read new data.
n, e := b.rd.Read(b.buf[b.w:])
b.w += n
if e != nil {
b.err = e
}
}
// Read reads data into p.
// It returns the number of bytes read into p.
// It calls Read at most once on the underlying Reader,
// hence n may be less than len(p).
// At EOF, the count will be zero and err will be os.EOF.
func (b *Reader) Read(p []byte) (n int, err error) {
n = len(p)
filled := false
if n == 0 {
return 0, b.err
}
if b.w == b.r {
if b.err != nil {
return 0, b.err
}
if n > len(b.buf) {
// Large read, empty buffer.
// Allocate a larger buffer for efficiency.
b.buf = make([]byte, n)
}
b.fill()
filled = true
if b.w == b.r {
return 0, b.err
}
}
i := 0
for i < n {
rune, size, status := b.decode(b.buf[b.r:b.w])
if status == STATE_ONLY {
b.r += size
continue
}
if status == NO_ROOM {
if b.err != nil {
rune = 0xfffd
size = b.w - b.r
if size == 0 {
break
}
status = INVALID_CHAR
} else if filled {
break
} else {
b.fill()
filled = true
continue
}
}
if i+utf8.RuneLen(rune) > n {
break
}
b.r += size
if rune < 128 {
p[i] = byte(rune)
i++
} else {
i += utf8.EncodeRune(p[i:], rune)
}
}
return i, nil
}
// ReadRune reads a single Unicode character and returns the
// rune and its size in bytes.
func (b *Reader) ReadRune() (c rune, size int, err error) {
read:
c, size, status := b.decode(b.buf[b.r:b.w])
if status == NO_ROOM && b.err == nil {
b.fill()
goto read
}
if status == STATE_ONLY {
b.r += size
goto read
}
if b.r == b.w {
return 0, 0, b.err
}
if status == NO_ROOM {
c = 0xfffd
size = b.w - b.r
status = INVALID_CHAR
}
b.r += size
return c, size, nil
}
|