// Package unicode implements unicode escape encoding and decoding with streaming support.
// Encoding uses strconv.QuoteToASCII to turn byte data into ASCII text with
// escape sequences; decoding uses strconv.Unquote to turn that text back into bytes.
package unicode
import (
	"io"
	"strconv"
	"unicode/utf8"
)
// StdEncoder turns raw bytes into ASCII text in which non-ASCII and
// non-printable characters are written as escape sequences. The conversion
// itself is done by strconv.QuoteToASCII.
type StdEncoder struct {
	Error error // When non-nil, Encode does no work and returns nil
}

// NewStdEncoder returns a StdEncoder with no error set.
func NewStdEncoder() *StdEncoder {
	return new(StdEncoder)
}

// Encode returns the escaped form of src.
//
// The result is nil when src is empty or when e.Error is already set.
// Otherwise src is passed through strconv.QuoteToASCII and the pair of double
// quotes that function wraps around its result is dropped.
func (e *StdEncoder) Encode(src []byte) (dst []byte) {
	if e.Error != nil || len(src) == 0 {
		return nil
	}
	withQuotes := strconv.QuoteToASCII(string(src))
	// The first and last characters are the quotes added by QuoteToASCII.
	body := withQuotes[1 : len(withQuotes)-1]
	return []byte(body)
}
// StdDecoder turns escaped text back into raw bytes using strconv.Unquote.
//
// A failed Decode is remembered in Error; every later Decode call returns
// that stored error without looking at its input.
type StdDecoder struct {
	Error error // Set by the first failed Decode and returned by all later calls
}

// NewStdDecoder returns a StdDecoder with no error set.
func NewStdDecoder() *StdDecoder {
	return new(StdDecoder)
}

// Decode returns the bytes that the escaped text in src stands for.
//
// If d.Error is already set it is returned unchanged. An empty src yields a
// nil slice and a nil error. Otherwise src is wrapped in double quotes and
// handed to strconv.Unquote; if that fails, a DecodeFailedError carrying src
// is stored in d.Error and returned.
func (d *StdDecoder) Decode(src []byte) (dst []byte, err error) {
	switch {
	case d.Error != nil:
		return nil, d.Error
	case len(src) == 0:
		return nil, nil
	}

	// strconv.Unquote only accepts a quoted literal, so put the quotes back.
	text, unquoteErr := strconv.Unquote(`"` + string(src) + `"`)
	if unquoteErr != nil {
		failure := DecodeFailedError{Input: string(src)}
		d.Error = failure
		return nil, failure
	}
	return []byte(text), nil
}
// StreamEncoder is a streaming unicode encoder that implements io.WriteCloser.
//
// Every Write escapes its input with strconv.QuoteToASCII and forwards the
// result to the underlying writer straight away. The only state kept between
// calls is a multi-byte UTF-8 sequence that was cut off at the end of a Write
// (at most three bytes); it is completed by the next Write or flushed by Close.
// The concatenated output is the same as escaping the whole input at once.
type StreamEncoder struct {
	writer    io.Writer // Underlying writer for encoded output
	buffer    []byte    // Unfinished UTF-8 sequence from the end of the last Write
	encodeBuf [512]byte // Not referenced by the methods below
	Error     error     // First error from the underlying writer; returned by all later calls
}

// NewStreamEncoder creates a streaming unicode encoder that writes escaped
// data to w. Close flushes the encoder but does not close w.
func NewStreamEncoder(w io.Writer) io.WriteCloser {
	return &StreamEncoder{
		writer: w,
	}
}

// Write escapes p and writes the result to the underlying writer.
//
// If p ends in the middle of a multi-byte UTF-8 sequence, those trailing bytes
// are held back until the next Write or Close so that the character is escaped
// as a whole. On success it returns len(p). An error from the underlying
// writer is stored in e.Error and returned by this and every later call.
func (e *StreamEncoder) Write(p []byte) (n int, err error) {
	if e.Error != nil {
		return 0, e.Error
	}
	if len(p) == 0 {
		return 0, nil
	}

	data := p
	if len(e.buffer) > 0 {
		// Rejoin the sequence that the previous Write left unfinished.
		data = make([]byte, 0, len(e.buffer)+len(p))
		data = append(data, e.buffer...)
		data = append(data, p...)
	}

	ready := len(data) - partialRuneLen(data)
	if ready > 0 {
		if _, err = e.writer.Write(e.encodeChunk(data[:ready])); err != nil {
			e.Error = err
			return 0, err
		}
	}
	// Copy the tail: p belongs to the caller and must not be retained.
	e.buffer = append(e.buffer[:0], data[ready:]...)
	return len(p), nil
}

// partialRuneLen reports how many bytes at the end of data are the start of a
// multi-byte UTF-8 sequence that is still missing continuation bytes. It
// returns 0 when data ends on a complete or an invalid sequence.
func partialRuneLen(data []byte) int {
	// A sequence is at most utf8.UTFMax bytes long, so an unfinished one has
	// its lead byte among the last UTFMax-1 bytes.
	for back := 1; back < utf8.UTFMax && back <= len(data); back++ {
		tail := data[len(data)-back:]
		if !utf8.RuneStart(tail[0]) {
			continue // continuation byte; keep looking for the lead byte
		}
		if utf8.FullRune(tail) {
			return 0
		}
		return back
	}
	return 0
}

// encodeChunk returns data escaped by strconv.QuoteToASCII, without the
// surrounding double quotes that function adds.
func (e *StreamEncoder) encodeChunk(data []byte) []byte {
	quoted := strconv.QuoteToASCII(string(data))
	return []byte(quoted[1 : len(quoted)-1])
}

// Close flushes the bytes held back by the last Write, if any.
//
// Those bytes are a UTF-8 sequence that never received its continuation
// bytes, so they are escaped on their own. An error from the underlying
// writer is stored in e.Error and returned. The underlying writer itself is
// not closed.
func (e *StreamEncoder) Close() error {
	if e.Error != nil {
		return e.Error
	}
	if len(e.buffer) == 0 {
		return nil
	}

	_, err := e.writer.Write(e.encodeChunk(e.buffer))
	e.buffer = nil
	if err != nil {
		e.Error = err
		return err
	}
	return nil
}
// StreamDecoder is a streaming unicode decoder that implements io.Reader.
//
// It reads escaped text from the underlying reader in fixed-size chunks and
// decodes each chunk with strconv.Unquote. An escape sequence or multi-byte
// UTF-8 character that is cut off at the end of a chunk is held back and
// decoded together with the next chunk, so the result does not depend on how
// the underlying reader splits its data.
type StreamDecoder struct {
	reader    io.Reader  // Underlying reader for encoded input
	buffer    []byte     // Decoded data not yet handed to the caller
	pos       int        // Read position within buffer
	readBuf   [1024]byte // Encoded input; starts with the bytes carried over from the last chunk
	carried   int        // Number of undecoded bytes at the start of readBuf
	decodeBuf [512]byte  // Not referenced by the methods below
	Error     error      // First decoding error; returned by every later Read
}

// NewStreamDecoder creates a streaming unicode decoder that reads escaped
// data from r.
func NewStreamDecoder(r io.Reader) io.Reader {
	return &StreamDecoder{
		reader: r,
	}
}

// Read decodes data from the underlying reader into p.
//
// Decoded bytes that did not fit into p on an earlier call are returned
// first. Otherwise one chunk is read, the part of it that ends on a complete
// escape sequence is decoded, and the unfinished rest is kept for the next
// chunk. At the end of the stream whatever is still held back is decoded as
// is, so a truncated escape sequence is reported as a DecodeFailedError.
//
// Read returns io.EOF once the stream is exhausted, the underlying reader's
// error for any other read failure, and io.ErrNoProgress if the underlying
// reader keeps returning no data and no error. A decoding error is stored in
// d.Error and returned by every later call.
func (d *StreamDecoder) Read(p []byte) (n int, err error) {
	if d.Error != nil {
		return 0, d.Error
	}

	// Hand out decoded bytes left over from an earlier call before reading more.
	if d.pos < len(d.buffer) {
		n = copy(p, d.buffer[d.pos:])
		d.pos += n
		return n, nil
	}
	d.buffer, d.pos = nil, 0 // fully consumed; let the old chunk be collected

	// A reader may legally return (0, nil); retry a bounded number of times
	// instead of mistaking that for the end of the stream.
	const maxEmptyReads = 100
	emptyReads := 0

	for {
		rn, readErr := d.reader.Read(d.readBuf[d.carried:])
		if readErr != nil && readErr != io.EOF {
			return 0, readErr
		}
		atEOF := readErr == io.EOF
		data := d.readBuf[:d.carried+rn]

		if atEOF && len(data) == 0 {
			return 0, io.EOF
		}
		if rn == 0 && !atEOF {
			emptyReads++
			if emptyReads >= maxEmptyReads {
				return 0, io.ErrNoProgress
			}
			continue
		}

		cut := len(data)
		if !atEOF {
			// More input may follow, so keep back anything cut off at the end.
			cut = completePrefixLen(data)
		}
		if cut == 0 {
			// Nothing but the start of one escape or character so far.
			d.carried = len(data)
			continue
		}

		decoded, decodeErr := d.decodeChunk(data[:cut])
		if decodeErr != nil {
			return 0, decodeErr
		}
		// Move the unfinished tail to the front for the next read.
		d.carried = copy(d.readBuf[:], data[cut:])

		n = copy(p, decoded)
		if n < len(decoded) {
			d.buffer = decoded[n:]
		}
		return n, nil
	}
}

// completePrefixLen returns the length of the longest prefix of data that
// does not stop in the middle of a backslash escape sequence or a multi-byte
// UTF-8 character. data must itself begin on such a boundary.
func completePrefixLen(data []byte) int {
	const maxEscapeLen = len(`\U0010ffff`)

	// An unfinished escape is shorter than maxEscapeLen, so its backslash is
	// the last backslash in data and lies within the final maxEscapeLen-1 bytes.
	for i := len(data) - 1; i >= 0 && i > len(data)-maxEscapeLen; i-- {
		if data[i] != '\\' {
			continue
		}
		// Backslashes pair up as `\\`, so this one opens an escape only when
		// the unbroken run of backslashes ending here has odd length.
		run := 1
		for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
			run++
		}
		if run%2 == 0 {
			break
		}
		need := 2 // backslash plus one character, e.g. `\n`
		if i+1 < len(data) {
			switch c := data[i+1]; {
			case c == 'x', '0' <= c && c <= '7':
				need = 4 // `\xHH` or octal `\NNN`
			case c == 'u':
				need = 6 // `\uHHHH`
			case c == 'U':
				need = 10 // `\UHHHHHHHH`
			}
		}
		if i+need > len(data) {
			return i
		}
		break
	}

	// No unfinished escape; check for a raw UTF-8 character that is cut off.
	for back := 1; back < utf8.UTFMax && back <= len(data); back++ {
		i := len(data) - back
		if !utf8.RuneStart(data[i]) {
			continue // continuation byte; keep looking for the lead byte
		}
		if !utf8.FullRune(data[i:]) {
			return i
		}
		break
	}
	return len(data)
}

// decodeChunk decodes one chunk of escaped text with strconv.Unquote.
// On failure it stores a DecodeFailedError carrying the chunk in d.Error and
// returns that error with a nil slice.
func (d *StreamDecoder) decodeChunk(data []byte) (dst []byte, err error) {
	// strconv.Unquote only accepts a quoted literal, so add the quotes.
	text, unquoteErr := strconv.Unquote(`"` + string(data) + `"`)
	if unquoteErr != nil {
		failure := DecodeFailedError{Input: string(data)}
		d.Error = failure
		return nil, failure
	}
	return []byte(text), nil
}
|