File: unicode.go

package info (click to toggle)
golang-github-dromara-dongle 1.2.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,440 kB
  • sloc: makefile: 4
file content (217 lines) | stat: -rw-r--r-- 6,896 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
// Package unicode implements unicode encoding and decoding with streaming support.
// It provides unicode encoding using strconv.QuoteToASCII for converting
// byte data to unicode escape sequences and back.
package unicode

import (
	"io"
	"strconv"
	"unicode/utf8"
)

// StdEncoder turns raw bytes into their ASCII-only escaped form
// (\uXXXX, \UXXXXXXXX, \xXX, \n, ...), using the same escaping rules as
// strconv.QuoteToASCII but without the surrounding double quotes.
type StdEncoder struct {
	Error error // When non-nil, Encode does nothing and returns nil
}

// NewStdEncoder returns a ready-to-use StdEncoder with no error set.
func NewStdEncoder() *StdEncoder {
	return new(StdEncoder)
}

// Encode returns the escaped, ASCII-only representation of src.
// It returns nil when src is empty or when e.Error is already set.
func (e *StdEncoder) Encode(src []byte) (dst []byte) {
	if e.Error != nil || len(src) == 0 {
		return nil
	}

	// The quoted form always begins and ends with a double quote; slice both
	// off so that only the escaped payload is handed back. The three-index
	// slice keeps the closing quote out of the result's capacity.
	quoted := strconv.AppendQuoteToASCII(nil, string(src))
	last := len(quoted) - 1
	return quoted[1:last:last]
}

// StdDecoder reverses the escaping produced by StdEncoder: it wraps the input
// in double quotes and hands it to strconv.Unquote.
type StdDecoder struct {
	Error error // Set on the first failed Decode; later calls return it unchanged
}

// NewStdDecoder returns a ready-to-use StdDecoder with no error set.
func NewStdDecoder() *StdDecoder {
	return new(StdDecoder)
}

// Decode converts escaped text in src back into the bytes it represents.
//
// It returns (nil, d.Error) when an error is already stored on the decoder and
// (nil, nil) for empty input. When src cannot be unquoted, a DecodeFailedError
// carrying the input is both stored in d.Error and returned.
func (d *StdDecoder) Decode(src []byte) (dst []byte, err error) {
	switch {
	case d.Error != nil:
		return nil, d.Error
	case len(src) == 0:
		return nil, nil
	}

	// strconv.Unquote only accepts a quoted literal, so add the quotes the
	// encoder stripped.
	text, unquoteErr := strconv.Unquote(`"` + string(src) + `"`)
	if unquoteErr != nil {
		failure := DecodeFailedError{Input: string(src)}
		d.Error = failure
		return nil, failure
	}
	return []byte(text), nil
}

// StreamEncoder is a streaming unicode encoder that implements io.WriteCloser.
//
// Every Write escapes all complete UTF-8 sequences received so far and
// forwards them to the underlying writer immediately. Only the bytes of a
// trailing, still incomplete UTF-8 sequence (at most utf8.UTFMax-1 bytes) are
// kept until the next Write or Close, so memory use does not grow with the
// total amount of data written. The concatenated output is byte-for-byte the
// same as escaping the whole input in one go.
type StreamEncoder struct {
	writer    io.Writer // Underlying writer for encoded output
	buffer    []byte    // Held-back bytes of an incomplete trailing UTF-8 sequence
	encodeBuf [512]byte // Scratch space; not referenced by the code in this file
	Error     error     // When non-nil, Write and Close return it without doing any work
}

// NewStreamEncoder creates a new streaming unicode encoder that writes encoded data
// to the provided io.Writer. The encoder uses strconv.QuoteToASCII.
func NewStreamEncoder(w io.Writer) io.WriteCloser {
	return &StreamEncoder{
		writer: w,
	}
}

// Write implements io.Writer. It escapes p (together with any bytes held back
// by a previous call) and writes the result to the underlying writer.
//
// Bytes that form the beginning of a multi-byte UTF-8 sequence whose remainder
// has not arrived yet are retained and not written until the sequence is
// completed by a later Write or the encoder is closed.
//
// On success it returns len(p). If the underlying writer fails, the encoder
// state is rolled back to what it was before the call and (0, err) is returned.
func (e *StreamEncoder) Write(p []byte) (n int, err error) {
	if e.Error != nil {
		return 0, e.Error
	}

	if len(p) == 0 {
		return 0, nil
	}

	held := len(e.buffer)
	e.buffer = append(e.buffer, p...)

	ready := encodableLen(e.buffer)
	if ready == 0 {
		// Everything seen so far is part of one incomplete rune.
		return len(p), nil
	}

	if _, err = e.writer.Write(e.encodeChunk(e.buffer[:ready])); err != nil {
		// Forget p again so the reported count of 0 matches the state.
		e.buffer = e.buffer[:held]
		return 0, err
	}

	// Keep only the incomplete tail, reusing the existing backing array.
	e.buffer = append(e.buffer[:0], e.buffer[ready:]...)
	return len(p), nil
}

// encodableLen returns the length of the longest prefix of b that does not end
// in the middle of a UTF-8 sequence which later bytes could still complete.
// Invalid bytes are never held back: no continuation can change how they are
// escaped.
func encodableLen(b []byte) int {
	// An incomplete sequence is at most utf8.UTFMax-1 bytes long, so its start
	// byte, if any, is among the last utf8.UTFMax-1 bytes.
	for i := len(b) - 1; i >= 0 && i > len(b)-utf8.UTFMax; i-- {
		if !utf8.RuneStart(b[i]) {
			continue
		}
		if utf8.FullRune(b[i:]) {
			return len(b)
		}
		return i
	}
	return len(b)
}

// encodeChunk encodes a chunk of data using unicode encoding.
func (e *StreamEncoder) encodeChunk(data []byte) []byte {
	// Use strconv.QuoteToASCII to convert bytes to unicode escape sequences
	quoted := strconv.QuoteToASCII(string(data))
	// Remove the surrounding quotes added by QuoteToASCII
	return []byte(quoted[1 : len(quoted)-1])
}

// Close implements io.Closer. It escapes and writes any bytes still held back
// from the last Write (an unfinished UTF-8 sequence, emitted as \x escapes).
// It does not close the underlying writer. If the final write fails, the
// held-back bytes are kept and the write error is returned.
func (e *StreamEncoder) Close() error {
	if e.Error != nil {
		return e.Error
	}

	if len(e.buffer) == 0 {
		return nil
	}

	if _, err := e.writer.Write(e.encodeChunk(e.buffer)); err != nil {
		return err
	}
	e.buffer = nil
	return nil
}

// StreamDecoder is a streaming unicode decoder that implements io.Reader.
//
// Encoded input is pulled from the underlying reader in chunks. Because a
// chunk may end in the middle of an escape sequence (for example "\u4e" with
// "2d" arriving in the next read) or in the middle of a raw multi-byte UTF-8
// character, only the part of each chunk that consists of complete tokens is
// decoded; the incomplete tail is carried over and decoded together with the
// next chunk.
type StreamDecoder struct {
	reader    io.Reader  // Underlying reader for encoded input
	buffer    []byte     // Decoded data not yet handed to the caller
	pos       int        // Read position inside buffer
	carry     int        // Number of undecoded bytes kept at the start of readBuf
	readBuf   [1024]byte // Fixed-size reusable buffer for reading encoded data
	decodeBuf [512]byte  // Scratch space; not referenced by the code in this file
	Error     error      // Set on the first decoding failure; later Reads return it
}

// NewStreamDecoder creates a new streaming unicode decoder that reads encoded data
// from the provided io.Reader. The decoder uses strconv.Unquote.
func NewStreamDecoder(r io.Reader) io.Reader {
	return &StreamDecoder{
		reader: r,
		buffer: make([]byte, 0, 1024), // Pre-allocate buffer for decoded data
		pos:    0,
	}
}

// Read implements io.Reader. It returns decoded bytes, reading and decoding
// more input from the underlying reader when no decoded data is pending.
//
// It returns io.EOF once the underlying reader is exhausted and all carried
// input has been decoded. A read that yields no bytes and no error is treated
// as the end of the input. Errors of the underlying reader other than io.EOF
// are returned as is; input that cannot be unquoted yields a DecodeFailedError,
// which is also stored in d.Error and returned by every later call.
func (d *StreamDecoder) Read(p []byte) (n int, err error) {
	if d.Error != nil {
		return 0, d.Error
	}

	// Hand out decoded data left over from a previous call first.
	if d.pos < len(d.buffer) {
		n = copy(p, d.buffer[d.pos:])
		d.pos += n
		return n, nil
	}

	for {
		// New input is placed right behind the carried-over bytes.
		rn, readErr := d.reader.Read(d.readBuf[d.carry:])
		if readErr != nil && readErr != io.EOF {
			return 0, readErr
		}

		avail := d.carry + rn
		end := avail
		if rn > 0 && readErr == nil {
			// More input may follow: stop before an unfinished token.
			end = decodableLen(d.readBuf[:avail])
		}

		if end == 0 {
			if avail == 0 {
				return 0, io.EOF
			}
			// Only a partial token is available so far; fetch more input.
			// The carry stays below 10 bytes, so readBuf always has room.
			d.carry = avail
			continue
		}

		decoded, decodeErr := d.decodeChunk(d.readBuf[:end])
		if decodeErr != nil {
			return 0, decodeErr
		}

		// Move the undecoded tail to the front for the next round.
		d.carry = copy(d.readBuf[:], d.readBuf[end:avail])

		n = copy(p, decoded)
		if n < len(decoded) {
			// Keep what did not fit into p for the next Read.
			d.buffer = decoded[n:]
			d.pos = 0
		}
		return n, nil
	}
}

// decodableLen returns the length of the longest prefix of data that ends on a
// token boundary, where a token is a plain ASCII byte, a complete UTF-8
// sequence or a complete backslash escape as accepted by strconv.Unquote
// (\xHH, \OOO, \uXXXX, \UXXXXXXXX or a two-byte escape such as \n).
// Malformed tokens are not rejected here; they are passed on so that
// unquoting reports the error.
func decodableLen(data []byte) int {
	i := 0
	for i < len(data) {
		width := 1
		c := data[i]
		switch {
		case c == '\\':
			if i+1 >= len(data) {
				return i
			}
			next := data[i+1]
			switch {
			case next == 'x', '0' <= next && next <= '7':
				width = 4 // \xHH or \OOO
			case next == 'u':
				width = 6 // \uXXXX
			case next == 'U':
				width = 10 // \UXXXXXXXX
			default:
				width = 2 // single-character escape
			}
		case c >= utf8.RuneSelf:
			if !utf8.FullRune(data[i:]) {
				return i
			}
			_, width = utf8.DecodeRune(data[i:])
		}
		if i+width > len(data) {
			return i
		}
		i += width
	}
	return len(data)
}

// decodeChunk decodes a chunk of unicode-encoded data by wrapping it in double
// quotes and calling strconv.Unquote. On failure it stores a DecodeFailedError
// carrying the chunk in d.Error and returns it.
func (d *StreamDecoder) decodeChunk(data []byte) (dst []byte, err error) {
	text, unquoteErr := strconv.Unquote(`"` + string(data) + `"`)
	if unquoteErr != nil {
		failure := DecodeFailedError{Input: string(data)}
		d.Error = failure
		return nil, failure
	}
	return []byte(text), nil
}