File: Utf8Text.cpp

package info (click to toggle)
storm-lang 0.7.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 52,028 kB
  • sloc: ansic: 261,471; cpp: 140,432; sh: 14,891; perl: 9,846; python: 2,525; lisp: 2,504; asm: 860; makefile: 678; pascal: 70; java: 52; xml: 37; awk: 12
file content (112 lines) | stat: -rw-r--r-- 2,060 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include "stdafx.h"
#include "Utf8Text.h"
#include "Utf.h"

namespace storm {

	// Create a UTF-8 reader on top of 'src'. The buffer is default-constructed here; readByte()
	// fetches data from 'src' when it finds the buffer without any capacity.
	Utf8Input::Utf8Input(IStream *src)
		: src(src),
		  buf(),
		  pos(0) {
	}

	// Create a UTF-8 reader on top of 'src' whose buffer is pre-loaded with the filled part of
	// 'start'. Those bytes are handed out by readByte() before anything more is fetched from 'src'.
	Utf8Input::Utf8Input(IStream *src, Buffer start) : src(src), buf(), pos(0) {
		const Nat preloaded = start.filled();

		// Allocate at least 'bufSize' bytes, or more if 'start' holds more data than that.
		buf = buffer(engine(), max(Nat(bufSize), preloaded));

		// Copy the data and mark the copied range as filled.
		memcpy(buf.dataPtr(), start.dataPtr(), preloaded);
		buf.filled(preloaded);
	}

	void Utf8Input::close() {
		src->close();
	}

	// Decode one character from the byte stream.
	//
	// The first byte determines (through utf8::firstData) the initial bits of the codepoint and
	// how many continuation bytes are expected. If a byte that is not a continuation byte shows
	// up where one is expected, that byte is pushed back so the next call starts decoding from
	// it, and 'replacementChar' is returned instead.
	Char Utf8Input::readChar() {
		byte lead = readByte();
		nat remaining;
		nat codepoint = utf8::firstData(lead, remaining);

		while (remaining > 0) {
			byte next = readByte();

			if (!utf8::isCont(next)) {
				// Malformed sequence: leave 'next' in the buffer and report a replacement character.
				ungetByte();
				return Char(replacementChar);
			}

			codepoint = utf8::addCont(codepoint, next);
			remaining--;
		}

		return Char(codepoint);
	}

	// Return the next byte from the buffer, refilling the buffer from 'src' when all buffered
	// bytes have been consumed. Returns 0 if the refill did not produce any data.
	Byte Utf8Input::readByte() {
		// No buffer allocated yet: let the stream produce one for us.
		if (buf.count() == 0) {
			buf = src->read(bufSize);
			pos = 0;
		}

		// Everything in the buffer has been consumed: fetch more.
		if (pos >= buf.filled()) {
			if (buf.count() >= bufSize) {
				// The current buffer is large enough, hand it back to the stream to be refilled.
				buf.filled(0);
				buf = src->read(buf);
			} else {
				// The current buffer is smaller than 'bufSize', ask for a new read of full size.
				buf = src->read(bufSize);
			}
			pos = 0;
		}

		// Still nothing to read after the refill attempt.
		if (pos >= buf.filled())
			return 0;

		return buf[pos++];
	}

	// Step back one byte in the buffer so that the byte most recently returned by readByte() is
	// returned again. Does nothing when already at the start of the buffer.
	void Utf8Input::ungetByte() {
		if (pos == 0)
			return;

		--pos;
	}

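	/**
	 * Write: Utf8Output, which encodes characters as UTF-8 and buffers the bytes before passing
	 * them on to the output stream.
	 */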

	// Create a UTF-8 writer that sends its output to 'to', using the default-constructed
	// TextOutput base.
	Utf8Output::Utf8Output(OStream *to)
		: TextOutput(),
		  dest(to) {
		// Allocate the output buffer.
		init();
	}

	// Create a UTF-8 writer that sends its output to 'to'. 'info' is forwarded to the TextOutput
	// base class.
	Utf8Output::Utf8Output(OStream *to, TextInfo info)
		: TextOutput(info),
		  dest(to) {
		// Allocate the output buffer.
		init();
	}

	void Utf8Output::init() {
		buf = buffer(engine(), bufSize);
		buf.filled(0);
	}

	// Pass any buffered bytes on to the destination stream and mark the buffer as empty.
	void Utf8Output::flush() {
		const bool pending = buf.filled() > 0;
		if (pending) {
			dest->write(buf);
		}

		// The buffer is reset whether or not anything was written.
		buf.filled(0);
	}

	void Utf8Output::close() {
		flush();
		dest->close();
	}

	// Encode 'ch' as UTF-8 and append the resulting bytes to the output buffer.
	void Utf8Output::writeChar(Char ch) {
		// Scratch space for one encoded codepoint.
		byte encoded[utf8::maxBytes];
		nat length = 0;
		Nat codepoint = ch.codepoint();

		// 'encode' returns a pointer to the first encoded byte and stores the byte count in 'length'.
		byte *first = utf8::encode(codepoint, encoded, &length);
		writeBytes(first, length);
	}

	void Utf8Output::writeBytes(const byte *data, Nat count) {
		Nat filled = buf.filled();
		if (filled + count >= buf.count()) {
			flush();
			filled = buf.filled();
		}

		memcpy(buf.dataPtr() + filled, data, count);
		buf.filled(filled + count);
	}

}