File: Text.h

package info (click to toggle)
storm-lang 0.7.5-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 52,100 kB
  • sloc: ansic: 261,471; cpp: 140,438; sh: 14,891; perl: 9,846; python: 2,525; lisp: 2,504; asm: 860; makefile: 678; pascal: 70; java: 52; xml: 37; awk: 12
file content (177 lines) | stat: -rw-r--r-- 5,070 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#pragma once
#include "Core/Object.h"
#include "Core/Char.h"
#include "Url.h"
#include "Stream.h"

namespace storm {
	STORM_PKG(core.io);

	/**
	 * Text IO. Supports UTF8 and UTF16, little and big endian. Use 'readText' to detect which
	 * encoding is used and create the correct reader for that encoding.
	 *
	 * Line endings are always converted into a single \n (i.e. Unix line endings). If the original
	 * line endings are desired, use 'readAllRaw' on a TextInput. Supports \r\n (Windows), \n
	 * (Unix) and \r (Macintosh).
	 *
	 * TODO: This should also be extensible.
	 * TODO: Better handling of line endings.
	 * TODO: Find a good way of duplicating an input format to an output format.
	 * TODO: Add TextInfo to a TextInput as well!
	 * TODO: It is quite natural to have 'readLine' return MAYBE(Str *), then we can do:
	 *   while (line = x.readLine()) {}
	 */

	/**
	 * Information about encoded text.
	 *
	 * Describes which line endings to use and whether a byte order mark (BOM) is emitted. A
	 * 'TextOutput' can be constructed from a 'TextInfo' to select its output format.
	 */
	class TextInfo {
		STORM_VALUE;
	public:
		// Create the default configuration (all members set to false).
		STORM_CTOR TextInfo();

		// Use windows-style line endings (\r\n) instead of Unix-style line endings (\n).
		Bool useCrLf;

		// Output a byte order mark first.
		Bool useBom;
	};

	// Create the default text information for the current system.
	TextInfo STORM_FN sysTextInfo();

	// Create a text information that produces Unix-style line endings.
	TextInfo STORM_FN unixTextInfo();

	// Create a text information that produces Windows-style line endings.
	// NOTE(review): whether this overload enables a BOM is not visible from this header; use the
	// overload taking a `Bool` to be explicit.
	TextInfo STORM_FN windowsTextInfo();

	// Create a text information that produces Windows-style line endings, and specify whether a BOM
	// should be output.
	TextInfo STORM_FN windowsTextInfo(Bool bom);


	/**
	 * Base interface for reading text. Caches one character. When implementing your own version,
	 * override 'readChar' to read a code point in UTF32. Manages line endings so that the 'read'
	 * functions will always only see '\n' as line endings. 'readRaw' and 'peek' may observe a '\r'
	 * sometimes.
	 *
	 * The character-level functions ('read', 'readRaw' and 'peek') signal failure by returning
	 * `Char(0)`. Use 'more' to check whether more data is available.
	 */
	class TextInput : public Object {
		STORM_ABSTRACT_CLASS;
	public:
		// Create.
		STORM_CTOR TextInput();

		// Read a single character from the stream, with line endings converted to '\n'. Returns
		// `Char(0)` on failure.
		Char STORM_FN read();

		// Read a single character from the stream without line-ending conversion. Returns
		// `Char(0)` on failure.
		Char STORM_FN readRaw();

		// Peek a single character. Like 'readRaw', this may observe a '\r'. Returns `Char(0)` on
		// failure.
		Char STORM_FN peek();

		// Read an entire line from the file. Removes any line endings.
		Str *STORM_FN readLine();

		// Read the entire file into a string.
		Str *STORM_FN readAll();

		// Read the entire file without any conversions of line endings (still ignores any BOM).
		Str *STORM_FN readAllRaw();

		// Does the file contain any more data? Note: if a timeout is reached in the underlying
		// stream, 'more' will return false until a future call of `peek` or `read`.
		Bool STORM_FN more();

		// Close the underlying stream.
		virtual void STORM_FN close();

	protected:
		// Override in derived readers, read one character. The character is a code point in UTF32.
		virtual Char STORM_FN readChar() ABSTRACT;

	private:
		// Cached code point. 0 if at end of stream.
		Char next;

		// Is 'next' valid?
		Bool hasNext;

		// First character?
		// NOTE(review): presumably used by 'doRead' to only look for a BOM at the very start of
		// the stream -- confirm in the implementation.
		Bool first;

		// At end of file.
		Bool eof;

		// Helper for reading which removes any BOM.
		Char doRead();
	};


	// Create a text reader. Identifies the encoding automatically and creates an appropriate reader.
	TextInput *STORM_FN readText(IStream *stream);

	// Create a text reader from an `Url`. Equivalent to calling `readText(file.read())`.
	TextInput *STORM_FN readText(Url *file);

	// Create a text reader that reads data from a string. Utilizes `StrInput`.
	TextInput *STORM_FN readStr(Str *from);

	// Read the text from a stream into a string. Equivalent to calling `readText(stream).readAll()`.
	Str *STORM_FN readAllText(IStream *stream);

	// Read the text from a file into a string. Equivalent to calling `readText(file).readAll()`.
	Str *STORM_FN readAllText(Url *file);


	/**
	 * Base interface for writing text. Buffers entire lines. When implementing your own version,
	 * override 'writeChar' and 'flush' to write a code point in UTF32.
	 *
	 * The output format (line endings and BOM) is described by a 'TextInfo' that may be passed to
	 * the constructor.
	 */
	class TextOutput : public Object {
		STORM_ABSTRACT_CLASS;
	public:
		// Create. Outputs plain Unix line endings.
		STORM_CTOR TextOutput();

		// Create. Specify line endings.
		STORM_CTOR TextOutput(TextInfo info);

		// Automatic flush on newline? (on by default)
		Bool autoFlush;

		// Write a character.
		void STORM_FN write(Char c);

		// Write a string.
		void STORM_FN write(Str *s);

		// Write a string, add any line endings.
		// NOTE(review): presumably appends a single line ending in the style selected by the
		// 'TextInfo' -- confirm in the implementation.
		void STORM_FN writeLine(Str *s);

		// Write a new-line character.
		void STORM_FN writeLine();

		// Flush all buffered output to the underlying stream.
		virtual void STORM_FN flush();

		// Close the underlying stream.
		virtual void STORM_FN close();

	protected:
		// Override in derived writers. Write one character. The character is a code point in UTF32.
		virtual void STORM_FN writeChar(Char ch) ABSTRACT;

	private:
		// Text config (line endings and BOM).
		TextInfo config;

		// Write a BOM if needed.
		// NOTE(review): presumably controlled by 'config.useBom' -- confirm in the implementation.
		void writeBom();
	};

}