1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
|
#ifndef _BUFFERED_READER_HPP
#define _BUFFERED_READER_HPP
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <exception>
#include <iostream>
#include <list>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <vector>
#include "MaeParserConfig.hpp"
namespace schrodinger
{
/**
* A simple data class to hold unchanging character buffer data. Copies are
* reference counted.
*/
class EXPORT_MAEPARSER BufferData
{
private:
std::vector<char> m_data;
size_t m_size;
public:
explicit BufferData(size_t size = 0);
/**
* Return access to the beginning of the data buffer for loading.
*/
char* begin() { return m_data.data(); }
/**
* Return a pointer to the beginning of the character buffer.
*/
const char* begin() const { return m_data.data(); }
/**
* Return the logical size of the buffer.
*/
size_t size() const { return m_size; }
/**
* Reduce (but not increase) logical size of the buffer (to keep track of
* size).
*
* Specifying a size larger than the current one throws a runtime_error.
*
* This doesn't actually free up any memory or modify the underlying
* character buffer.
*/
void resize(size_t size);
};
/**
* Base class for loading BufferData objects from some source.
*/
class BufferLoader
{
private:
size_t m_default_size;
public:
static const size_t DEFAULT_SIZE = 131072;
explicit BufferLoader(size_t default_size = DEFAULT_SIZE)
: m_default_size(default_size)
{
}
virtual ~BufferLoader() = default;
/**
* Return the default buffer size for this BufferLoader. This should be
* used to construct the BufferData object if no other size is preferred.
*/
virtual size_t getDefaultSize() const { return m_default_size; }
/**
* Load the next chunk of data into the BufferData object.
*
* The amount of data loaded will be a maximum of the current size of
* BufferData.
*
* Returns true if new characters were loaded.
*/
bool load(BufferData& data) const
{
char* begin = nullptr;
char* end = nullptr;
return load(data, begin, end);
}
/**
* Copy everything from 'begin' through 'end' to the beginning of the
* BufferData object (to deal with tokens that span buffer boundaries) and
* then load the next chunk of data into the remainder of the BufferData.
*
* Returns true if new characters were loaded.
*/
virtual bool load(BufferData& data, const char* begin,
const char* end) const;
protected:
/**
* Read 'size' bytes and dump them into 'ptr'. Return the number of bytes
* loaded.
*/
virtual size_t readData(char* ptr, size_t size) const = 0;
};
/**
* A BufferLoader that reads from an input stream.
*
* Note that the input stream is not owned by the StreamLoader (and so
* ifstreams must be closed by the caller).
*/
class StreamLoader : public BufferLoader
{
private:
std::istream& m_stream;
public:
StreamLoader(std::istream& stream) : m_stream(stream) {}
StreamLoader() = delete;
StreamLoader(const StreamLoader&) = delete;
StreamLoader& operator=(const StreamLoader&) = delete;
size_t readData(char* ptr, size_t size) const override;
};
/**
* A BufferLoader that reads from a FILE pointer.
*
* Experiments on OS X show this to be measurably faster than input stream
* reading. While it seems this isn't expected in theory, no way to make them
* equivalent has yet been found in practice.
*
* Note that the FILE pointer is not owned by the FileLoader and must be closed
* by the caller.
*/
class FileLoader : public BufferLoader
{
private:
FILE* m_file;
public:
FileLoader(FILE* file) : m_file(file) {}
size_t readData(char* ptr, size_t size) const override;
};
/**
* Character buffer.
*
* This is a shared resource manager for an allocated character buffer along
* with iterator location information.
*
* The character buffer is stored in a BufferData object, which is reference
* counted and can be retrieved by the data() method.
*/
class EXPORT_MAEPARSER Buffer
{
private:
BufferData m_data;
BufferLoader* m_loader{nullptr};
size_t m_starting_column{1};
public:
char* begin{nullptr};
char* end{nullptr};
char* current{nullptr};
size_t line_number{1};
explicit Buffer(size_t buffer_size = 0);
/**
* Construct an empty buffer that can be loaded from the provided input
* stream.
*/
explicit Buffer(std::istream& stream, size_t buffer_size = 0);
/**
* Construct an empty buffer that can be loaded from the provided FILE
* pointer.
*/
explicit Buffer(FILE* file, size_t buffer_size = 0);
/**
* Create a buffer from a string.
*
* This makes a copy of the string data. Calls to load() always return
* false.
*/
explicit Buffer(const std::string& str);
~Buffer();
Buffer(const Buffer&) = delete;
Buffer& operator=(const Buffer&) = delete;
public:
void setBufferLoader(BufferLoader* loader) { m_loader = loader; }
BufferLoader* getBufferLoader() { return m_loader; }
/**
* Load new BufferData from the BufferLoader. Update Buffer pointers.
*/
bool load()
{
char* save = nullptr;
return load(save);
}
/**
* Save data from the 'save' pointer to the end of the current BufferData
* into a new BufferData instance, then load the remainder of the
* BufferData instance with data from the BufferLoader.
*
* This allows us to deal with tokens that cross buffer boundaries.
*
* Update Buffer pointers.
*/
bool load(char*& save);
inline size_t size() const { return m_data.size(); }
BufferData data() const { return m_data; }
inline bool operator==(const Buffer& other) const
{
return this->m_data.begin() == other.m_data.begin();
}
inline bool operator!=(const Buffer& other) const
{
return !(*this == other);
}
friend std::ostream& operator<<(std::ostream& os, const Buffer& b);
/**
* Return the column number of the last character read.
*/
size_t getColumn() const { return getColumn(current); }
/**
* Return the column number of the provided character.
*/
size_t getColumn(const char* ptr) const;
};
/**
* Allow Buffer objects to be written to a stream.
*
* This purpose of this is only to allow Boost testing assertions to work. It
* doesn't do anything terribly useful.
*/
std::ostream& operator<<(std::ostream& os, const Buffer& b);
/**
* A class to collect tokens with minimal copying by saving the buffer and
* token start and end indices.
*/
class EXPORT_MAEPARSER TokenBufferList
{
public:
/// A simple data class to keep the info about the buffers and tokens
// straight.
class EXPORT_MAEPARSER TokenBuffer
{
public:
BufferData buffer_data;
/// The index of the first token stored in this buffer.
size_t first_value;
/// One greater than the index of the last token stored in this buffer.
size_t last_value;
TokenBuffer(BufferData data, size_t next_index)
: buffer_data(std::move(data)), first_value(next_index),
last_value(next_index)
{
}
};
private:
/// List of TokenBuffer objects.
std::list<TokenBuffer> m_token_buffer_list;
/// Buffer indices for the beginnings of collected tokens.
std::vector<size_t> m_begin;
/// Buffer indices for one past the end of collected tokens.
std::vector<size_t> m_end;
public:
TokenBufferList() : m_token_buffer_list(), m_begin(), m_end() {}
void reserve(size_t size)
{
m_begin.reserve(size);
m_end.reserve(size);
}
inline void setTokenIndices(size_t begin, size_t end)
{
m_begin.push_back(begin);
m_end.push_back(end);
m_token_buffer_list.back().last_value = m_end.size();
}
void appendBufferData(const BufferData& buffer_data);
/**
* Return token data as char pointer and length.
*
* Data is owned by the TokenBufferList and does not need to be freed.
*
* No trailing '\0' is present; data length must be observed.
*/
void getData(size_t index, const char** const data,
size_t* const length) const;
};
/**
* A class to modify a Buffer's loading behavior through RAII.
*
* It collects all loaded BufferData instances and stores them in a
* TokenBufferList for parsing later.
*/
class BufferDataCollector : public BufferLoader
{
private:
Buffer* m_buffer;
BufferLoader* m_loader;
TokenBufferList* m_tokens_list;
public:
BufferDataCollector(Buffer* buffer, TokenBufferList* tokens_list)
: m_loader(nullptr), m_tokens_list(tokens_list)
{
m_buffer = buffer;
m_loader = m_buffer->getBufferLoader();
m_buffer->setBufferLoader(this);
}
~BufferDataCollector() override { m_buffer->setBufferLoader(m_loader); }
bool load(BufferData& data, const char* begin,
const char* end) const override;
size_t readData(char* ptr, size_t size) const override;
};
} // end namespace schrodinger
#endif
|