/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef INCLUDED_ORCUS_STREAM_HPP
#define INCLUDED_ORCUS_STREAM_HPP
#include "env.hpp"

#include <cstddef>
#include <memory>
#include <string>
#include <string_view>
namespace orcus {
/**
* Represents the content of a file.
*
* The file content is memory-mapped initially, but may later become in-memory
* if the non-utf-8 content gets converted to utf-8.
*/
class ORCUS_PSR_DLLPUBLIC file_content
{
struct impl;
std::unique_ptr<impl> mp_impl;
public:
file_content(const file_content&) = delete;
file_content& operator= (const file_content&) = delete;
file_content();
file_content(file_content&& other);
file_content(std::string_view filepath);
~file_content();
/**
* Obtain the memory address to the first character in the content buffer.
*
* @return pointer to the first character in the buffer.
*/
const char* data() const;
/**
* Return the size of the content i.e. the number of characters in the
* content buffer.
*
* @return size of the content.
*/
size_t size() const;
/**
* Query whether or not the content is empty.
*
* @return true if the content is empty, otherwise false.
*/
bool empty() const;
/**
* Swap content with another instance.
*
* @param other another instance to swap content with.
*/
void swap(file_content& other);
/**
* Load from a new file. This will invalidate the pointer returned from the
* data() method prior to the call.
*
* @param filepath path of the file to load from.
*/
void load(std::string_view filepath);
/**
* Convert a non-utf-8 stream to a utf-8 one if the source stream contains
* a byte order mark. If not, it does nothing. When the conversion
* happens, the converted content will be stored in-memory.
*/
void convert_to_utf8();
std::string_view str() const;
};
/**
* Represents the content of an in-memory buffer. Note that this class will
* NOT own the content of the source buffer but simply will reference it,
* except when the original buffer is a non-utf-8 stream and the caller
* chooses to convert it to utf-8 by calling its convert_to_utf8() method.
*/
class ORCUS_PSR_DLLPUBLIC memory_content
{
struct impl;
std::unique_ptr<impl> mp_impl;
public:
memory_content(const file_content&) = delete;
memory_content& operator= (const file_content&) = delete;
memory_content();
memory_content(std::string_view s);
memory_content(memory_content&& other);
~memory_content();
const char* data() const;
size_t size() const;
bool empty() const;
void swap(memory_content& other);
/**
* Convert a non-utf-8 stream to a utf-8 one if the source stream contains
* a byte order mark. If not, it does nothing. When the conversion
* happens, the converted content will be owned by the object.
*/
void convert_to_utf8();
std::string_view str() const;
};
/**
 * Bundles the text of one line with that line's number and an offset
 * position on it.  This is the type returned by locate_line_with_offset().
 */
struct ORCUS_PSR_DLLPUBLIC line_with_offset
{
    /** Full text of the line. */
    std::string line;

    /** Line number, counted from 0. */
    std::size_t line_number;

    /** Offset within the line, counted from 0. */
    std::size_t offset_on_line;

    /**
     * Create an instance from its three constituent values.
     *
     * @param _line content of the entire line.
     * @param _line_number 0-based line number.
     * @param _offset_on_line 0-based offset within the line.
     */
    line_with_offset(
        std::string _line,
        std::size_t _line_number,
        std::size_t _offset_on_line);

    line_with_offset(const line_with_offset& other);
    line_with_offset(line_with_offset&& other);
    ~line_with_offset();

    /** Equality comparison against another instance. */
    bool operator== (const line_with_offset& other) const;

    /** Inequality comparison against another instance. */
    bool operator!= (const line_with_offset& other) const;
};
/**
 * Build a readable error output for a parse error.  The output includes the
 * line on which the error occurred as well as the offset of the error
 * position on that line.
 *
 * @param strm the whole character stream in which the error occurred.
 * @param offset position of the error, expressed as an offset within the
 *               stream.
 *
 * @return string formatted to be usable as an error message for stdout.
 */
ORCUS_PSR_DLLPUBLIC std::string create_parse_error_output(
    std::string_view strm, std::ptrdiff_t offset);
/**
 * Find the line that contains a given offset position within a string made
 * up of multiple lines, i.e. one that contains multiple line breaks.
 *
 * @param strm string stream containing multiple lines to search.
 * @param offset offset position to look for.
 *
 * @return structure holding information about the line that contains the
 *         offset position.
 *
 * @exception std::invalid_argument if the offset value equals or exceeds the
 *            length of the string stream being searched.
 */
ORCUS_PSR_DLLPUBLIC line_with_offset locate_line_with_offset(
    std::string_view strm, std::ptrdiff_t offset);
/**
 * Given two strings, locate the position of the first character that is
 * different between the two strings.  Note that if one of the strings is
 * empty (or both of them are empty), it returns 0.
 *
 * @param left one of the strings to compare.
 * @param right one of the strings to compare.
 *
 * @return position of the first character that is different between the two
 *         compared strings.
 */
ORCUS_PSR_DLLPUBLIC std::size_t locate_first_different_char(std::string_view left, std::string_view right);
/**
 * Compute the logical length of a string that is encoded in UTF-8.
 *
 * @param s UTF-8 encoded string whose logical length is to be computed.
 *
 * @return logical length of the UTF-8 encoded string.
 */
ORCUS_PSR_DLLPUBLIC std::size_t calc_logical_string_length(
    std::string_view s);
} // namespace orcus
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */