1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
|
#include <qpdf/InputSource_private.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QTC.hh>
#include <qpdf/Util.hh>
#include <cstring>
#include <stdexcept>
using namespace std::literals;
using namespace qpdf;
// Store `offset` in the last_offset member, overwriting whatever value was recorded before.
void
InputSource::setLastOffset(qpdf_offset_t offset)
{
    last_offset = offset;
}
// Return the value currently held in the last_offset member. Does not touch the input position.
qpdf_offset_t
InputSource::getLastOffset() const
{
    return last_offset;
}
// Read up to `count` bytes into `str` (passing `at` through to read()) and cut `str` off at the
// first line terminator. Lines are terminated by one or more \r or \n characters; the terminator
// characters are consumed but never returned.
//
// On return the input is positioned after the line terminator sequence or at the end of the file,
// and last_offset is expected to point to the position the input had when the data was read
// (this relies on read() recording that position -- confirm against read()).
//
// Returns the index of the first \r or \n within the data that was read (which is also the new
// size of `str`), or std::string::npos if no terminator was found in those bytes, in which case
// `str` holds everything that was read.
size_t
InputSource::read_line(std::string& str, size_t count, qpdf_offset_t at)
{
    read(str, count, at);
    auto eol = str.find_first_of("\n\r"sv);
    if (eol != std::string::npos) {
        // Locate the first byte of the following line before truncating, since truncation throws
        // away the terminator characters we need to scan over.
        auto next_line = str.find_first_not_of("\n\r"sv, eol);
        str.resize(eol);
        if (next_line != std::string::npos) {
            // The whole terminator sequence was inside the data we read, so we know exactly where
            // the next line starts.
            seek(last_offset + static_cast<qpdf_offset_t>(next_line), SEEK_SET);
            return eol;
        }
        // Otherwise the terminator sequence ran up to the end of the data we read; it may continue
        // beyond it, so fall through and let findAndSkipNextEOL() locate its real end. Casting
        // npos to an offset here would seek to just before last_offset.
    }
    // Either no terminator was seen at all, or we did not necessarily find the end of the trailing
    // newline sequence. Rescan from the starting position.
    seek(last_offset, SEEK_SET);
    findAndSkipNextEOL();
    return eol;
}
std::string
InputSource::readLine(size_t max_line_length)
{
return read_line(max_line_length);
}
inline std::string
InputSource::read_line(size_t count, qpdf_offset_t at)
{
std::string result(count, '\0');
read_line(result, count, at);
return result;
}
// Search the input for the first occurrence of start_chars that is accepted by finder.
//
// start_chars: null-terminated sequence to look for; its length must be between 1 and 1024
//   (sizeof(buf) - 1) inclusive, otherwise the util::assertion below is triggered.
// offset: absolute position at which the search starts.
// len: if non-zero, a candidate whose first character lies len or more bytes past offset ends the
//   search; if zero, the search is limited only by end of input.
// finder: finder.check() is called with the input positioned at the start of each match of
//   start_chars; the first time it returns true, the search succeeds.
//
// Returns true as soon as finder.check() returns true; the input position is then whatever
// check() left it at. Returns false when fewer than strlen(start_chars) bytes remain to be read or
// when an occurrence of the first character is found outside the len window.
bool
InputSource::findFirst(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
{
// Basic approach: search for the first character of start_chars starting from offset but not
// going past len (if len != 0). Once the first character is found, see if it is the beginning
// of a sequence of characters matching start_chars. If so, call finder.check() to do
// caller-specific additional checks. If not, keep searching.
// This code is tricky and highly subject to off-by-one or other edge case logic errors. See
// comments throughout that explain how we're not missing any edge cases. There are also tests
// specifically constructed to make sure we caught the edge cases in testing.
char buf[1025]; // size known to input_source.cc in libtests
// To enable us to guarantee null-termination, save an extra byte so that buf[size] is valid
// memory.
size_t size = sizeof(buf) - 1;
// NOTE(review): the message below names "findSource" although this function is findFirst --
// confirm whether anything depends on the exact text before changing it.
util::assertion(
!(strlen(start_chars) < 1 || strlen(start_chars) > size),
"InputSource::findSource called with too small or too large of a character sequence" //
);
// p is the current scan position inside buf (nullptr until the first block has been read),
// buf_offset is the absolute input offset of buf[0], and bytes_read is the number of valid bytes
// currently in buf.
char* p = nullptr;
qpdf_offset_t buf_offset = offset;
size_t bytes_read = 0;
// Guarantee that we return from this loop. Each time through, we either return, advance p, or
// restart the loop with a condition that will cause return on the next pass. Eventually we will
// either be out of range or hit EOF, either of which forces us to return.
while (true) {
// Do we need to read more data? Pretend size = 5, buf starts at 0, and start_chars has 3
// characters. buf[5] is valid and null. If p == 2, start_chars could be buf[2] through
// buf[4], so p + strlen(start_chars) == buf + size is okay. If p points to buf[size], since
// strlen(start_chars) is always >= 1, this overflow test will be correct for that case
// regardless of start_chars.
if ((p == nullptr) || ((p + strlen(start_chars)) > (buf + bytes_read))) {
if (p) {
QTC::TC(
"libtests", "InputSource read next block", ((p == buf + bytes_read) ? 0 : 1));
// Restart the next block at p so that a candidate straddling the end of this block is
// re-read in full.
buf_offset += (p - buf);
}
this->seek(buf_offset, SEEK_SET);
// Read into buffer and zero out the rest of the buffer including buf[size]. We
// allocated an extra byte so that we could guarantee null termination as an extra
// protection against overrun when using string functions.
bytes_read = this->read(buf, size);
if (bytes_read < strlen(start_chars)) {
// Not enough data left for start_chars to fit: treat as EOF.
QTC::TC("libtests", "InputSource find EOF", bytes_read == 0 ? 0 : 1);
return false;
}
memset(buf + bytes_read, '\0', 1 + (size - bytes_read));
p = buf;
}
// Search for the first character.
if ((p = static_cast<char*>(
// line-break
memchr(p, start_chars[0], bytes_read - QIntC::to_size(p - buf)))) != nullptr) {
if (p == buf) {
QTC::TC("libtests", "InputSource found match at buf[0]");
}
// Found first letter.
if (len != 0) {
// Make sure it's in range.
size_t p_relative_offset = QIntC::to_size((p - buf) + (buf_offset - offset));
if (p_relative_offset >= len) {
// out of range
QTC::TC("libtests", "InputSource out of range");
return false;
}
}
if ((p + strlen(start_chars)) > (buf + bytes_read)) {
// If there are not enough bytes left in the file for start_chars, we will detect
// this on the next pass as EOF and return.
QTC::TC("libtests", "InputSource not enough bytes");
continue;
}
// See if p points to a sequence matching start_chars. We already checked above to make
// sure we are not going to overrun memory.
if (strncmp(p, start_chars, strlen(start_chars)) == 0) {
// Call finder.check() with the input source positioned to the point of the match.
this->seek(buf_offset + (p - buf), SEEK_SET);
if (finder.check()) {
return true;
} else {
QTC::TC("libtests", "InputSource start_chars matched but not check");
}
} else {
QTC::TC("libtests", "InputSource first char matched but not string");
}
// This occurrence of the first character wasn't a match. Skip over it and keep
// searching.
++p;
} else {
// Trigger reading the next block
p = buf + bytes_read;
}
}
}
// Find the last match of start_chars accepted by finder within the window that starts at offset
// and, if len is non-zero, spans len bytes (len == 0 means no length limit, as in findFirst).
//
// This repeatedly calls findFirst(), restarting each search at the position the input was left in
// after the previous successful match, until findFirst() fails or the window is used up.
//
// Returns true if at least one match was found; the input is then positioned where it was left
// after the last successful match. Returns false if there was no match, in which case the input
// position is wherever the failed findFirst() left it.
bool
InputSource::findLast(char const* start_chars, qpdf_offset_t offset, size_t len, Finder& finder)
{
    bool found = false;
    qpdf_offset_t after_found_offset = 0;
    qpdf_offset_t cur_offset = offset;
    size_t cur_len = len;
    while (this->findFirst(start_chars, cur_offset, cur_len, finder)) {
        if (found) {
            QTC::TC("libtests", "InputSource findLast found more than one");
        } else {
            found = true;
        }
        after_found_offset = this->tell();
        cur_offset = after_found_offset;
        // Converted unconditionally so that a position before offset is still rejected by
        // QIntC::to_size, exactly as before.
        size_t const consumed = QIntC::to_size(cur_offset - offset);
        if (len != 0) {
            if (consumed >= len) {
                // The finder left the input at or beyond the end of the window. Passing
                // len - consumed on would yield 0 (or wrap around), which findFirst() treats as
                // "no limit" and would let the search run past the requested range.
                break;
            }
            cur_len = len - consumed;
        }
        // When len == 0 the search is unbounded, so cur_len stays 0.
    }
    if (found) {
        this->seek(after_found_offset, SEEK_SET);
    }
    return found;
}
|