1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
|
// Formatting library for C++ - scanning API proof of concept
//
// Copyright (c) 2019 - present, Victor Zverovich
// All rights reserved.
//
// For the license information refer to format.h.
#include <array>
#include <cassert>
#include <climits>
#include <tuple>
#include "fmt/format-inl.h"
FMT_BEGIN_NAMESPACE
namespace detail {
// Reports whether c is one of the two characters skipped as whitespace while
// scanning: a space or a line feed.
inline auto is_whitespace(char c) -> bool {
  switch (c) {
  case ' ':
  case '\n':
    return true;
  default:
    return false;
  }
}
// Maps a hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F') to its
// numeric value in [0, 15]. Any other character yields -1.
inline auto to_hex_digit(char c) -> int {
  return ('0' <= c && c <= '9')   ? c - '0'
         : ('a' <= c && c <= 'f') ? c - 'a' + 10
         : ('A' <= c && c <= 'F') ? c - 'A' + 10
                                  : -1;
}
// A pair of character pointers describing [begin, end). A null begin marks
// the "no range" state, which converts to false in a boolean context.
struct maybe_contiguous_range {
  const char* begin;
  const char* end;

  explicit operator bool() const { return !(begin == nullptr); }
};
// Base class for scan input. It exposes the currently available input as the
// pointer pair [ptr_, end_) and asks the derived class for more through
// consume(). Iterators created by begin() all share the buffer's read
// position, so advancing one of them advances the buffer itself.
class scan_buffer {
 private:
  const char* ptr_;
  const char* end_;
  bool contiguous_;

 protected:
  scan_buffer(const char* ptr, const char* end, bool contiguous)
      : ptr_(ptr), end_(end), contiguous_(contiguous) {}
  ~scan_buffer() = default;

  // Replaces the available input with buf.
  void set(span<const char> buf) {
    ptr_ = buf.data;
    end_ = buf.data + buf.size;
  }

  auto ptr() const -> const char* { return ptr_; }

 public:
  scan_buffer(const scan_buffer&) = delete;
  void operator=(const scan_buffer&) = delete;

  // Fills the buffer with more input if available.
  virtual void consume() = 0;

  class sentinel {};

  class iterator {
   private:
    // Points either at the owning buffer's read pointer or, once the input is
    // exhausted, at a shared null pointer that acts as the end marker.
    const char** ptr_;
    scan_buffer* buf_;  // This could be merged with ptr_.
    // Cached current character. It is '\0' whenever the iterator is at the
    // end, so operator* never exposes an indeterminate value.
    char value_ = '\0';

    static auto get_sentinel() -> const char** {
      static const char* ptr = nullptr;
      return &ptr;
    }

    friend class scan_buffer;

    friend auto operator==(iterator lhs, sentinel) -> bool {
      return *lhs.ptr_ == nullptr;
    }
    friend auto operator!=(iterator lhs, sentinel) -> bool {
      return *lhs.ptr_ != nullptr;
    }

    iterator(scan_buffer* buf) : buf_(buf) {
      if (buf->ptr_ == buf->end_) {
        ptr_ = get_sentinel();
        return;
      }
      ptr_ = &buf->ptr_;
      value_ = *buf->ptr_;
    }

    friend scan_buffer& get_buffer(iterator it) { return *it.buf_; }

   public:
    iterator() : ptr_(get_sentinel()), buf_(nullptr) {}

    // Advances the underlying buffer by one character. The new character is
    // loaded only when one exists: at the end of input the buffer pointer
    // equals end_ and must not be dereferenced.
    auto operator++() -> iterator& {
      if (buf_->try_consume()) {
        value_ = *buf_->ptr_;
      } else {
        ptr_ = get_sentinel();
        value_ = '\0';
      }
      return *this;
    }
    auto operator++(int) -> iterator {
      iterator copy = *this;
      ++*this;
      return copy;
    }
    auto operator*() const -> char { return value_; }

    auto base() const -> const char* { return buf_->ptr_; }

    friend auto to_contiguous(iterator it) -> maybe_contiguous_range;
    friend auto advance(iterator it, size_t n) -> iterator;
  };

  // Returns the remaining input as a pointer range if the buffer is
  // contiguous, otherwise a null range.
  friend auto to_contiguous(iterator it) -> maybe_contiguous_range {
    if (it.buf_->is_contiguous()) return {it.buf_->ptr_, it.buf_->end_};
    return {nullptr, nullptr};
  }

  // Moves the read position of a contiguous buffer forward by n characters
  // and returns the updated iterator. The character at the new position is
  // read only if the position is before end_.
  friend auto advance(iterator it, size_t n) -> iterator {
    FMT_ASSERT(it.buf_->is_contiguous(), "");
    const char*& ptr = it.buf_->ptr_;
    ptr += n;
    if (ptr == it.buf_->end_) {
      it.ptr_ = iterator::get_sentinel();
      it.value_ = '\0';
    } else {
      it.value_ = *ptr;
    }
    return it;
  }

  auto begin() -> iterator { return this; }
  auto end() -> sentinel { return {}; }

  auto is_contiguous() const -> bool { return contiguous_; }

  // Tries consuming a single code unit. Returns true iff there is more input.
  auto try_consume() -> bool {
    FMT_ASSERT(ptr_ != end_, "");
    ++ptr_;
    if (ptr_ != end_) return true;
    consume();
    return ptr_ != end_;
  }
};
// Short names for the iterator and end-marker types nested in scan_buffer.
using scan_iterator = scan_buffer::iterator;
using scan_sentinel = scan_buffer::sentinel;
// A scan_buffer over an in-memory string. The whole input is handed to the
// base class up front and flagged as contiguous, so consume() has nothing to
// fetch and does nothing.
class string_scan_buffer final : public scan_buffer {
 public:
  explicit string_scan_buffer(string_view s)
      : scan_buffer(s.begin(), s.end(), true) {}

 private:
  void consume() override {}
};
// A scan_buffer that reads from a C FILE stream. It is constructed as
// non-contiguous and exposes the stream's read buffer, as reported by the
// file wrapper, one chunk at a time. The stream is locked with flockfile for
// the lifetime of the object.
class file_scan_buffer final : public scan_buffer {
private:
// Selected when F has an _IO_read_ptr member and FMT_USE_FALLBACK_FILE is
// false; wraps the stream in glibc_file.
template <typename F, FMT_ENABLE_IF(sizeof(F::_IO_read_ptr) != 0 &&
!FMT_USE_FALLBACK_FILE)>
static auto get_file(F* f, int) -> glibc_file<F> {
return f;
}
// Selected when F has a _p member and FMT_USE_FALLBACK_FILE is false; wraps
// the stream in apple_file.
template <typename F,
FMT_ENABLE_IF(sizeof(F::_p) != 0 && !FMT_USE_FALLBACK_FILE)>
static auto get_file(F* f, int) -> apple_file<F> {
return f;
}
// Catch-all overload. The ellipsis parameter ranks below the int parameter of
// the overloads above, so this is used only when neither of them is viable.
static auto get_file(FILE* f, ...) -> fallback_file<FILE> { return f; }
// The file wrapper type is whatever get_file returns for a FILE*.
decltype(get_file(static_cast<FILE*>(nullptr), 0)) file_;
// Fills the buffer if it is empty.
void fill() {
span<const char> buf = file_.get_read_buffer();
if (buf.size == 0) {
// Read one character and, unless it is EOF, push it back, then query the
// read buffer again. NOTE(review): presumably the read makes the stream
// refill its internal buffer - confirm against the file wrappers.
int c = file_.get();
// Put the character back since we are only filling the buffer.
if (c != EOF) file_.unget(static_cast<char>(c));
buf = file_.get_read_buffer();
}
set(buf);
}
// Reads from the file as many characters as the scan position has advanced
// past the start of the file's read buffer, then exposes the next chunk.
void consume() override {
// Consume the current buffer content.
size_t n = to_unsigned(ptr() - file_.get_read_buffer().data);
for (size_t i = 0; i != n; ++i) file_.get();
fill();
}
public:
// Locks f, then loads the first chunk of input.
explicit file_scan_buffer(FILE* f)
: scan_buffer(nullptr, nullptr, false), file_(f) {
flockfile(f);
fill();
}
// Releases the lock taken in the constructor.
~file_scan_buffer() {
FILE* f = file_;
funlockfile(f);
}
};
} // namespace detail
// Primary scanner template. Its default constructor is deleted, which is how
// a type without scanning support is recognised: a disabled scanner cannot be
// default-constructed.
template <typename T, typename Char = char>
struct scanner {
  scanner() = delete;
};
// Tracks the part of the format string that has not been parsed yet.
class scan_parse_context {
 public:
  using iterator = string_view::iterator;

  FMT_CONSTEXPR explicit scan_parse_context(string_view format)
      : format_(format) {}

  // Start and end of the unparsed remainder of the format string.
  FMT_CONSTEXPR auto begin() const -> iterator { return format_.begin(); }
  FMT_CONSTEXPR auto end() const -> iterator { return format_.end(); }

  // Drops everything before it from the remaining format string.
  void advance_to(iterator it) {
    const auto consumed = detail::to_unsigned(it - begin());
    format_.remove_prefix(consumed);
  }

 private:
  string_view format_;
};
namespace detail {
// Tag identifying which kind of target a scan argument refers to.
enum class scan_type {
none_type,
int_type,
uint_type,
long_long_type,
ulong_long_type,
string_type,
string_view_type,
custom_type
};
// Type-erased custom argument: an untyped pointer to the target object paired
// with the function that scans into it.
template <typename Context> struct custom_scan_arg {
void* value;
void (*scan)(void* arg, scan_parse_context& parse_ctx, Context& ctx);
};
} // namespace detail
// A scan argument: a type tag plus a pointer to the object that receives the
// scanned value. Context is a template parameter for the compiled API where
// output can be unbuffered.
template <typename Context> class basic_scan_arg {
 private:
  using scan_type = detail::scan_type;

  scan_type type_;
  union {
    int* int_value_;
    unsigned* uint_value_;
    long long* long_long_value_;
    unsigned long long* ulong_long_value_;
    std::string* string_;
    string_view* string_view_;
    detail::custom_scan_arg<Context> custom_;
    // TODO: more types
  };

  // Scan function stored for custom types: builds a scanner<T>, lets it parse
  // the format specs, then lets it scan into the object behind arg.
  template <typename T>
  static void scan_custom_arg(void* arg, scan_parse_context& parse_ctx,
                              Context& ctx) {
    auto custom_scanner = scanner<T>();
    parse_ctx.advance_to(custom_scanner.parse(parse_ctx));
    T& target = *static_cast<T*>(arg);
    ctx.advance_to(custom_scanner.scan(target, ctx));
  }

 public:
  // An empty argument; it converts to false.
  FMT_CONSTEXPR basic_scan_arg()
      : type_(scan_type::none_type), int_value_(nullptr) {}

  // One constructor per built-in target type. Each stores the address of
  // value together with the matching tag.
  FMT_CONSTEXPR basic_scan_arg(int& value)
      : type_(scan_type::int_type), int_value_(&value) {}
  FMT_CONSTEXPR basic_scan_arg(unsigned& value)
      : type_(scan_type::uint_type), uint_value_(&value) {}
  FMT_CONSTEXPR basic_scan_arg(long long& value)
      : type_(scan_type::long_long_type), long_long_value_(&value) {}
  FMT_CONSTEXPR basic_scan_arg(unsigned long long& value)
      : type_(scan_type::ulong_long_type), ulong_long_value_(&value) {}
  FMT_CONSTEXPR basic_scan_arg(std::string& value)
      : type_(scan_type::string_type), string_(&value) {}
  FMT_CONSTEXPR basic_scan_arg(string_view& value)
      : type_(scan_type::string_view_type), string_view_(&value) {}

  // Any other type is treated as custom and scanned through scanner<T>.
  template <typename T>
  FMT_CONSTEXPR basic_scan_arg(T& value) : type_(scan_type::custom_type) {
    custom_.value = &value;
    custom_.scan = scan_custom_arg<T>;
  }

  constexpr explicit operator bool() const noexcept {
    return type_ != scan_type::none_type;
  }

  auto type() const -> detail::scan_type { return type_; }

  // Calls vis with a reference to the target object for the built-in types.
  // Empty and custom arguments are visited with monostate instead.
  template <typename Visitor>
  auto visit(Visitor&& vis) -> decltype(vis(monostate())) {
    switch (type_) {
    case scan_type::int_type:
      return vis(*int_value_);
    case scan_type::uint_type:
      return vis(*uint_value_);
    case scan_type::long_long_type:
      return vis(*long_long_value_);
    case scan_type::ulong_long_type:
      return vis(*ulong_long_value_);
    case scan_type::string_type:
      return vis(*string_);
    case scan_type::string_view_type:
      return vis(*string_view_);
    case scan_type::none_type:
    case scan_type::custom_type:
      break;
    }
    return vis(monostate());
  }

  // For a custom argument, rewinds the parse context to parse_begin, runs the
  // stored scan function and returns true. Returns false without doing
  // anything for every other argument type.
  auto scan_custom(const char* parse_begin, scan_parse_context& parse_ctx,
                   Context& ctx) const -> bool {
    const bool is_custom = type_ == scan_type::custom_type;
    if (is_custom) {
      parse_ctx.advance_to(parse_begin);
      custom_.scan(custom_.value, parse_ctx, ctx);
    }
    return is_custom;
  }
};
// scan_context is only declared here because the scan_arg alias needs its
// name before the class is defined.
class scan_context;
using scan_arg = basic_scan_arg<scan_context>;
// A non-owning view of an array of scan arguments: element count plus a
// pointer to the first element. The array passed to the constructor is not
// copied.
struct scan_args {
  int size;
  const scan_arg* data;

  template <size_t N>
  FMT_CONSTEXPR scan_args(const std::array<scan_arg, N>& store)
      : size(static_cast<int>(N)), data(store.data()) {
    // The count is stored as an int, so it has to fit in one.
    static_assert(N < INT_MAX, "too many arguments");
  }
};
// Scanning state handed to scanners: the input buffer and the arguments that
// receive the scanned values.
class scan_context {
 private:
  detail::scan_buffer& buf_;
  scan_args args_;

 public:
  using iterator = detail::scan_iterator;
  using sentinel = detail::scan_sentinel;

  FMT_CONSTEXPR explicit scan_context(detail::scan_buffer& buf, scan_args args)
      : buf_(buf), args_(args) {}

  // Returns the argument with the given index, or an empty scan_arg if the
  // index is outside [0, size). Negative indices are rejected as well so that
  // they can never index in front of the argument array.
  FMT_CONSTEXPR auto arg(int id) const -> scan_arg {
    return id >= 0 && id < args_.size ? args_.data[id] : scan_arg();
  }

  // Iterator at the buffer's current read position, and the end marker.
  auto begin() const -> iterator { return buf_.begin(); }
  auto end() const -> sentinel { return {}; }

  // Ignores the iterator argument and calls consume() on the buffer.
  void advance_to(iterator) { buf_.consume(); }
};
namespace detail {
// Parses scan format specifiers in [begin, end) into specs.
//
// Only 'x' (hexadecimal) is recognised so far. Parsing stops at the first
// character that is not a known specifier and a pointer to that character is
// returned; end is returned if the input runs out first. The caller is
// expected to check that the returned position holds the closing '}'.
const char* parse_scan_specs(const char* begin, const char* end,
                             format_specs& specs, scan_type) {
  while (begin != end) {
    switch (to_ascii(*begin)) {
    // TODO: parse more scan format specifiers
    case 'x':
      specs.set_type(presentation_type::hex);
      ++begin;
      break;
    case '}':
      return begin;
    default:
      // An unknown specifier must end parsing: it is never consumed, so
      // continuing the loop would examine the same character forever.
      return begin;
    }
  }
  return begin;
}
// Reads a decimal number into an unsigned value.
//
// Returns it unchanged, leaving value untouched, when the input is already
// exhausted. Reports "invalid input" if the first character is not a decimal
// digit and "number is too big" if the number does not fit in T. On success
// the returned iterator is positioned after the last digit.
template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
auto read(scan_iterator it, T& value) -> scan_iterator {
  if (it == scan_sentinel()) return it;
  char c = *it;
  if (c < '0' || c > '9') report_error("invalid input");
  const T max_value = (std::numeric_limits<T>::max)();
  T n = 0;
  do {
    const T digit = static_cast<T>(c - '0');
    // Equivalent to n * 10 + digit > max_value, arranged so that nothing
    // wraps around. Checking per digit keeps leading zeros harmless and makes
    // the limit follow T instead of a fixed digit count.
    if (n > (max_value - digit) / 10) report_error("number is too big");
    n = static_cast<T>(n * 10 + digit);
    ++it;
    // Look at the next character only if there is one.
    if (it == scan_sentinel()) break;
    c = *it;
  } while (c >= '0' && c <= '9');
  value = n;
  return it;
}

// Reads a hexadecimal number (digits as accepted by to_hex_digit, without a
// prefix) into an unsigned value.
//
// Returns it unchanged, leaving value untouched, when the input is already
// exhausted. Reports "invalid input" if the first character is not a hex
// digit and "number is too big" if the number does not fit in T.
template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
auto read_hex(scan_iterator it, T& value) -> scan_iterator {
  if (it == scan_sentinel()) return it;
  int digit = to_hex_digit(*it);
  if (digit < 0) report_error("invalid input");
  const int top_nibble_shift = std::numeric_limits<T>::digits - 4;
  T n = 0;
  do {
    // Shifting left by four bits would lose the top nibble if it is non-zero.
    if ((n >> top_nibble_shift) != 0) report_error("number is too big");
    n = static_cast<T>((n << 4) | static_cast<T>(digit));
    ++it;
    // Look at the next character only if there is one.
    if (it == scan_sentinel()) break;
    digit = to_hex_digit(*it);
  } while (digit >= 0);
  value = n;
  return it;
}

// Reads an unsigned value in the base selected by specs: hexadecimal for the
// hex presentation type, decimal otherwise.
template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
auto read(scan_iterator it, T& value, const format_specs& specs)
    -> scan_iterator {
  if (specs.type() == presentation_type::hex) return read_hex(it, value);
  return read(it, value);
}

// Reads a signed value: an optional '-' followed by a magnitude that is read
// with the unsigned overload for the same specs.
//
// Reports "invalid input" if the input ends right after the sign and
// "number is too big" if the result is not representable in T.
template <typename T, FMT_ENABLE_IF(std::is_signed<T>::value)>
auto read(scan_iterator it, T& value, const format_specs& specs = {})
    -> scan_iterator {
  bool negative = it != scan_sentinel() && *it == '-';
  if (negative) {
    ++it;
    if (it == scan_sentinel()) report_error("invalid input");
  }
  using unsigned_type = typename std::make_unsigned<T>::type;
  unsigned_type abs_value = 0;
  it = read(it, abs_value, specs);
  // The unsigned read only guarantees that the magnitude fits the unsigned
  // type. A negative number may have a magnitude one larger than the maximum
  // of T (the minimum of T).
  const unsigned_type max_magnitude = static_cast<unsigned_type>(
      to_unsigned((std::numeric_limits<T>::max)()) + (negative ? 1u : 0u));
  if (abs_value > max_magnitude) report_error("number is too big");
  if (!negative) {
    value = static_cast<T>(abs_value);
  } else if (abs_value == 0) {
    value = 0;
  } else {
    // Negate without overflow: -(abs_value - 1) - 1 stays within T even when
    // the result is the minimum of T.
    value = static_cast<T>(-static_cast<T>(abs_value - 1) - 1);
  }
  return it;
}
// Appends input characters to value until a space or the end of input is
// reached. The space itself is not consumed. The specs are ignored.
auto read(scan_iterator it, std::string& value, const format_specs& = {})
    -> scan_iterator {
  for (; it != scan_sentinel(); ++it) {
    const char c = *it;
    if (c == ' ') break;
    value.push_back(c);
  }
  return it;
}
// Points value at the run of input characters up to, but not including, the
// next space or the end of input, and advances past that run. Nothing is
// copied, so the input has to be contiguous; otherwise an error is reported.
// The specs are ignored.
auto read(scan_iterator it, string_view& value, const format_specs& = {})
    -> scan_iterator {
  const maybe_contiguous_range range = to_contiguous(it);
  // This could also be checked at compile time in scan.
  if (!range) report_error("string_view requires contiguous input");
  const char* word_end = range.begin;
  for (; word_end != range.end; ++word_end) {
    if (*word_end == ' ') break;
  }
  const size_t size = to_unsigned(word_end - range.begin);
  value = {range.begin, size};
  return advance(it, size);
}
// Overload for monostate: there is no value to read, so the iterator is
// returned unchanged.
auto read(scan_iterator it, monostate, const format_specs& = {})
-> scan_iterator {
return it;
}
// An argument scanner that uses the default format, e.g. decimal for integers.
// Calling it forwards to the read overload for the value's type, starting at
// the stored iterator, and returns the iterator that read returns.
struct default_arg_scanner {
scan_iterator it;
template <typename T> FMT_INLINE auto operator()(T&& value) -> scan_iterator {
return read(it, value);
}
};
// An argument scanner with format specifiers.
// Calling it forwards to the read overload for the value's type, passing the
// stored iterator and specs, and returns the iterator that read returns.
struct arg_scanner {
scan_iterator it;
// Held by reference: the specs object must outlive the scanner.
const format_specs& specs;
template <typename T> auto operator()(T&& value) -> scan_iterator {
return read(it, value, specs);
}
};
// Receives the events produced while the format string is parsed and carries
// out the matching scanning work on the input.
struct scan_handler {
 private:
  scan_parse_context parse_ctx_;
  scan_context scan_ctx_;
  int next_arg_id_;

  using sentinel = scan_buffer::sentinel;

 public:
  FMT_CONSTEXPR scan_handler(string_view format, scan_buffer& buf,
                             scan_args args)
      : parse_ctx_(format), scan_ctx_(buf, args), next_arg_id_(0) {}

  auto pos() const -> scan_buffer::iterator { return scan_ctx_.begin(); }

  // Matches the literal text [begin, end) of the format string against the
  // input, one character at a time. Any mismatch, or running out of input, is
  // an error.
  void on_text(const char* begin, const char* end) {
    if (begin == end) return;
    auto input = scan_ctx_.begin();
    while (begin != end) {
      const bool matches = input != sentinel() && *begin == *input;
      if (!matches) on_error("invalid input");
      ++begin;
      ++input;
    }
    scan_ctx_.advance_to(input);
  }

  // Automatic indexing: hands out the next argument index.
  FMT_CONSTEXPR auto on_arg_id() -> int { return on_arg_id(next_arg_id_++); }

  // Manual indexing: the index must refer to an existing argument.
  FMT_CONSTEXPR auto on_arg_id(int id) -> int {
    if (!scan_ctx_.arg(id)) on_error("argument index out of range");
    return id;
  }

  // Named arguments: reports an error for any id whose data pointer is not
  // null, otherwise yields index 0.
  FMT_CONSTEXPR auto on_arg_id(string_view id) -> int {
    if (id.data()) on_error("invalid format");
    return 0;
  }

  // Handles a replacement field without format specs. Custom arguments scan
  // themselves; for the rest, leading whitespace is skipped and the value is
  // read in its default format.
  void on_replacement_field(int arg_id, const char* begin) {
    scan_arg arg = scan_ctx_.arg(arg_id);
    if (arg.scan_custom(begin, parse_ctx_, scan_ctx_)) return;
    auto input = scan_ctx_.begin();
    for (; input != sentinel(); ++input) {
      if (!is_whitespace(*input)) break;
    }
    auto after_value = arg.visit(default_arg_scanner{input});
    scan_ctx_.advance_to(after_value);
  }

  // Handles a replacement field with format specs in [begin, end) and returns
  // the position in the format string where parsing continues. Custom
  // arguments parse their own specs; for the rest the specs must be followed
  // by '}'.
  auto on_format_specs(int arg_id, const char* begin, const char* end)
      -> const char* {
    scan_arg arg = scan_ctx_.arg(arg_id);
    if (arg.scan_custom(begin, parse_ctx_, scan_ctx_))
      return parse_ctx_.begin();
    auto specs = format_specs();
    const char* specs_end = parse_scan_specs(begin, end, specs, arg.type());
    const bool closed = specs_end != end && *specs_end == '}';
    if (!closed) on_error("missing '}' in format string");
    auto input = scan_ctx_.begin();
    auto after_value = arg.visit(arg_scanner{input, specs});
    scan_ctx_.advance_to(after_value);
    return specs_end;
  }

  FMT_NORETURN void on_error(const char* message) { report_error(message); }
};
// Scans buf according to fmt, storing results through args. The work is done
// by a scan_handler that parse_format_string drives over the format string.
void vscan(detail::scan_buffer& buf, string_view fmt, scan_args args) {
auto h = detail::scan_handler(fmt, buf, args);
detail::parse_format_string(fmt, h);
}
// End of the recursion: I equals the number of tuple elements, so there is
// nothing left to fill in.
template <size_t I, typename... T, FMT_ENABLE_IF(I == sizeof...(T))>
void make_args(std::array<scan_arg, sizeof...(T)>&, std::tuple<T...>&) {}
// Makes args[I] refer to the I-th element of values, then recurses to handle
// element I + 1.
template <size_t I, typename... T, FMT_ENABLE_IF(I < sizeof...(T))>
void make_args(std::array<scan_arg, sizeof...(T)>& args,
std::tuple<T...>& values) {
using element_type = typename std::tuple_element<I, std::tuple<T...>>::type;
// Tuple elements must be plain value types, i.e. not references and not
// cv-qualified.
static_assert(std::is_same<remove_cvref_t<element_type>, element_type>::value,
"");
args[I] = std::get<I>(values);
make_args<I + 1>(args, values);
}
} // namespace detail
// Holds the values produced by a scan as a tuple, together with a range
// object of type Range.
template <typename Range, typename... T> class scan_data {
 private:
  std::tuple<T...> values_;
  Range range_;

 public:
  scan_data() = default;
  scan_data(T... values) : values_(std::move(values)...) {}

  // The first scanned value.
  auto value() const -> decltype(std::get<0>(values_)) {
    return std::get<0>(values_);
  }

  // All scanned values.
  auto values() const -> const std::tuple<T...>& { return values_; }

  // Builds one scan argument per tuple element, each referring to the
  // corresponding element of values_.
  auto make_args() -> std::array<scan_arg, sizeof...(T)> {
    std::array<scan_arg, sizeof...(T)> args{};
    detail::make_args<0>(args, values_);
    return args;
  }

  auto range() const -> Range { return range_; }

  auto begin() const -> decltype(range_.begin()) { return range_.begin(); }
  auto end() const -> decltype(range_.end()) { return range_.end(); }
};
// Builds an array with one scan argument per parameter. Each element is
// constructed from the corresponding reference in args.
template <typename... T>
auto make_scan_args(T&... args) -> std::array<scan_arg, sizeof...(T)> {
return {{args...}};
}
// Empty error type used as the error parameter of scan_result.
class scan_error {};
// A rudimentary version of std::expected for testing the API shape. It can
// only be constructed from a value, so it always reports that it has one, and
// error() returns a default-constructed E.
template <typename T, typename E> class expected {
 private:
  T value_;
  bool has_value_;

 public:
  expected(T value) : value_(std::move(value)), has_value_(true) {}

  explicit operator bool() const { return has_value_; }

  auto operator->() const -> const T* { return &value_; }

  auto error() -> E const { return E(); }
};
// Result type of scan: an expected holding the scanned data, with scan_error
// as the error type.
template <typename Range, typename... T>
using scan_result = expected<scan_data<Range, T...>, scan_error>;
// Scans input according to fmt, storing results through args. Returns an
// iterator into input at the buffer's read position after scanning.
auto vscan(string_view input, string_view fmt, scan_args args)
    -> string_view::iterator {
  detail::string_scan_buffer buf(input);
  detail::vscan(buf, fmt, args);
  const auto consumed = buf.begin().base() - input.data();
  return input.begin() + consumed;
}
// Scans the input and stores the results (in)to args.
// Returns whatever vscan returns for the same input and format.
template <typename... T>
auto scan_to(string_view input, string_view fmt, T&... args)
-> string_view::iterator {
return vscan(input, fmt, make_scan_args(args...));
}
// Scans input according to fmt into freshly created values of types T... and
// returns them wrapped in a scan_result.
template <typename... T>
auto scan(string_view input, string_view fmt)
-> scan_result<string_view, T...> {
auto data = scan_data<string_view, T...>();
// The argument array is a temporary; it stays alive until vscan returns.
vscan(input, fmt, data.make_args());
return data;
}
// Overload for inputs that are not convertible to string_view. Scanning uses
// the buffer obtained from the range's begin iterator via get_buffer, and the
// iterator taken before scanning is what gets returned.
template <typename Range, typename... T,
FMT_ENABLE_IF(!std::is_convertible<Range, string_view>::value)>
auto scan_to(Range&& input, string_view fmt, T&... args)
-> decltype(std::begin(input)) {
auto it = std::begin(input);
detail::vscan(get_buffer(it), fmt, make_scan_args(args...));
return it;
}
// Scans from the file f according to fmt and stores the results into args.
// Returns true if the scan buffer still has input available after scanning.
template <typename... T>
auto scan_to(FILE* f, string_view fmt, T&... args) -> bool {
auto&& buf = detail::file_scan_buffer(f);
detail::vscan(buf, fmt, make_scan_args(args...));
return buf.begin() != buf.end();
}
FMT_END_NAMESPACE
|