File: read.hpp

package info (click to toggle)
reflect-cpp 0.21.0+ds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 13,128 kB
  • sloc: cpp: 50,336; python: 139; makefile: 30; sh: 3
file content (91 lines) | stat: -rw-r--r-- 3,023 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#ifndef RFL_CSV_READ_HPP_
#define RFL_CSV_READ_HPP_

#include <arrow/csv/reader.h>
#include <arrow/io/api.h>

#include <istream>
#include <memory>
#include <string>
#include <string_view>

#include "../Processors.hpp"
#include "../Result.hpp"
#include "../concepts.hpp"
#include "../internal/wrap_in_rfl_array_t.hpp"
#include "../parsing/tabular/ArrowReader.hpp"
#include "Settings.hpp"

namespace rfl::csv {

/// Parses CSV text held in a raw character buffer into
/// internal::wrap_in_rfl_array_t<T>, using Apache Arrow's CSV table reader
/// followed by the reflection-based tabular ArrowReader.
///
/// @tparam T  The type to parse into.
/// @tparam Ps Additional template arguments forwarded to the ArrowReader.
/// @param _str      Pointer to the first byte of the CSV text.
/// @param _size     Number of bytes available at _str.
/// @param _settings Delimiter, quoting, escaping, newline and null-string
///                  options that are copied into Arrow's parse and convert
///                  options.
/// @return The parsed value, or an error when the Arrow CSV reader cannot be
///         constructed, the table cannot be read, or the ArrowReader fails.
template <class T, class... Ps>
Result<internal::wrap_in_rfl_array_t<T>> read(
    const char* _str, const size_t _size,
    const Settings& _settings = Settings{}) {
  using TableParser =
      parsing::tabular::ArrowReader<T, parsing::tabular::SerializationType::csv,
                                    Ps...>;

  // How the raw text is split into cells: taken field by field from the
  // settings.
  auto parse_opts = arrow::csv::ParseOptions::Defaults();
  parse_opts.delimiter = _settings.delimiter;
  parse_opts.quoting = _settings.quoting;
  parse_opts.quote_char = _settings.quote_char;
  parse_opts.double_quote = _settings.double_quote;
  parse_opts.escaping = _settings.escaping;
  parse_opts.escape_char = _settings.escape_char;
  parse_opts.newlines_in_values = _settings.newlines_in_values;
  parse_opts.ignore_empty_lines = _settings.ignore_empty_lines;

  // How cells are converted: the configured null string is the only null
  // marker, and string columns are allowed to be null as well.
  auto convert_opts = arrow::csv::ConvertOptions::Defaults();
  convert_opts.strings_can_be_null = true;
  convert_opts.null_values =
      std::vector<std::string>({_settings.null_string});

  auto read_opts = arrow::csv::ReadOptions::Defaults();

  // Expose the caller's bytes to Arrow as an input stream.
  const auto raw_bytes = std::make_shared<arrow::Buffer>(
      internal::ptr_cast<const uint8_t*>(_str), _size);
  std::shared_ptr<arrow::io::InputStream> source =
      std::make_shared<arrow::io::BufferReader>(raw_bytes);

  arrow::io::IOContext context = arrow::io::default_io_context();

  auto reader_result = arrow::csv::TableReader::Make(
      context, source, read_opts, parse_opts, convert_opts);
  if (!reader_result.ok()) {
    return error("Could not construct CSV reader: " +
                 reader_result.status().message());
  }

  auto table_result = (*reader_result)->Read();
  if (!table_result.ok()) {
    return error("Could not read table: " + table_result.status().message());
  }

  const std::shared_ptr<arrow::Table> parsed_table = *table_result;

  // Hand the Arrow table to the reflection layer, which produces the result.
  return TableParser::make(parsed_table)
      .and_then([](const auto& _table_reader) { return _table_reader.read(); });
}

/// Parses CSV text held in a string view.
///
/// Convenience overload that forwards the view's data pointer and length,
/// together with the settings, to the pointer/size overload of read.
template <class T, class... Ps>
auto read(const std::string_view _str, const Settings& _settings = Settings{}) {
  const char* const first_byte = _str.data();
  const auto num_bytes = _str.size();
  return read<T, Ps...>(first_byte, num_bytes, _settings);
}

/// Parses CSV text taken from an input stream.
///
/// Everything remaining in the stream's buffer is first collected into
/// memory, and the collected bytes are then passed, together with the
/// settings, to the pointer/size overload of read.
template <class T, class... Ps>
auto read(std::istream& _stream, const Settings& _settings = Settings{}) {
  using CharIterator = std::istreambuf_iterator<char>;
  // A default-constructed istreambuf_iterator is the end-of-stream sentinel.
  const std::vector<char> contents(CharIterator{_stream}, CharIterator{});
  return read<T, Ps...>(contents.data(), contents.size(), _settings);
}

}  // namespace rfl::csv

#endif