File: read.hpp

package info (click to toggle)
r-bioc-alabaster.base 1.6.1%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,652 kB
  • sloc: cpp: 11,377; sh: 29; makefile: 2
file content (209 lines) | stat: -rw-r--r-- 6,599 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#ifndef COMSERVATORY_READ_HPP
#define COMSERVATORY_READ_HPP

#include <vector>
#include <string>
#include "Creator.hpp"
#include "Parser.hpp"
#include "byteme/byteme.hpp"

/**
 * @file read.hpp
 *
 * @brief Read a CSV file.
 */

namespace comservatory {

/**
 * @brief Settings that control how the contents of a CSV file are read.
 */
struct ReadOptions {
    /**
     * Should reading and parsing be parallelized when running in a multi-threaded environment?
     */
    bool parallel{false};

    /**
     * Should the CSV structure merely be validated, without storing any of the data in memory?
     * When `true`, every field in the output `Contents` is represented by a dummy placeholder,
     * and the `creator`, `keep_subset`, `keep_subset_names` and `keep_subset_indices` settings are all ignored.
     */
    bool validate_only{false};

    /**
     * Pointer to an instance of a concrete `FieldCreator` subclass.
     * When `NULL`, an instance of an internal subclass is used instead;
     * this creates `FilledField` objects (or `DummyField` objects, if `dummy = true` in the `FieldCreator::create()` calls).
     */
    const FieldCreator* creator{nullptr};

    /**
     * Should only a subset of the fields be kept?
     * When `true`, data is loaded only for the fields listed in `keep_subset_names` or `keep_subset_indices`,
     * and every other field is represented by a dummy placeholder in the output `Contents`.
     * When `false`, both `keep_subset_names` and `keep_subset_indices` are ignored.
     */
    bool keep_subset{false};

    /**
     * Names of the fields to retain.
     *
     * A name does not need to be present in the CSV file - names that are absent are simply ignored.
     */
    std::vector<std::string> keep_subset_names;

    /**
     * Indices of the fields to retain.
     *
     * Each index is expected to be non-negative and less than the total number of fields.
     * Values outside of this range are ignored.
     */
    std::vector<int> keep_subset_indices;
};

/**
 * @cond
 */
namespace internals {

// Builds a Parser around `creator`. The subset settings in `options` are only
// forwarded to the parser when `options.keep_subset` is enabled; otherwise the
// freshly constructed parser is returned untouched.
inline Parser configure_parser(const FieldCreator* creator, const ReadOptions& options) {
    Parser configured(creator);

    if (!options.keep_subset) {
        return configured;
    }

    configured.set_check_store(true);
    configured.set_store_by_name(options.keep_subset_names);
    configured.set_store_by_index(options.keep_subset_indices);
    return configured;
}

}
/**
 * @endcond
 */

/**
 * @tparam Reader A reader class that implements the same methods as `bytme::Reader`.
 *
 * @param reader Instance of a `Reader` class, containing the data stream for a CSV file.
 * @param contents `Contents` to store the parsed contents of the file.
 * @param options Reading options.
 *
 * `contents` can contain pre-filled `Contents::names`, which will be checked against the header names in the file.
 * Any differences in the observed header names and those in `Contents::names` will throw an error.
 *
 * `contents` can also contain pre-filled `Contents::fields`, which will be directly used for storing data from each column.
 * This is useful if the types of all columns are known in advance, and/or if certain columns need special handling via `Field` subclasses.
 * Any pre-filled field with an `UNKNOWN` type will be replaced via `Creator::create()`.
 */
template<class Reader>
void read(Reader& reader, Contents& contents, const ReadOptions& options) {
    if (options.validate_only) {
        DefaultFieldCreator<true> creator;
        auto parser = internals::configure_parser(&creator, options);
        parser.parse(reader, contents, options.parallel);
    } else if (options.creator) {
        auto parser = internals::configure_parser(options.creator, options);
        parser.parse(reader, contents, options.parallel);
    } else {
        DefaultFieldCreator<false> creator;
        auto parser = internals::configure_parser(&creator, options);
        parser.parse(reader, contents, options.parallel);
    }
}

/**
 * Parse a CSV data stream into a newly created `Contents` object.
 *
 * @tparam Reader A reader class that implements the same methods as `byteme::Reader`.
 *
 * @param reader Instance of a `Reader` class, containing the data stream for a CSV file.
 * @param options Reading options.
 *
 * @return The `Contents` of the CSV file.
 */
template<class Reader>
Contents read(Reader& reader, const ReadOptions& options) {
    // Start from an empty Contents and let the in-place overload fill it.
    Contents parsed;
    read(reader, parsed, options);
    return parsed;
}

/**
 * @param path Path to a (possibly Gzipped) CSV file.
 * @param contents `Contents` to store the parsed contents of the file, see the `read()` overload for details.
 * @param options Reading options.
 *
 * Gzip support requires linking to the Zlib library.
 */
inline void read_file(const char* path, Contents& contents, const ReadOptions& options) {
#if __has_include("zlib.h")
    byteme::SomeFileReader reader(path);
#else
    byteme::RawFileReader reader(path);
#endif
    read(reader, contents, options);
} 

/**
 * Read a CSV file from disk into a newly created `Contents` object.
 *
 * @param path Path to a (possibly Gzipped) CSV file.
 * @param options Reading options.
 *
 * @return The `Contents` of the CSV file.
 *
 * Gzip support requires linking to the Zlib library.
 */
inline Contents read_file(const char* path, const ReadOptions& options) {
    // Start from an empty Contents and let the in-place overload fill it.
    Contents loaded;
    read_file(path, loaded, options);
    return loaded;
}

/**
 * Read a CSV file from disk using default-constructed `ReadOptions`.
 *
 * @param path Path to a (possibly Gzipped) CSV file.
 *
 * @return The `Contents` of the CSV file.
 *
 * Gzip support requires linking to the Zlib library.
 */
inline Contents read_file(const char* path) {
    const ReadOptions default_options{};
    return read_file(path, default_options);
}

/**
 * Read a CSV file from disk into an existing `Contents` object, with the path given as a `std::string`.
 *
 * @param path Path to a (possibly Gzipped) CSV file.
 * @param contents `Contents` in which the parsed contents of the file are stored, see the `read()` overload for details.
 * @param options Reading options.
 *
 * Gzip support requires linking to the Zlib library.
 */
inline void read_file(const std::string& path, Contents& contents, const ReadOptions& options) {
    // Forward to the C-string overload.
    const char* raw_path = path.c_str();
    read_file(raw_path, contents, options);
}

/**
 * Read a CSV file from disk into a newly created `Contents` object, with the path given as a `std::string`.
 *
 * @param path Path to a (possibly Gzipped) CSV file.
 * @param options Reading options.
 *
 * @return The `Contents` of the CSV file.
 *
 * Gzip support requires linking to the Zlib library.
 */
inline Contents read_file(const std::string& path, const ReadOptions& options) {
    // Forward to the C-string overload.
    const char* raw_path = path.c_str();
    return read_file(raw_path, options);
}

/**
 * Read a CSV file from disk using default-constructed `ReadOptions`, with the path given as a `std::string`.
 *
 * @param path Path to a (possibly Gzipped) CSV file.
 *
 * @return The `Contents` of the CSV file.
 *
 * Gzip support requires linking to the Zlib library.
 */
inline Contents read_file(const std::string& path) {
    // Forward to the C-string overload with default options.
    const char* raw_path = path.c_str();
    const ReadOptions default_options{};
    return read_file(raw_path, default_options);
}


}

#endif