File: load_dataset.hpp

package info (click to toggle)
r-bioc-alabaster.base 1.6.1+ds-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 1,652 kB
  • sloc: cpp: 11,377; sh: 29; makefile: 2
file content (126 lines) | stat: -rw-r--r-- 4,366 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#ifndef RITSUKO_HDF5_LOAD_DATASET_HPP
#define RITSUKO_HDF5_LOAD_DATASET_HPP

#include <string>
#include <vector>
#include <stdexcept>

#include "H5Cpp.h"

#include "get_name.hpp"
#include "Stream1dStringDataset.hpp"
#include "Stream1dNumericDataset.hpp"
#include "as_numeric_datatype.hpp"
#include "_strings.hpp"

/**
 * @file load_dataset.hpp
 * @brief Helper functions to load datasets.
 */

namespace ritsuko {

namespace hdf5 {

/**
 * Read the sole entry of a scalar string dataset.
 * Both variable-length and fixed-length string datatypes are handled.
 *
 * @param handle Handle to the HDF5 scalar dataset.
 * @return String holding the contents of the dataset's only entry.
 * For fixed-length strings, the result is cut to the length reported by `find_string_length()`.
 * @throws std::runtime_error if a variable-length string is read back as a NULL pointer.
 */
inline std::string load_scalar_string_dataset(const H5::DataSet& handle) {
    auto string_type = handle.getDataType();

    if (!string_type.isVariableStr()) {
        // Fixed-length case: pull the raw bytes into a buffer of the datatype's size, then trim.
        size_t width = string_type.getSize();
        std::vector<char> raw(width);
        handle.read(raw.data(), string_type);
        auto used = find_string_length(raw.data(), width);
        return std::string(raw.begin(), raw.begin() + used);
    }

    // Variable-length case: the read fills in a pointer to the string.
    char* contents;
    handle.read(&contents, string_type);

    // The dataspace must live in a named variable; handing a temporary to the cleaner
    // would destroy it immediately and invalidate the ID that the cleaner holds.
    auto space = handle.getSpace();
    [[maybe_unused]] VariableStringCleaner cleaner(string_type.getId(), space.getId(), &contents);

    if (contents == nullptr) {
        throw std::runtime_error("detected a NULL pointer for a variable length string in '" + get_name(handle) + "'");
    }

    // Copy into a std::string before the cleaner goes out of scope.
    return std::string(contents);
}

/**
 * Read an entire 1-dimensional string dataset into memory.
 * Entries are pulled one at a time from a `Stream1dStringDataset`.
 *
 * @param handle Handle to the 1-dimensional HDF5 dataset.
 * @param full_length Length of the dataset as a 1-dimensional vector.
 * @param buffer_size Size of the buffer for holding loaded strings, passed on to the stream.
 * @return Vector of strings with `full_length` entries, in dataset order.
 */
inline std::vector<std::string> load_1d_string_dataset(const H5::DataSet& handle, hsize_t full_length, hsize_t buffer_size) {
    Stream1dStringDataset reader(&handle, full_length, buffer_size);

    std::vector<std::string> collected;
    collected.reserve(full_length);

    hsize_t remaining = full_length;
    while (remaining > 0) {
        // Take ownership of the current string from the stream, then advance to the next entry.
        collected.emplace_back(reader.steal());
        reader.next();
        --remaining;
    }

    return collected;
}

/**
 * Overload of `load_1d_string_dataset()` that obtains the dataset length from `get_1d_length()`
 * instead of requiring the caller to supply it.
 *
 * @param handle Handle to the 1-dimensional HDF5 dataset.
 * @param buffer_size Size of the buffer for holding loaded strings.
 * @return Vector of strings.
 */
inline std::vector<std::string> load_1d_string_dataset(const H5::DataSet& handle, hsize_t buffer_size) {
    auto full_length = get_1d_length(handle, false);
    return load_1d_string_dataset(handle, full_length, buffer_size);
}


/**
 * Read the sole entry of a scalar numeric dataset.
 * The in-memory HDF5 datatype used for the read is chosen by `as_numeric_datatype<Type_>()`.
 *
 * @tparam Type_ Type of the number in memory.
 * @param handle Handle to the HDF5 scalar dataset.
 * @return Value of the dataset's only entry.
 */
template<typename Type_>
Type_ load_scalar_numeric_dataset(const H5::DataSet& handle) {
    Type_ value;
    const auto& memory_type = as_numeric_datatype<Type_>();
    handle.read(&value, memory_type);
    return value;
}

/**
 * Read an entire 1-dimensional numeric dataset into memory.
 * Entries are pulled one at a time from a `Stream1dNumericDataset`.
 *
 * @tparam Type_ Type of the number in memory.
 * @param handle Handle to the HDF5 dataset.
 * @param full_length Length of the dataset as a 1-dimensional vector.
 * @param buffer_size Size of the buffer for holding loaded numbers, passed on to the stream.
 * @return Vector of numbers with `full_length` entries, in dataset order.
 */
template<typename Type_>
std::vector<Type_> load_1d_numeric_dataset(const H5::DataSet& handle, hsize_t full_length, hsize_t buffer_size) {
    Stream1dNumericDataset<Type_> reader(&handle, full_length, buffer_size);

    std::vector<Type_> collected;
    collected.reserve(full_length);

    hsize_t remaining = full_length;
    while (remaining > 0) {
        // Copy out the current value, then advance the stream to the next entry.
        collected.push_back(reader.get());
        reader.next();
        --remaining;
    }

    return collected;
}

/**
 * Overload of `load_1d_numeric_dataset()` that obtains the dataset length from `get_1d_length()`
 * instead of requiring the caller to supply it.
 *
 * @tparam Type_ Type of the number in memory.
 * @param handle Handle to the HDF5 dataset.
 * @param buffer_size Size of the buffer for holding loaded numbers.
 * @return Vector of numbers.
 */
template<typename Type_>
std::vector<Type_> load_1d_numeric_dataset(const H5::DataSet& handle, hsize_t buffer_size) {
    auto full_length = get_1d_length(handle, false);
    return load_1d_numeric_dataset<Type_>(handle, full_length, buffer_size);
}

}

}

#endif