1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
|
#ifndef RITSUKO_ITERATE_ND_DATASET_HPP
#define RITSUKO_ITERATE_ND_DATASET_HPP
#include "H5Cpp.h"
#include <vector>
#include <algorithm>
#include <cmath>
/**
* @file IterateNdDataset.hpp
* @brief Iterate through an N-dimensional dataset by block.
*/
namespace ritsuko {
namespace hdf5 {
/**
* @brief Iterate through an N-dimensional dataset by block.
*
* This iterates through an N-dimensional dataset in a blockwise fashion, constructing `H5::DataSpace` objects to enable callers to easily read the dataset contents at each block.
* Block sizes are typically determined from dataset chunking via `pick_nd_block_dimensions()`, which ensures efficient access of entire chunks at each step.
*/
struct IterateNdDataset {
/**
* @param d Dataset dimensions.
* @param b Block dimensions, typically obtained from `pick_nd_block_dimensions()`.
* This should be of the same length as `d`, where each value of `b` is no greater than its counterpart in `d`.
*/
IterateNdDataset(std::vector<hsize_t> d, std::vector<hsize_t> b) :
data_extent(std::move(d)),
block_extent(std::move(b)),
ndims(data_extent.size()),
starts_internal(ndims),
counts_internal(block_extent),
dspace(ndims, data_extent.data())
{
for (auto b : block_extent) {
total_size *= b;
}
if (total_size) {
dspace.selectHyperslab(H5S_SELECT_SET, counts_internal.data(), starts_internal.data());
mspace.setExtentSimple(ndims, counts_internal.data());
} else {
finished_internal = true;
}
}
/**
* Move to the next step in the iteration.
* This will modify the state of all references returned by the getters.
*/
void next() {
// Attempting a shift from the last dimension as this is the fastest-changing.
for (size_t i = ndims; i > 0; --i) {
auto d = i - 1;
starts_internal[d] += block_extent[d];
// Shift was possible, breaking out.
if (starts_internal[d] < data_extent[d]) {
total_size /= counts_internal[d];
counts_internal[d] = std::min(data_extent[d] - starts_internal[d], block_extent[d]);
total_size *= counts_internal[d];
break;
}
// Next step isn't possible as we've reached the end of the dataset.
if (d == 0) {
finished_internal = true;
return;
}
// Reached the end of the current dimension extent; set it to zero,
// move to the next dimension and increment it.
starts_internal[d] = 0;
total_size /= counts_internal[d];
counts_internal[d] = std::min(data_extent[d], block_extent[d]);
total_size *= counts_internal[d];
}
dspace.selectHyperslab(H5S_SELECT_SET, counts_internal.data(), starts_internal.data());
mspace.setExtentSimple(ndims, counts_internal.data());
}
public:
/**
* @return Whether the iteration is finished.
* All other getters should only be accessed if this is `true`.
*/
bool finished() const {
return finished_internal;
}
/**
* @return Size of the current block, in terms of the number of elements.
* This is usually equal to the product of the block dimensions used in the constructor,
* except at the edges of the dataset where the current block may be truncated.
*/
size_t current_block_size() const {
return total_size;
}
/**
* @return Starting coordinates of the current block.
*/
const std::vector<hsize_t>& starts () const {
return starts_internal;
}
/**
* @return Dimensions of the current block.
* This is usually equal to the block dimensions used in the constructor,
* except at the edges of the dataset where the current block may be truncated.
*/
const std::vector<hsize_t>& counts () const {
return counts_internal;
}
/**
* @return Dataspace for extracting block contents from file.
*/
const H5::DataSpace& file_space() const {
return dspace;
}
/**
* @return Dataspace for storing the block contents in memory.
* This assumes a contiguous memory allocation that has space for at least `total_size()` elements.
*/
const H5::DataSpace& memory_space() const {
return mspace;
}
/**
* @return Dimensions of the dataset, as provided in the constructor.
*/
const std::vector<hsize_t>& dimensions() const {
return data_extent;
}
/**
* @return Dimensions of the blocks, as provided in the constructor.
*/
const std::vector<hsize_t>& block_dimensions() const {
return block_extent;
}
private:
std::vector<hsize_t> data_extent, block_extent;
size_t ndims;
std::vector<hsize_t> starts_internal, counts_internal;
H5::DataSpace mspace, dspace;
bool finished_internal = false;
size_t total_size = 1;
};
}
}
#endif
|