1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
#ifndef TAKANE_DENSE_ARRAY_HPP
#define TAKANE_DENSE_ARRAY_HPP
#include "ritsuko/hdf5/hdf5.hpp"
#include "ritsuko/ritsuko.hpp"
#include "utils_public.hpp"
#include "utils_array.hpp"
#include <vector>
#include <string>
#include <stdexcept>
#include <filesystem>
#include <cstdint>
/**
* @file dense_array.hpp
* @brief Validation for dense arrays.
*/
namespace takane {
/**
* @namespace takane::dense_array
* @brief Definitions for dense arrays.
*/
namespace dense_array {
/**
* @cond
*/
namespace internal {
// Reads the optional 'transposed' attribute of the group.
// A missing attribute is treated as "not transposed"; otherwise the attribute
// must be a scalar whose datatype fits in a 32-bit signed integer, and any
// non-zero value counts as "transposed".
inline bool is_transposed(const H5::Group& ghandle) {
    const bool has_flag = ghandle.attrExists("transposed");
    if (!has_flag) {
        return false;
    }

    auto flag_attr = ghandle.openAttribute("transposed");

    // Shape is checked before the datatype, and both before loading the value.
    const bool scalar = ritsuko::hdf5::is_scalar(flag_attr);
    if (!scalar) {
        throw std::runtime_error("expected 'transposed' attribute to be a scalar");
    }

    const bool too_wide = ritsuko::hdf5::exceeds_integer_limit(flag_attr, 32, true);
    if (too_wide) {
        throw std::runtime_error("expected 'transposed' attribute to have a datatype that fits in a 32-bit signed integer");
    }

    const auto flag_value = ritsuko::hdf5::load_scalar_numeric_attribute<int32_t>(flag_attr);
    return flag_value != 0;
}
}
/**
* @endcond
*/
/**
* @param path Path to the directory containing a dense array.
* @param metadata Metadata for the object, typically read from its `OBJECT` file.
* @param options Validation options.
*/
inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
auto vstring = internal_json::extract_version_for_type(metadata.other, "dense_array");
auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
if (version.major != 1) {
throw std::runtime_error("unsupported version '" + vstring + "'");
}
auto handle = ritsuko::hdf5::open_file(path / "array.h5");
auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array");
internal::is_transposed(ghandle); // just a check, not used here.
auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "data");
auto dspace = dhandle.getSpace();
size_t ndims = dspace.getSimpleExtentNdims();
if (ndims == 0) {
throw std::runtime_error("expected 'data' array to have at least one dimension");
}
std::vector<hsize_t> extents(ndims);
dspace.getSimpleExtentDims(extents.data());
auto type = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "type");
if (type == "integer") {
if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 32, true)) {
throw std::runtime_error("expected integer array to have a datatype that fits into a 32-bit signed integer");
}
} else if (type == "boolean") {
if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 32, true)) {
throw std::runtime_error("expected boolean array to have a datatype that fits into a 32-bit signed integer");
}
} else if (type == "number") {
if (ritsuko::hdf5::exceeds_float_limit(dhandle, 64)) {
throw std::runtime_error("expected number array to have a datatype that fits into a 64-bit float");
}
} else if (type == "string") {
if (!ritsuko::hdf5::is_utf8_string(dhandle)) {
throw std::runtime_error("expected string array to have a datatype that can be represented by a UTF-8 encoded string");
}
ritsuko::hdf5::validate_nd_string_dataset(dhandle, extents, options.hdf5_buffer_size);
} else {
throw std::runtime_error("unknown array type '" + type + "'");
}
if (dhandle.attrExists("missing-value-placeholder")) {
auto attr = dhandle.openAttribute("missing-value-placeholder");
ritsuko::hdf5::check_missing_placeholder_attribute(dhandle, attr);
}
if (ghandle.exists("names")) {
internal_array::check_dimnames(ghandle, "names", extents, options);
}
}
/**
 * Reports the extent of the first dimension of a dense array.
 *
 * The extents of the `data` dataset in the `dense_array` group of `array.h5` are read.
 * If the group's `transposed` attribute is set, the last stored extent is reported;
 * otherwise the first stored extent is reported.
 *
 * @param path Path to the directory containing a dense array.
 * @param metadata Metadata for the object, typically read from its `OBJECT` file.
 * @param options Validation options.
 * @return Extent of the first dimension.
 * @throws std::runtime_error if the `data` dataset has no dimensions,
 * or if the `transposed` attribute is malformed.
 */
inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
    auto handle = ritsuko::hdf5::open_file(path / "array.h5");
    auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array");
    auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "data");

    auto dspace = dhandle.getSpace();
    size_t ndims = dspace.getSimpleExtentNdims();

    // A zero-dimensional dataset would leave 'extents' empty, and calling
    // front()/back() on an empty vector is undefined behaviour. validate()
    // rejects such files with the same message, but this function may be
    // called without validating first.
    if (ndims == 0) {
        throw std::runtime_error("expected 'data' array to have at least one dimension");
    }

    std::vector<hsize_t> extents(ndims);
    dspace.getSimpleExtentDims(extents.data());

    // When transposed, the stored dimensions are in reverse order, so the
    // first logical dimension is the last stored one.
    if (internal::is_transposed(ghandle)) {
        return extents.back();
    } else {
        return extents.front();
    }
}
/**
* @param path Path to the directory containing a dense array.
* @param metadata Metadata for the object, typically read from its `OBJECT` file.
* @param options Validation options.
* @return Dimensions of the array.
*/
inline std::vector<size_t> dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
auto handle = ritsuko::hdf5::open_file(path / "array.h5");
auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array");
auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "data");
auto dspace = dhandle.getSpace();
size_t ndims = dspace.getSimpleExtentNdims();
std::vector<hsize_t> extents(ndims);
dspace.getSimpleExtentDims(extents.data());
if (internal::is_transposed(ghandle)) {
return std::vector<size_t>(extents.rbegin(), extents.rend());
} else {
return std::vector<size_t>(extents.begin(), extents.end());
}
}
}
}
#endif
|