File: utils_string.hpp

package info (click to toggle)
r-bioc-alabaster.base 1.6.1%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 1,652 kB
  • sloc: cpp: 11,377; sh: 29; makefile: 2
file content (89 lines) | stat: -rw-r--r-- 3,009 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#ifndef TAKANE_UTILS_STRING_HPP
#define TAKANE_UTILS_STRING_HPP

#include <unordered_set>
#include <string>
#include <cstdint>
#include <vector>
#include <stdexcept>

#include "ritsuko/ritsuko.hpp"
#include "ritsuko/hdf5/hdf5.hpp"

namespace takane {

namespace internal_string {

/**
 * Read the string format declared by the "format" attribute of an HDF5 object.
 *
 * @tparam H5Object_ Type of the HDF5 handle; it must offer `attrExists()` and `openAttribute()`.
 * @param handle Handle to the object that may carry a "format" attribute.
 *
 * @return The string loaded from the "format" attribute,
 * or the literal "none" when the object has no such attribute.
 *
 * @throws std::runtime_error if the attribute exists but is not a scalar,
 * or if its datatype is not accepted by `ritsuko::hdf5::is_utf8_string()`.
 */
template<class H5Object_>
std::string fetch_format_attribute(const H5Object_& handle) {
    if (handle.attrExists("format")) {
        auto format_attr = handle.openAttribute("format");

        // Shape is checked before the datatype, so a non-scalar attribute
        // is always reported with the "scalar" message.
        const bool scalar = ritsuko::hdf5::is_scalar(format_attr);
        if (!scalar) {
            throw std::runtime_error("expected 'format' attribute to be a scalar");
        }

        const bool utf8_compatible = ritsuko::hdf5::is_utf8_string(format_attr);
        if (!utf8_compatible) {
            throw std::runtime_error("expected 'format' to have a datatype that can be represented by a UTF-8 encoded string");
        }

        return ritsuko::hdf5::load_scalar_string_attribute(format_attr);
    }

    // An absent attribute means that no particular format was requested.
    return "none";
}

/**
 * Check that the strings in a 1-dimensional HDF5 dataset follow the requested format.
 *
 * @param handle Handle to the string dataset to be checked.
 * @param len Number of strings to stream from the dataset.
 * @param format Name of the format; one of "date", "date-time" or "none".
 * - "date": each string must satisfy `ritsuko::is_date()`.
 * - "date-time": each string must satisfy `ritsuko::is_rfc3339()`.
 * - "none": the dataset is passed to `ritsuko::hdf5::validate_1d_string_dataset()` as-is.
 * @param has_missing Whether a missing value placeholder is in use.
 * @param missing_value The placeholder string; only consulted when `has_missing` is true.
 * Strings equal to the placeholder are exempt from the "date" and "date-time" checks.
 * @param buffer_size Forwarded unchanged to the ritsuko streaming/validation helpers.
 *
 * @throws std::runtime_error if `format` is not one of the supported names,
 * or if a non-missing string does not satisfy the requested format.
 */
inline void validate_string_format(const H5::DataSet& handle, hsize_t len, const std::string& format, bool has_missing, const std::string& missing_value, hsize_t buffer_size) {
    if (format == "none") {
        ritsuko::hdf5::validate_1d_string_dataset(handle, len, buffer_size);
        return;
    }

    // Only two content-checked formats exist, so one flag is enough to pick the checker below.
    const bool want_date = (format == "date");
    if (!want_date && format != "date-time") {
        throw std::runtime_error("unsupported format '" + format + "'");
    }

    ritsuko::hdf5::Stream1dStringDataset stream(&handle, len, buffer_size);
    for (hsize_t remaining = len; remaining > 0; --remaining, stream.next()) {
        auto current = stream.steal();

        // Placeholders for missing values do not have to follow the format.
        if (has_missing && missing_value == current) {
            continue;
        }

        if (want_date) {
            if (!ritsuko::is_date(current.c_str(), current.size())) {
                throw std::runtime_error("expected a date-formatted string (got '" + current + "')");
            }
        } else {
            if (!ritsuko::is_rfc3339(current.c_str(), current.size())) {
                throw std::runtime_error("expected a date/time-formatted string (got '" + current + "')");
            }
        }
    }
}

/**
 * Validate an optional dataset of names stored inside a group.
 *
 * Nothing is checked when `handle.exists(name)` is false.
 * Otherwise, the child is opened as a dataset, its datatype and length are checked,
 * and it is then passed to `ritsuko::hdf5::validate_1d_string_dataset()`.
 *
 * @param handle Handle to the group that may contain the names.
 * @param name Name of the child dataset inside `handle`.
 * @param len Expected number of names, i.e., the length of the parent object.
 * @param buffer_size Forwarded unchanged to `ritsuko::hdf5::validate_1d_string_dataset()`.
 *
 * @throws std::runtime_error if the dataset's datatype is not accepted by
 * `ritsuko::hdf5::is_utf8_string()`, or if its length differs from `len`.
 */
inline void validate_names(const H5::Group& handle, const std::string& name, size_t len, hsize_t buffer_size) {
    if (handle.exists(name)) {
        auto names_handle = ritsuko::hdf5::open_dataset(handle, name.c_str());

        const bool utf8_compatible = ritsuko::hdf5::is_utf8_string(names_handle);
        if (!utf8_compatible) {
            throw std::runtime_error("expected '" + name + "' to have a datatype that can be represented by a UTF-8 encoded string");
        }

        // The datatype is checked before the length, so a wrong type is reported first.
        auto observed = ritsuko::hdf5::get_1d_length(names_handle.getSpace(), false);
        if (observed != len) {
            std::string message = "'" + name + "' should have the same length as the parent object (got ";
            message += std::to_string(observed);
            message += ", expected ";
            message += std::to_string(len);
            message += ")";
            throw std::runtime_error(message);
        }

        ritsuko::hdf5::validate_1d_string_dataset(names_handle, len, buffer_size);
    }
}

}

}

#endif