// File: simple_list.hpp
//
// Package info:
//   r-bioc-alabaster.base 1.6.1+ds-2
//   - area: main
//   - in suites: sid, trixie
//   - size: 1,652 kB
//   - sloc: cpp: 11,377; sh: 29; makefile: 2
// File content: 178 lines, 6,278 bytes (stat: -rw-r--r--)
#ifndef TAKANE_SIMPLE_LIST_HPP
#define TAKANE_SIMPLE_LIST_HPP

#include <cstddef>
#include <filesystem>
#include <limits>
#include <stdexcept>
#include <string>
#include <utility>

#include "uzuki2/uzuki2.hpp"
#include "byteme/byteme.hpp"

#include "utils_public.hpp"
#include "utils_other.hpp"

/**
 * @file simple_list.hpp
 * @brief Validation for simple lists.
 */

namespace takane {

/**
 * @cond
 */
// Forward declaration of the path-based overload, so that
// simple_list::validate() below can call it (as ::takane::validate) on each
// external list object found in 'other_contents'.
void validate(const std::filesystem::path&, Options&);
/**
 * @endcond
 */

/**
 * @namespace takane::simple_list
 * @brief Definitions for simple lists.
 */
namespace simple_list {

/**
 * @cond
 */
namespace internal {

/**
 * Read the optional `format` property from the `simple_list` metadata object.
 *
 * @param map JSON object map in which to look up the `format` key.
 * @return The string stored under `format`, or `"hdf5"` if the key is absent.
 * @throws std::runtime_error if `format` is present but is not a JSON string.
 */
inline std::string extract_format(const internal_json::JsonObjectMap& map) {
    const auto found = map.find("format");
    if (found == map.end()) {
        // No explicit format: fall back to the HDF5 representation.
        return "hdf5";
    }

    const auto& val = found->second;
    if (val->type() != millijson::STRING) {
        throw std::runtime_error("'simple_list.format' in the object metadata should be a JSON string");
    }

    // The type tag was checked just above, so this is a known-safe downcast;
    // static_cast (rather than reinterpret_cast) lets the compiler perform a
    // proper base-to-derived pointer conversion.
    return static_cast<const millijson::String*>(val.get())->value;
}

/**
 * Read the optional `length` property from the `simple_list` metadata object.
 *
 * @param map JSON object map in which to look up the `length` key.
 * @return A pair whose first element indicates whether `length` was present.
 * If it was, the second element is its value converted to `size_t`
 * (any fractional part is truncated); otherwise the second element is 0.
 * @throws std::runtime_error if `length` is present but is not a JSON number,
 * or if its value is negative, NaN, or too large to be held in a `size_t`.
 */
inline std::pair<bool, size_t> extract_length(const internal_json::JsonObjectMap& map) {
    const auto found = map.find("length");
    if (found == map.end()) {
        return std::pair<bool, size_t>(false, 0);
    }

    const auto& val = found->second;
    if (val->type() != millijson::NUMBER) {
        throw std::runtime_error("'simple_list.length' in the object metadata should be a JSON number");
    }

    // Type tag checked above, so this is a known-safe downcast.
    const double raw = static_cast<const millijson::Number*>(val.get())->value;

    // Converting a double to an unsigned integer is undefined behaviour when
    // the truncated value is not representable, so reject such values up front.
    // '!(raw >= 0)' is deliberately written this way so that NaN is also caught.
    if (!(raw >= 0) || raw >= static_cast<double>(std::numeric_limits<size_t>::max())) {
        throw std::runtime_error("'simple_list.length' in the object metadata should be a non-negative number that fits in a size_t");
    }

    return std::pair<bool, size_t>(true, static_cast<size_t>(raw));
}

}
/**
 * @endcond
 */

/**
 * Validate a simple list stored in a directory.
 *
 * The following checks are performed, in order:
 *
 * - The `simple_list.version` string in the metadata must have a major version of 1.
 * - If an `other_contents` entry exists inside `path`, it must be a directory.
 *   With `N` being the number of entries reported for that directory, each of
 *   `other_contents/0` to `other_contents/(N-1)` must exist and must itself pass `takane::validate()`.
 * - The list contents are parsed from `list_contents.json.gz` (format `"json.gz"`)
 *   or from the `simple_list` group of `list_contents.h5` (format `"hdf5"`),
 *   with `N` supplied as the number of external objects.
 * - When the version check `ge(1, 1, 0)` passes and `simple_list.length` is
 *   present in the metadata, it must equal the length of the parsed list.
 *
 * @param path Path to the directory containing the simple list.
 * @param metadata Metadata for the object, typically read from its `OBJECT` file.
 * @param options Validation options.
 *
 * @throws std::runtime_error if any of the checks above fails, or if the format is
 * neither `"json.gz"` nor `"hdf5"`. Exceptions raised by the metadata helpers and
 * by the parsers are not caught here.
 */
inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
    const auto& metamap = internal_json::extract_typed_object_from_metadata(metadata.other, "simple_list");

    const std::string& vstring = internal_json::extract_string_from_typed_object(metamap, "version", "simple_list");
    auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
    if (version.major != 1) {
        throw std::runtime_error("unsupported version string '" + vstring + "'");
    }

    const std::string format = internal::extract_format(metamap);

    // External list objects, if any, live in numbered subdirectories of 'other_contents'.
    const auto other_dir = path / "other_contents";
    int num_external = 0;
    if (std::filesystem::exists(other_dir)) {
        if (std::filesystem::status(other_dir).type() != std::filesystem::file_type::directory) {
            throw std::runtime_error("expected 'other_contents' to be a directory");
        }

        num_external = internal_other::count_directory_entries(other_dir);
        for (int i = 0; i < num_external; ++i) {
            const auto epath = other_dir / std::to_string(i);
            if (!std::filesystem::exists(epath)) {
                throw std::runtime_error("expected an external list object at '" + std::filesystem::relative(epath, path).string() + "'");
            }

            // Re-throw with the offending relative path prepended, so that the
            // caller can tell which external object was responsible.
            try {
                ::takane::validate(epath, options);
            } catch (const std::exception& err) {
                throw std::runtime_error("failed to validate external list object at '" + std::filesystem::relative(epath, path).string() + "'; " + std::string(err.what()));
            }
        }
    }

    // Parsing with the dummy provisioner; only the top-level length is retained.
    size_t len = 0;
    if (format == "json.gz") {
        uzuki2::json::Options opt;
        opt.parallel = options.parallel_reads;
        auto gzreader = internal_other::open_reader<byteme::GzipFileReader>(path / "list_contents.json.gz");
        auto loaded = uzuki2::json::parse<uzuki2::DummyProvisioner>(gzreader, uzuki2::DummyExternals(num_external), std::move(opt));
        len = static_cast<const uzuki2::List*>(loaded.get())->size();

    } else if (format == "hdf5") {
        auto handle = ritsuko::hdf5::open_file(path / "list_contents.h5");
        auto ghandle = ritsuko::hdf5::open_group(handle, "simple_list");
        auto loaded = uzuki2::hdf5::parse<uzuki2::DummyProvisioner>(ghandle, uzuki2::DummyExternals(num_external));
        len = static_cast<const uzuki2::List*>(loaded.get())->size();

    } else {
        throw std::runtime_error("unknown format '" + format + "'");
    }

    // The 'length' property is only cross-checked when the version test passes.
    if (version.ge(1, 1, 0)) {
        const auto len_info = internal::extract_length(metamap);
        if (len_info.first && len_info.second != len) {
            throw std::runtime_error("'simple_list.length' differs from the length of the list");
        }
    }
}

/**
 * Obtain the number of elements in a simple list.
 *
 * The length is determined as follows:
 *
 * - If `simple_list.length` is present in the metadata, it is returned directly
 *   and no file inside `path` is opened.
 * - Otherwise, if the format is `"hdf5"`, the number of objects inside the
 *   `simple_list/data` group of `list_contents.h5` is returned.
 * - Otherwise, `list_contents.json.gz` is parsed in full and the length of the
 *   resulting list is returned. Note that the format string is not checked
 *   any further here: every value other than `"hdf5"` takes this route.
 *
 * @param path Path to the directory containing the simple list.
 * @param metadata Metadata for the object, typically read from its `OBJECT` file.
 * @param options Validation options.
 * @return The number of list elements.
 */
inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
    const auto& metamap = internal_json::extract_typed_object_from_metadata(metadata.other, "simple_list");

    // Cheapest route: trust the length recorded in the metadata, if any.
    const auto len_info = internal::extract_length(metamap);
    if (len_info.first) {
        return len_info.second;
    }

    const std::string format = internal::extract_format(metamap);
    if (format == "hdf5") {
        auto handle = ritsuko::hdf5::open_file(path / "list_contents.h5");
        auto lhandle = handle.openGroup("simple_list");
        auto vhandle = lhandle.openGroup("data");
        return vhandle.getNumObjs();
    }

    // Not much choice but to parse the entire list here. We do so using the
    // dummy, which still has enough self-awareness to hold its own length.
    const auto other_dir = path / "other_contents";
    int num_external = 0;
    if (std::filesystem::exists(other_dir)) {
        num_external = internal_other::count_directory_entries(other_dir);
    }

    uzuki2::json::Options opt;
    opt.parallel = options.parallel_reads;
    auto gzreader = internal_other::open_reader<byteme::GzipFileReader>(path / "list_contents.json.gz");
    auto ptr = uzuki2::json::parse<uzuki2::DummyProvisioner>(gzreader, uzuki2::DummyExternals(num_external), std::move(opt));

    // Known-safe downcast within the class hierarchy; static_cast is the
    // appropriate named cast for this.
    return static_cast<const uzuki2::List*>(ptr.get())->size();
}

}

}

#endif