File: vbz_plugin.cpp

package info (click to toggle)
libvbz-hdf-plugin 1.0.2-3.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,384 kB
  • sloc: cpp: 28,289; python: 392; ansic: 40; sh: 21; makefile: 19; xml: 16
file content (261 lines) | stat: -rw-r--r-- 7,047 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#include "vbz_plugin/vbz_hdf_plugin_export.h"
#include "vbz_plugin.h"
#include "vbz.h"

#include <gsl/gsl-lite.hpp>
#include <hdf5/hdf5_plugin_types.h>

#include <array>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <memory>

#ifdef _WIN32
# ifndef NOMINMAX
#  define NOMINMAX
#endif
# include <Windows.h>
#endif

#define VBZ_DEBUG 0

namespace {
#if VBZ_DEBUG
// Debug helper: folds every byte of `input` together with XOR and returns
// the resulting 8-bit value widened to int.
int checksum(gsl::span<char const> input)
{
    std::uint8_t folded = 0x0;
    for (auto it = input.begin(); it != input.end(); ++it)
    {
        folded = static_cast<std::uint8_t>(folded ^ *it);
    }
    return folded;
}
#endif

#if defined(_WIN32) && !defined(HDF5_USE_STATIC_LIBRARIES)
// Returns a handle to the hdf5 DLL, loading it on first use and caching the
// handle for later calls.
//
// "hdf5.dll" is loaded unless the environment variable VBZ_DEBUG_HDF is set
// to exactly "1", in which case "hdf5_D.dll" is loaded instead.
// If the library cannot be loaded the process is aborted.
HMODULE get_hdf_module()
{
    static HMODULE cached_handle = nullptr;

    if (cached_handle == nullptr)
    {
        char const* const debug_flag = getenv("VBZ_DEBUG_HDF");
        bool const want_debug_library = debug_flag != nullptr && strcmp(debug_flag, "1") == 0;

        cached_handle = LoadLibraryA(want_debug_library ? "hdf5_D.dll" : "hdf5.dll");
    }

    if (cached_handle == nullptr)
    {
        std::cerr << "Failed to load hdf library" << std::endl;
        std::abort();
    }

    return cached_handle;
}

#endif

// Allocates `size` bytes for a buffer that will be handed over to hdf5.
//
// Windows ends up using different runtimes/heaps depending on which
// _python_ version the hdf5 plugin was built against; the simple way to fix
// this is to use whatever hdf5 uses for allocating memory. So when hdf5 is
// loaded as a DLL on Windows, the request is forwarded to the
// "H5allocate_memory" export of that DLL (with its second argument false);
// everywhere else plain malloc is used.
//
// Returns the allocated block, or whatever the underlying allocator returns
// on failure. Aborts the process if the hdf5 DLL does not export
// H5allocate_memory, rather than calling through a null function pointer.
void* h5_malloc(std::size_t size)
{
#if defined(_WIN32) && !defined(HDF5_USE_STATIC_LIBRARIES)
    using allocate_fn = void* (*)(size_t, bool);

    // Resolved once; function-local static initialisation runs a single time.
    static allocate_fn const allocate_memory = []() -> allocate_fn {
        auto const resolved = reinterpret_cast<allocate_fn>(
            GetProcAddress(get_hdf_module(), "H5allocate_memory"));
        if (!resolved)
        {
            std::cerr << "Failed to find H5allocate_memory in hdf library" << std::endl;
            std::abort();
        }
        return resolved;
    }();

    return allocate_memory(size, false);
#else
    return std::malloc(size);
#endif
}

// Releases a block with the deallocator that matches h5_malloc.
//
// When hdf5 is loaded as a DLL on Windows the block is passed to the
// "H5free_memory" export of that DLL; everywhere else plain free is used.
// Aborts the process if the hdf5 DLL does not export H5free_memory, rather
// than calling through a null function pointer.
void h5_free(void* memory)
{
#if defined(_WIN32) && !defined(HDF5_USE_STATIC_LIBRARIES)
    using free_fn = int (*)(void*);

    // Resolved once; function-local static initialisation runs a single time.
    static free_fn const free_memory = []() -> free_fn {
        auto const resolved = reinterpret_cast<free_fn>(
            GetProcAddress(get_hdf_module(), "H5free_memory"));
        if (!resolved)
        {
            std::cerr << "Failed to find H5free_memory in hdf library" << std::endl;
            std::abort();
        }
        return resolved;
    }();

    free_memory(memory);
#else
    std::free(memory);
#endif
}

// Deleter for std::unique_ptr that hands the owned block to h5_free.
struct h5free_delete
{
    void operator()(void* block)
    {
        h5_free(block);
    }
};


}

size_t vbz_filter(
    unsigned flags,
    size_t cd_nelmts,
    const unsigned int cd_values[],
    size_t nbytes,
    size_t* buf_size,
    void** buf)
{
    std::unique_ptr<void, h5free_delete> outbuf;
    vbz_size_t outbuf_size = 0;
    vbz_size_t outbuf_used_size = 0;

    if (cd_nelmts < 3)
    {
        return 0;
    }

    unsigned int vbz_version = cd_values[FILTER_VBZ_VERSION_OPTION];
    unsigned int integer_size = cd_values[FILTER_VBZ_INTEGER_SIZE_OPTION];
    bool use_zig_zag = cd_values[FILTER_VBZ_USE_DELTA_ZIG_ZAG_COMPRESSION] != 0;

    unsigned int compression_level = 1;
    if (cd_nelmts > FILTER_VBZ_ZSTD_COMPRESSION_LEVEL_OPTION)
    {
        compression_level = cd_values[FILTER_VBZ_ZSTD_COMPRESSION_LEVEL_OPTION];
    }
    
    CompressionOptions options{ use_zig_zag, integer_size, compression_level, vbz_version };
    
#if VBZ_DEBUG
    std::cout << "======================================================\n"
        << "Using options:"
        << " integer_size: " << integer_size
        << " use_zig_zag: " << use_zig_zag
        << " compression_level: " << compression_level
        << std::endl;
#endif

    // If decompressing
    if (flags & H5Z_FLAG_REVERSE)
    {
        auto input_span = gsl::make_span(static_cast<char*>(*buf), *buf_size);
        if (input_span.size() > std::numeric_limits<vbz_size_t>::max())
        {
            std::cerr << "vbz_filter: Chunk size too large." << std::endl;
            return 0;
        }

#if VBZ_DEBUG
        std::cout << "Decmpressing data with checksum " << checksum(input_span) << std::endl;
#endif

        auto const expected_uncompressed_size = vbz_decompressed_size(
            input_span.data(),
            vbz_size_t(input_span.size()),
            &options);
        if (vbz_is_error(expected_uncompressed_size))
        {
            std::cerr << "vbz_filter: size error" << std::endl;
            return 0;
        }
        outbuf.reset(h5_malloc(expected_uncompressed_size));

        outbuf_used_size = vbz_decompress_sized(
            input_span.data(),
            vbz_size_t(input_span.size()),
            outbuf.get(),
            expected_uncompressed_size,
            &options);
        if (vbz_is_error(outbuf_used_size))
        {
            std::cerr << "vbz_filter: compression error" << std::endl;
            return 0;
        }
        
        if (outbuf_used_size != expected_uncompressed_size)
        {
            std::cerr << "vbz_filter: decompressed size error" << std::endl;
            return 0;
        }

#if VBZ_DEBUG
        std::cout << "Decompressed dataset from " << *buf_size << "  bytes to " << outbuf_used_size
            << " with checksum " << checksum(gsl::make_span(static_cast<char*>(outbuf.get()), outbuf_used_size)) << std::endl;
#endif
    }
    else // compressing
    {
#if VBZ_DEBUG
        std::cout << "Compressing data with checksum " << checksum(gsl::make_span(static_cast<char*>(*buf), *buf_size)) << std::endl;
#endif
        if (*buf_size > std::numeric_limits<vbz_size_t>::max())
        {
            std::cerr << "vbz_filter: Chunk size too large." << std::endl;
            return 0;
        }

        auto const byte_remainder = *buf_size % integer_size;
        if (byte_remainder != 0)
        {
            std::cerr << "vbz_filter: Invalid integer_size specified" << std::endl;
            return 0;
        }

        outbuf_size = vbz_max_compressed_size(vbz_size_t(*buf_size), &options);
        outbuf.reset(h5_malloc(outbuf_size));
        
        auto output_span = gsl::make_span(static_cast<char*>(outbuf.get()), outbuf_size);

        // do compress
        outbuf_used_size += vbz_compress_sized(
            *buf,
            vbz_size_t(*buf_size),
            output_span.data(),
            vbz_size_t(output_span.size()),
            &options
        );
        if (vbz_is_error(outbuf_used_size))
        {
            std::cerr << "vbz_filter: compression error" << std::endl;;
            return 0;
        }

#if VBZ_DEBUG
        std::cout << "Compressed dataset from " << *buf_size << "  bytes to " << outbuf_used_size << " with checksum " << checksum(gsl::make_span(output_span.data(), outbuf_used_size)) << std::endl;
#endif
    }    

    h5_free(*buf);
    *buf = outbuf.release();
    *buf_size = outbuf_size;
    return outbuf_used_size;
}

// Filter class descriptor for vbz. Its address is what vbz_plugin_info()
// returns. Both encoding and decoding are advertised, no can_apply or
// set_local callbacks are supplied, and vbz_filter does the actual work.
H5Z_class2_t const vbz_filter_struct = {
    H5Z_CLASS_T_VERS,   // version of the H5Z class struct layout
    FILTER_VBZ_ID,      // id: filter identifier
    1,                  // encoder_present: compression is available
    1,                  // decoder_present: decompression is available
    "vbz",              // name: filter name
    nullptr,            // can_apply: no callback provided
    nullptr,            // set_local: no callback provided
    vbz_filter          // filter: callback that compresses/decompresses a chunk
};

// Exported accessor returning a pointer to the vbz filter class descriptor
// (vbz_filter_struct).
extern "C" VBZ_HDF_PLUGIN_EXPORT const void* vbz_plugin_info(void)
{
    const void* const descriptor = &vbz_filter_struct;
    return descriptor;
}

// hdf plugin hook: reports that this plugin provides a filter.
extern "C" VBZ_HDF_PLUGIN_EXPORT H5PL_type_t H5PLget_plugin_type(void)
{
    H5PL_type_t const plugin_kind = H5PL_TYPE_FILTER;
    return plugin_kind;
}

// hdf plugin hook: returns the filter class descriptor by forwarding to
// vbz_plugin_info(). Logs a message first when VBZ_DEBUG is enabled.
extern "C" VBZ_HDF_PLUGIN_EXPORT const void* H5PLget_plugin_info(void)
{
#if VBZ_DEBUG
    std::cout << "Registering vbz plugin" << std::endl;
#endif

    const void* const filter_class = vbz_plugin_info();
    return filter_class;
}