File: compression-performance.cc

package info (click to toggle)
eckit 1.32.4-3
  • links: PTS
  • area: main
  • in suites: forky, sid
  • size: 305,876 kB
  • sloc: cpp: 111,654; ansic: 2,826; yacc: 590; lex: 361; python: 237; sh: 202; makefile: 42
file content (148 lines) | stat: -rw-r--r-- 4,679 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*
 * (C) Copyright 1996- ECMWF.
 *
 * This software is licensed under the terms of the Apache Licence Version 2.0
 * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
 * In applying this licence, ECMWF does not waive the privileges and immunities
 * granted to it by virtue of its status as an intergovernmental organisation nor
 * does it submit to any jurisdiction.
 */

#include <cassert>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <locale>
#include <memory>

#include "eckit/filesystem/PathName.h"
#include "eckit/io/Buffer.h"
#include "eckit/io/DataHandle.h"
#include "eckit/log/Bytes.h"
#include "eckit/log/Seconds.h"
#include "eckit/log/Timer.h"

#include "eckit/utils/Compressor.h"

#include "eckit/testing/Test.h"

using namespace std;
using namespace eckit;
using namespace eckit::testing;

namespace eckit {
namespace test {

//----------------------------------------------------------------------------------------------------------------------

/// Runs compress() N times inside one timed region and prints a rate line plus the
/// output/input size ratio to std::cout.
///
/// @tparam N          number of compress() calls made between timer.start() and timer.stop()
/// @param compressor  compressor under test
/// @param inBuffer    input data; its full size() is passed to compress() on every iteration
/// @param outBuffer   destination buffer handed to compress()
/// @param timer       started before the loop and stopped after it; also passed to Bytes()
/// @return            the value returned by the last compress() call, or 0 if the loop never ran
template <int N>
size_t timeCompress(const Compressor& compressor, eckit::Buffer& inBuffer, eckit::Buffer& outBuffer,
                    eckit::Timer& timer) {

    // The input size does not change across iterations, so read it once.
    const size_t inlen = inBuffer.size();

    timer.start();

    // Initialised so that the value printed and returned is well defined even when N <= 0.
    size_t out = 0;
    for (int i = 0; i < N; ++i) {
        out = compressor.compress(inBuffer, inlen, outBuffer);
    }

    timer.stop();

    // Avoid dividing by zero when the input buffer is empty.
    const double percent = (inlen != 0) ? ((100.0 * out) / inlen) : 0.0;

    std::cout << "       - compress()   rate " << Bytes(N * inlen, timer) << " -- factor " << std::fixed
              << std::setprecision(2) << percent << "% -- ";
    std::cout << std::fixed << std::setprecision(1) << (out / 1024.0) << "/" << (inlen / 1024.0) << " KB"
              << std::endl;
    return out;
}

/// Calls uncompress() N times between timer.start() and timer.stop(), then prints one
/// rate line to std::cout.
///
/// @tparam N          number of uncompress() calls inside the timed region
/// @param compressor  compressor under test
/// @param inBuffer    buffer passed as the input of uncompress()
/// @param inlen       input length forwarded to uncompress()
/// @param outBuffer   buffer passed as the output of uncompress()
/// @param outlen      output length forwarded to uncompress(); also used for the printed rate
/// @param timer       timer wrapped around the loop and passed to Bytes()
template <int N>
void timeDecompress(const Compressor& compressor, eckit::Buffer& inBuffer, size_t inlen, eckit::Buffer& outBuffer,
                    size_t outlen, eckit::Timer& timer) {

    timer.start();

    int remaining = N;
    while (remaining > 0) {
        compressor.uncompress(inBuffer, inlen, outBuffer, outlen);
        --remaining;
    }

    timer.stop();

    // Total volume handed to Bytes() is N times the per-call output length.
    std::cout << "       - decompress() rate " << Bytes(N * outlen, timer) << std::endl;
}


/// One benchmark input: the contents of a file plus a label describing it.
struct BinaryData {
    eckit::Buffer in;         ///< file contents; sized from path.size()
    std::string description;  ///< label supplied by the caller

    /// Loads the file at @p path into @c in.
    ///
    /// @param path  file to read; path.size() determines how many bytes are requested
    /// @param desc  label copied into @c description
    ///
    /// The byte count returned by read() is checked with EXPECT (presumably provided by
    /// eckit/testing/Test.h, which this file includes), so a short or failed read is
    /// reported instead of silently benchmarking a partly filled buffer.
    BinaryData(const eckit::PathName& path, const std::string& desc) : in(path.size()), description(desc) {
        std::unique_ptr<DataHandle> dh(path.fileHandle());
        dh->openForRead();
        const long nread = dh->read(in, in.size());
        // Close before checking so the handle is not left open when the check fails.
        dh->close();
        EXPECT(nread >= 0 && static_cast<size_t>(nread) == in.size());
    }
};

//----------------------------------------------------------------------------------------------------------------------

CASE("Test compression performance") {

    eckit::Timer timer;

    // Input files, each paired with the label printed ahead of its timings
    std::vector<BinaryData> data;

    data.emplace_back("2t_sfc.grib", "GRIB t2 surface layer");
    data.emplace_back("2t_sfc_regrid.grib", "GRIB t2 surface layer re-gridded");
    data.emplace_back("vo-d_6ml.grib", "GRIB vo/d layers (10-15 spherical harmonics)");
    data.emplace_back("u-v_6ml.grib", "GRIB u/v layers (10-15)");
    data.emplace_back("q_6ml_regrid.grib", "GRIB q 6 layers (10-15) re-gridded");

    // Candidate compressor names; only those registered in the factory are exercised
    std::vector<std::string> compressors{"none", "lz4", "snappy", "aec", "bzip2"};

    constexpr int N = 5;  // Number of iterations to use for each case

    // Benchmarks one compressor against one input: compress N times, then decompress N times.
    auto runBenchmark = [&](const Compressor& codec, BinaryData& sample) {
        // Buffers are created up front so their allocation is not part of the timed region.
        // They are deliberately larger than the input: depending on the implementation this
        // can avoid internal buffers and extra memory copies.

        const size_t ulen     = sample.in.size();
        const size_t oversize = static_cast<size_t>(1.2 * ulen);  // used in AEC

        Buffer compressed{oversize};
        Buffer uncompressed{oversize};

        // touch memory
        compressed.zero();
        uncompressed.zero();

        // Compress, remembering the size reported by the last compress() call
        const size_t clen = timeCompress<N>(codec, sample.in, compressed, timer);

        // Decompress that output back to the original length
        timeDecompress<N>(codec, compressed, clen, uncompressed, ulen, timer);
    };

    auto& factory = eckit::CompressorFactory::instance();

    for (const std::string& name : compressors) {

        // Skip compressors the factory does not know about
        if (!factory.has(name)) {
            continue;
        }

        std::cout << name << std::endl;

        std::unique_ptr<eckit::Compressor> codec(factory.build(name));

        for (BinaryData& sample : data) {
            std::cout << "    " << sample.description << std::endl;
            runBenchmark(*codec, sample);
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------

}  // namespace test
}  // namespace eckit

/// Program entry point: forwards the command-line arguments to run_tests() and returns
/// its result as the process exit status.
int main(int argc, char* argv[]) {
    const int status = run_tests(argc, argv);
    return status;
}