File: main.cpp

package info (click to toggle)
clickhouse 18.16.1%2Bds-7.3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 40,292 kB
  • sloc: cpp: 223,075; sql: 21,608; python: 6,596; sh: 4,299; ansic: 3,889; xml: 3,312; perl: 155; makefile: 57; asm: 34
file content (130 lines) | stat: -rw-r--r-- 3,916 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#include <iostream>

#include <boost/program_options.hpp>

#include <IO/CompressedWriteBuffer.h>
#include <IO/CompressedReadBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/CompressedReadBufferFromFile.h>


/** This program checks the correctness of a .mrk (marks) file against the corresponding compressed .bin file.
  */


namespace DB
{
    /// Error codes used by this file. They are only declared here (extern, no initializer);
    /// the definitions are expected to come from elsewhere at link time.
    namespace ErrorCodes
    {
        /// Passed to DB::Exception by stat() when the compressed size read from a block header
        /// exceeds DBMS_MAX_COMPRESSED_SIZE.
        extern const int TOO_LARGE_SIZE_COMPRESSED;
    }
}


/** Parses the header of one compressed block starting at the current position of `in`.
  *
  * Consumes 16 bytes of checksum (skipped, not verified) followed by COMPRESSED_BLOCK_HEADER_SIZE
  * bytes of header. The compressed size is the UInt32 loaded from header offset 1,
  * the decompressed size is the UInt32 loaded from header offset 5.
  *
  * Returns {compressed size, decompressed size}, or a value-initialized pair ({0, 0})
  * if `in` is already at end of file.
  * Throws DB::Exception with code TOO_LARGE_SIZE_COMPRESSED if the compressed size
  * is greater than DBMS_MAX_COMPRESSED_SIZE.
  *
  * Nothing is printed here: `out` is accepted but not used by this function.
  */
std::pair<UInt32, UInt32> stat(DB::ReadBuffer & in, DB::WriteBuffer & out)
{
    std::pair<UInt32, UInt32> sizes{};

    if (!in.eof())
    {
        /// The checksum precedes the header; skip over it.
        in.ignore(16);

        char block_header[COMPRESSED_BLOCK_HEADER_SIZE];
        in.readStrict(block_header, COMPRESSED_BLOCK_HEADER_SIZE);

        const UInt32 compressed = unalignedLoad<UInt32>(&block_header[1]);

        /// Reject implausible sizes before looking at anything else in the header.
        if (compressed > DBMS_MAX_COMPRESSED_SIZE)
            throw DB::Exception("Too large size_compressed. Most likely corrupted data.", DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED);

        sizes.first = compressed;
        sizes.second = unalignedLoad<UInt32>(&block_header[5]);
    }

    return sizes;
}


void checkCompressedHeaders(const std::string & mrk_path, const std::string & bin_path)
{
    DB::ReadBufferFromFile mrk_in(mrk_path);
    DB::ReadBufferFromFile bin_in(bin_path, 4096);    /// Small buffer size just to check header of compressed block.

    DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);

    for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num)
    {
        UInt64 offset_in_compressed_file = 0;
        UInt64 offset_in_decompressed_block = 0;

        DB::readBinary(offset_in_compressed_file, mrk_in);
        DB::readBinary(offset_in_decompressed_block, mrk_in);

        out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block << ". ";

        bin_in.seek(offset_in_compressed_file);
        auto sizes = stat(bin_in, out);

        out << "Block sizes: " << sizes.first << ", " << sizes.second << '\n' << DB::flush;
    }
}


void checkByCompressedReadBuffer(const std::string & mrk_path, const std::string & bin_path)
{
    DB::ReadBufferFromFile mrk_in(mrk_path);
    DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0);

    DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);

    for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num)
    {
        UInt64 offset_in_compressed_file = 0;
        UInt64 offset_in_decompressed_block = 0;

        DB::readBinary(offset_in_compressed_file, mrk_in);
        DB::readBinary(offset_in_decompressed_block, mrk_in);

        out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block << ".\n" << DB::flush;

        bin_in.seek(offset_in_compressed_file, offset_in_decompressed_block);
    }
}


int main(int argc, char ** argv)
{
    boost::program_options::options_description desc("Allowed options");
    desc.add_options()
        ("help,h", "produce help message")
    ;

    boost::program_options::variables_map options;
    boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);

    if (options.count("help") || argc != 3)
    {
        std::cout << "Usage: " << argv[0] << " file.mrk file.bin" << std::endl;
        std::cout << desc << std::endl;
        return 1;
    }

    try
    {
        /// checkCompressedHeaders(argv[1], argv[2]);
        checkByCompressedReadBuffer(argv[1], argv[2]);
    }
    catch (const DB::Exception & e)
    {
        std::cerr << e.what() << ", " << e.message() << std::endl
            << std::endl
            << "Stack trace:" << std::endl
            << e.getStackTrace().toString()
            << std::endl;
        throw;
    }

    return 0;
}