File: pbf-decoder.cpp

package info (click to toggle)
protozero 1.8.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,548 kB
  • sloc: cpp: 20,364; sh: 18; makefile: 14
file content (278 lines) | stat: -rw-r--r-- 8,904 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/*****************************************************************************

Protobuf decoder tool

Tool to decode unknown protocol buffer encoded messages. The protocol buffer
format doesn't contain enough information about the contents of a file to make
it decodable without the format description usually found in a `.proto` file,
so this tool does some informed guessing.

Usage:

    pbf-decoder [OPTIONS] [FILENAME]

Use "-" as a file name to read from STDIN.

The output always goes to STDOUT.

Call with --help/-h to see more options.

*****************************************************************************/

#include <protozero/pbf_reader.hpp>

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <exception>
#include <fstream>
#include <getopt.h>
#include <iostream>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace {

// Forward declaration: decode() and decode_message() call each other
// (a length-delimited field may itself be a message), so decode() has to
// be declared before decode_message() is defined.
std::string decode(const char* data, std::size_t len, const std::string& indent);

// Attempt to interpret the bytes in `view` as an embedded message.
//
// The nested message is decoded with the indentation increased by two
// spaces. Only when the whole view decodes without a protozero exception is
// anything written to `out` (a newline followed by the nested dump).
//
// Returns true if the view was decoded and written, false otherwise.
bool decode_message(std::stringstream& out, const std::string& indent, const protozero::data_view view) {
    bool decoded = false;

    try {
        const std::string nested_indent = indent + "  ";
        const std::string nested_dump = decode(view.data(), view.size(), nested_indent);
        out << '\n' << nested_dump;
        decoded = true;
    } catch (const protozero::exception&) { // NOLINT(bugprone-empty-catch)
        // Not a valid message; the caller will try another interpretation.
    }

    return decoded;
}

// Attempt to interpret the bytes in `view` as an identifier-like string.
//
// The view is accepted only if every byte is an ASCII letter, a digit, or
// one of '_', ':' and '-'. Accepted strings are written to `out` in double
// quotes followed by a newline; strings longer than 60 characters are cut
// off after 60 characters and marked with a trailing "...".
//
// Returns true if the string was written, false if it contains any other
// character (in which case nothing is written).
bool decode_printable_string(std::stringstream& out, const protozero::data_view view) {
    static constexpr const std::size_t max_string_length = 60;
    static const std::string allowed_chars{"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_:-"};

    const std::string text{view.data(), view.size()};

    const bool all_allowed = std::all_of(text.begin(), text.end(), [](const char c) {
        return allowed_chars.find(c) != std::string::npos;
    });
    if (!all_allowed) {
        return false;
    }

    const bool truncated = text.size() > max_string_length;
    out << '"'
        << (truncated ? text.substr(0, max_string_length) : text)
        << (truncated ? "\"...\n" : "\"\n");

    return true;
}

// Fallback interpretation: dump the bytes in `view` as a quoted string.
//
// At most the first 60 bytes are shown. Every byte that is not printable
// according to std::isprint is replaced by '.'. The result is written to
// `out` in double quotes followed by a newline.
//
// Always returns true, so this can terminate a chain of decode attempts.
bool decode_string(std::stringstream& out, const protozero::data_view view) {
    static constexpr const std::size_t max_string_length = 60;

    const std::string str{view.data(), std::min(view.size(), max_string_length)};
    out << '"';

    for (const char c : str) {
        // std::isprint has undefined behavior for negative arguments other
        // than EOF, which is what a plain (signed) char yields for bytes
        // >= 0x80. Convert to unsigned char first.
        if (std::isprint(static_cast<unsigned char>(c)) != 0) {
            out << c;
        } else {
            out << '.';
        }
    }

    out << '"' << '\n';

    return true;
}

// Write all values of `range` to `out` as a comma-separated list (no
// spaces), terminated by a newline. An empty range produces just the
// newline.
template <typename TRange>
void print_number_range(std::stringstream& out, const TRange& range) {
    bool need_separator = false;

    for (const auto value : range) {
        if (need_separator) {
            out << ',';
        }
        out << value;
        need_separator = true;
    }

    out << '\n';
}

// Attempt to interpret the current field of `message` as a packed repeated
// double field and print its values to `out`.
//
// `size` is the payload length in bytes; payloads whose length is not a
// multiple of 8 are rejected without touching `message`.
//
// Returns true if the values were printed, false if the size does not fit
// or protozero reported an error.
bool decode_packed_double(std::stringstream& out, std::size_t size, protozero::pbf_reader& message) {
    constexpr std::size_t element_size = 8;

    if (size % element_size == 0) {
        try {
            print_number_range(out, message.get_packed_double());
            return true;
        } catch (const protozero::exception&) { // NOLINT(bugprone-empty-catch)
            // Fall through and report failure.
        }
    }

    return false;
}

// Attempt to interpret the current field of `message` as a packed repeated
// float field and print its values to `out`.
//
// `size` is the payload length in bytes; payloads whose length is not a
// multiple of 4 are rejected without touching `message`.
//
// Returns true if the values were printed, false if the size does not fit
// or protozero reported an error.
bool decode_packed_float(std::stringstream& out, std::size_t size, protozero::pbf_reader& message) {
    constexpr std::size_t element_size = 4;

    if (size % element_size == 0) {
        try {
            print_number_range(out, message.get_packed_float());
            return true;
        } catch (const protozero::exception&) { // NOLINT(bugprone-empty-catch)
            // Fall through and report failure.
        }
    }

    return false;
}

// Attempt to interpret the current field of `message` as a packed repeated
// varint field (decoded as int64) and print its values to `out`.
//
// protozero decodes packed varints while the range is being iterated, so an
// exception for a malformed value can surface after earlier values have
// already been formatted. The list is therefore built in a local buffer and
// only copied to `out` once the whole range was decoded; on failure `out`
// is left untouched so the caller's next interpretation starts clean.
//
// Returns true if the values were printed, false if protozero reported an
// error.
bool decode_packed_varint(std::stringstream& out, protozero::pbf_reader& message) {
    try {
        std::stringstream buffer;
        print_number_range(buffer, message.get_packed_int64());
        out << buffer.str();
        return true;
    } catch (const protozero::exception&) { // NOLINT(bugprone-empty-catch)
        // Not a valid packed varint field; nothing has been written to out.
    }

    return false;
}

std::string decode(const char* data, std::size_t len, const std::string& indent) {
    std::stringstream stream;
    protozero::pbf_reader message{data, len};
    while (message.next()) {
        stream << indent << message.tag() << ": ";
        switch (message.wire_type()) {
            case protozero::pbf_wire_type::varint: {
                // This is int32, int64, uint32, uint64, sint32, sint64, bool, or enum.
                // Try decoding as int64.
                stream << message.get_int64() << '\n';
                break;
            }
            case protozero::pbf_wire_type::fixed64:
                // This is fixed64, sfixed64, or double.
                // Try decoding as a double, because int64_t or uint64_t
                // would probably be encoded as varint.
                stream << message.get_double() << '\n';
                break;
            case protozero::pbf_wire_type::length_delimited: {
                // This is string, bytes, embedded messages, or packed repeated fields.
                protozero::pbf_reader message_copy{message};
                const auto view = message.get_view();

                decode_message(stream, indent, view) ||
                    decode_printable_string(stream, view) ||
                    decode_packed_double(stream, view.size(), message_copy) ||
                    decode_packed_float(stream, view.size(), message_copy) ||
                    decode_packed_varint(stream, message_copy) ||
                    decode_string(stream, view);
                break;
            }
            case protozero::pbf_wire_type::fixed32:
                // This is fixed32, sfixed32, or float.
                // Try decoding as a float, because int32_t or uint32_t
                // would probably be encoded as varint.
                stream << message.get_float() << '\n';
                break;
            default:
                throw protozero::unknown_pbf_wire_type_exception{};
        }
    }

    return stream.str();
}

// Write the usage text with the list of command line options to STDOUT.
void print_help() {
    static constexpr const char* help_text =
        "Usage: pbf-decoder [OPTIONS] [INPUT_FILE]\n\n"
        "Dump raw contents of protobuf encoded file.\n"
        "To read from STDIN use '-' as INPUT_FILE.\n"
        "\nOptions:\n"
        "  -h, --help           This help message\n"
        "  -l, --length=LENGTH  Read only LENGTH bytes\n"
        "  -o, --offset=OFFSET  Start reading from OFFSET bytes\n";

    std::cout << help_text;
}

// Read the complete contents of the file `filename` in binary mode.
//
// Returns the bytes of the file (empty vector for an empty file).
// Throws std::runtime_error if the file can not be opened, so that a
// missing or unreadable file is reported instead of being treated like an
// empty one.
std::vector<char> read_from_file(const char* filename) {
    const std::ifstream file{filename, std::ios::binary};
    if (!file.is_open()) {
        throw std::runtime_error{std::string{"Can not open file '"} + filename + "'"};
    }

    return std::vector<char>{std::istreambuf_iterator<char>(file.rdbuf()),
                             std::istreambuf_iterator<char>()};
}

// Read everything available on STDIN (through the stream buffer of
// std::cin) until end of input and return it as raw bytes.
std::vector<char> read_from_stdin() {
    using input_iterator = std::istreambuf_iterator<char>;

    const input_iterator first{std::cin.rdbuf()};
    const input_iterator last{};

    return std::vector<char>(first, last);
}

} // anonymous namespace

// Entry point: parse the command line, load the input, and dump it.
//
// Options:
//   -h/--help    print the help text and exit with 0
//   -l/--length  decode at most that many bytes
//   -o/--offset  skip that many bytes at the start of the input
//
// Exactly one positional argument (the input file, or "-" for STDIN) is
// required. Returns 0 on success and 1 on a usage error or when reading or
// decoding throws.
int main(int argc, char* argv[]) {
    static struct option long_options[] = {
        {"help",   no_argument,       nullptr, 'h'},
        {"length", required_argument, nullptr, 'l'},
        {"offset", required_argument, nullptr, 'o'},
        {nullptr,  0,                 nullptr, 0}
    };

    std::size_t skip_bytes = 0;
    std::size_t max_bytes = std::numeric_limits<std::size_t>::max();

    for (;;) {
        const int opt = getopt_long(argc, argv, "hl:o:", long_options, nullptr); // NOLINT(concurrency-mt-unsafe) no threads here
        if (opt == -1) {
            break;
        }

        if (opt == 'h') {
            print_help();
            return 0;
        }

        if (opt == 'l') {
            // good enough for a limited-use tool
            max_bytes = std::atoll(optarg); // NOLINT(cert-err34-c)
        } else if (opt == 'o') {
            // good enough for a limited-use tool
            skip_bytes = std::atoll(optarg); // NOLINT(cert-err34-c)
        } else {
            // Unknown option or missing option argument.
            return 1;
        }
    }

    if (argc - optind != 1) {
        std::cerr << "Usage: " << argv[0] << " [OPTIONS] [INPUT_FILE]\n\n"
                  << "Call with --help/-h to see options.\n";
        return 1;
    }

    const std::string input_name{argv[optind]};
    const bool use_stdin = (input_name == "-");

    try {
        std::vector<char> bytes = use_stdin ? read_from_stdin()
                                            : read_from_file(argv[optind]);

        if (skip_bytes > bytes.size()) {
            throw std::runtime_error{"offset is larger than file size"};
        }

        // Drop the leading bytes the user asked to skip ...
        if (skip_bytes != 0) {
            bytes.erase(bytes.begin(), bytes.begin() + skip_bytes);
        }

        // ... and cut off everything beyond the requested length.
        if (max_bytes < bytes.size()) {
            bytes.resize(max_bytes);
        }

        std::cout << decode(bytes.data(), bytes.size(), "");
    } catch (const std::exception& error) {
        std::cerr << error.what() << '\n';
        return 1;
    }

    return 0;
}