// File: FileFingerprint_CRC_test.cpp
//
// Package info: megacmd 2.5.0-1
//   - links: PTS, VCS
//   - area: main
//   - in suites: sid
//   - size: 32,592 kB
//   - sloc: cpp: 326,437; ansic: 34,524; python: 4,630; java: 3,965; sh: 2,869; objc: 2,459; makefile: 197; xml: 113
// File content: 364 lines, 11,324 bytes (stat: -rw-r--r--)
// === Unified CRC tests: production (IA/FA) + 32-bit-overflow emulation =======

#include "DefaultedFileAccess.h"

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <mega/base64.h>
#include <mega/crypto/cryptopp.h>

#include <bitset>

#if !defined(_WIN32)
#include <arpa/inet.h> // htonl
#endif

namespace
{
using ::mega::byte;
using ::testing::ContainerEq;

using CRCLanes = std::array<std::uint32_t, 4>;

/// User-defined literal converting a count of mebibytes into bytes,
/// e.g. `40_MiB` == 40 * 1024 * 1024.
///
/// Declared without whitespace between `""` and the suffix: in the spaced
/// form (`operator"" _MiB`) the suffix is an ordinary identifier token, and an
/// identifier starting with an underscore followed by an uppercase letter is
/// reserved; the spaced form is also deprecated. Call sites are unaffected.
///
/// @param n number of mebibytes.
/// @return `n * 1024 * 1024` as a 64-bit byte count.
constexpr std::uint64_t operator""_MiB(const unsigned long long n) noexcept
{
    return n * 1024ull * 1024ull;
}

/// Compile-time parameters of the sparse CRC sampling scheme emulated in this file.
struct Layout
{
    /// Number of CRC lanes; one 32-bit CRC word is produced per lane.
    static constexpr unsigned kLanes = 4;
    /// Number of windows sampled for each lane.
    static constexpr unsigned kBlocks = 32;
    /// Size, in bytes, of every sampled window.
    static constexpr unsigned kWindowBytes = 64;
    /// Divisor used when spreading window offsets: kLanes * kBlocks - 1 (= 127).
    static constexpr unsigned kDenominator = kLanes * kBlocks - 1;
    /// kWindowBytes widened to 64 bits for use in 64-bit offset arithmetic.
    static constexpr std::uint64_t kWindowU = kWindowBytes;
};

// Default seed for fillDeterministic(): fixed so the generated bytes never change.
constexpr std::uint32_t kDeterministicSeed = 0xA5A5A5A5u;
// Modification time (seconds) passed to both production fingerprint paths.
constexpr std::int64_t kTestMtimeSecs = 1'700'000'000;
// Size in bytes of the packed CRC: one 32-bit word per lane.
constexpr std::size_t kCrcBytes = Layout::kLanes * 4;
// Expected lane-equality masks (bit i set => lane i is identical in the 64-bit
// and the 32-bit-overflow emulation) for each synthetic file size.
constexpr std::uint32_t kEqMask40MiB = 0b0111u;
constexpr std::uint32_t kEqMask52MiB = 0b0011u;
constexpr std::uint32_t kEqMask88MiB = 0b0001u;

// ---------- Minimal in-memory IA and FA (exercise production code) -----------

/// In-memory ::mega::InputStreamAccess that serves bytes from a caller-owned vector.
///
/// Only a reference to the vector is stored, so `data` must outlive this object.
class MemIA final: public ::mega::InputStreamAccess
{
public:
    /// @param data bytes to expose through the stream interface (not copied).
    explicit MemIA(const std::vector<byte>& data):
        mData(data)
    {}

    /// @return total number of bytes in the backing vector.
    m_off_t size() override
    {
        return static_cast<m_off_t>(mData.size());
    }

    /// Consumes the next `n` bytes of the stream.
    ///
    /// @param buffer destination for the bytes, or nullptr to skip forward by
    ///               `n` bytes without copying anything.
    /// @param n      number of bytes to read or skip.
    /// @return false, leaving the position untouched, when fewer than `n`
    ///         bytes remain; true once the position has advanced by `n`.
    bool read(byte* buffer, const unsigned n) override
    {
        // mPos never exceeds mData.size(), so this subtraction cannot wrap
        // (unlike comparing `mPos + n` against the size).
        if (n > mData.size() - mPos)
            return false;

        // Address through data() instead of &mData[mPos]: indexing at
        // mPos == size() (zero-length read at end of stream, or empty data)
        // is out of bounds. Zero-length reads skip the copy entirely.
        if (buffer && n != 0)
            std::memcpy(buffer, mData.data() + mPos, n);

        mPos += n;
        return true;
    }

private:
    const std::vector<byte>& mData; // backing bytes, owned by the caller
    std::size_t mPos{0}; // current read position within mData
};

/// In-memory file access built on ::mt::DefaultedFileAccess that serves reads
/// from a caller-owned vector.
///
/// Only a reference to the vector is stored, so `data` must outlive this object.
class MemFA final: public ::mt::DefaultedFileAccess
{
public:
    /// Publishes the modification time and byte count through the inherited
    /// `mtime` / `size` members.
    MemFA(const std::vector<byte>& data, const ::mega::m_time_t mt):
        mData(data)
    {
        mtime = mt;
        size = static_cast<m_off_t>(data.size());
    }

    /// Marks the object as open; always succeeds.
    bool openf(::mega::FSLogging) override
    {
        mIsOpen = true;
        return true;
    }

    /// Marks the object as closed.
    void closef() override
    {
        mIsOpen = false;
    }

    /// Copies `n` bytes starting at offset `off` into `buf`.
    ///
    /// @return false when the object is not open, `off` is negative, or the
    ///         requested range does not lie entirely inside the data; true
    ///         otherwise. A null `buf` performs the range check only.
    bool frawread(void* buf,
                  unsigned long n,
                  m_off_t off,
                  bool /*nolock*/,
                  ::mega::FSLogging,
                  bool* /*retry*/ = nullptr) override
    {
        if (!mIsOpen)
            return false;
        if (off < 0)
            return false;

        const auto start = static_cast<std::size_t>(off);
        const auto length = static_cast<std::size_t>(n);
        const std::size_t total = mData.size();

        // The range must begin inside the data and must not run past its end.
        const bool inRange = start <= total && length <= total - start;
        if (!inRange)
            return false;

        if (buf != nullptr)
            std::memcpy(buf, mData.data() + start, length);
        return true;
    }

private:
    bool mIsOpen{false}; // toggled by openf()/closef()
    const std::vector<byte>& mData; // backing bytes, owned by the caller
};

// --------- Utilities ---------------------------------------------------------

/// Builds a bit mask describing which elements of two equally sized arrays match.
///
/// Bit `i` of the result is set exactly when `a[i] == b[i]`.
///
/// The mask is assembled directly in `Mask` rather than via
/// `std::bitset::to_ulong()`, which throws `std::overflow_error` when a set
/// bit lies above the width of `unsigned long` (possible for N > 32 on
/// platforms with a 32-bit `unsigned long`, even with a 64-bit `Mask`).
///
/// @tparam T    element type; must support `operator==`.
/// @tparam N    number of lanes; must fit in the value bits of `Mask`.
/// @tparam Mask integer type of the returned mask.
template<typename T, std::size_t N, typename Mask = std::uint32_t>
[[nodiscard]] inline Mask laneEqMaskBitset(const std::array<T, N>& a, const std::array<T, N>& b)
{
    static_assert(N <= std::numeric_limits<Mask>::digits,
                  "Mask too narrow for number of lanes; use a wider Mask.");
    Mask mask{0};
    for (std::size_t i = 0; i < N; ++i)
    {
        if (a[i] == b[i])
        {
            // The shift is done on a Mask-typed 1 so it is wide enough for any
            // i < N; the outer cast undoes integer promotion of narrow masks.
            mask = static_cast<Mask>(mask | (static_cast<Mask>(1) << i));
        }
    }
    return mask;
}

/// Converts a 32-bit value from host byte order to network byte order.
///
/// Non-Windows builds defer to htonl(); Windows builds byte-swap
/// unconditionally with _byteswap_ulong().
[[nodiscard]] inline std::uint32_t htonl_u32(const std::uint32_t x) noexcept
{
#if !defined(_WIN32)
    const std::uint32_t networkOrder = htonl(x);
#else
    const std::uint32_t networkOrder = _byteswap_ulong(x);
#endif
    return networkOrder;
}

// Pulls the CRC field (the 22-char base64 text) out of a debug string laid out
// as size:mtime:CRC:valid, i.e. the text between the second and third ':'.
// Returns an empty string when fewer than three ':' separators are present.
[[nodiscard]] std::string crcB64FromDbg(const std::string& dbg)
{
    // Positions of the first three separators, located left to right.
    std::string::size_type colon[3];
    std::string::size_type searchFrom = 0;
    for (auto& pos: colon)
    {
        pos = dbg.find(':', searchFrom);
        if (pos == std::string::npos)
            return {};
        searchFrom = pos + 1;
    }

    const auto fieldBegin = colon[1] + 1;
    return dbg.substr(fieldBegin, colon[2] - fieldBegin);
}

// Decodes a base64 CRC string into its four lane words.
// The text is decoded with ::mega::Base64::atob into a kCrcBytes buffer; if the
// call does not report exactly kCrcBytes, every lane is left at zero. The
// decoded bytes are copied verbatim into the lane array (host-endian words).
[[nodiscard]] CRCLanes b64ToLanesHost(const std::string& b64)
{
    byte decoded[kCrcBytes]{};
    const int wanted = static_cast<int>(kCrcBytes);
    const auto got = ::mega::Base64::atob(b64.c_str(), decoded, wanted);

    CRCLanes lanes{};
    if (got != wanted)
        return lanes; // unexpected length: report all-zero lanes

    std::memcpy(lanes.data(), decoded, kCrcBytes);
    return lanes;
}

[[nodiscard]] std::string lanesToB64(const CRCLanes& lanesHost)
{
    byte raw[kCrcBytes];
    std::memcpy(raw, lanesHost.data(), kCrcBytes);

    // base64 output capacity = 4 * ceil(N / 3)
    const auto cap = static_cast<std::size_t>(4 * ((kCrcBytes + 2) / 3));
    std::string out(cap, '\0');
    const auto outSize = ::mega::Base64::btoa(raw, static_cast<int>(kCrcBytes), out.data());
    if (outSize < 0)
        return {};
    out.resize(static_cast<std::size_t>(outSize));
    return out;
}

// Deterministic PRNG (xorshift32) for fully stable bytes across platforms
void fillDeterministic(std::vector<byte>& buf, const std::uint32_t seed = kDeterministicSeed)
{
    std::uint32_t x = seed;
    for (auto& b: buf)
    {
        x ^= x << 13;
        x ^= x >> 17;
        x ^= x << 5;
        b = static_cast<byte>(x & 0xFF);
    }
}

// ---------- Buggy 32-bit overflow emulation (for comparison) -----------------

// Start offset of sample window `j` of lane `lane`, computed entirely in
// 64-bit arithmetic: (size - window) * (lane * kBlocks + j) / kDenominator,
// clamped to the last valid window start (size - window).
// The subtraction is unsigned, so `size` must be at least Layout::kWindowU.
[[nodiscard]] inline std::uint64_t sparseOffset64(const std::uint64_t size,
                                                  const unsigned lane,
                                                  const unsigned j) noexcept
{
    const std::uint64_t lastStart = size - Layout::kWindowU;
    const std::uint64_t sample = std::uint64_t(lane) * Layout::kBlocks + j;

    std::uint64_t offset = 0;
    if (Layout::kDenominator)
        offset = (lastStart * sample) / Layout::kDenominator; // 64-bit multiply

    if (offset > lastStart)
        return lastStart;
    return offset;
}

// Reproduces the defective offset computation: the size is truncated to
// 32 bits and both the multiply and the divide are carried out in 32-bit
// unsigned arithmetic, so the product wraps. The result is then clamped to
// the last valid window start, which is computed from the full 64-bit size.
[[nodiscard]] inline std::uint64_t sparseOffset32_bug(const std::uint64_t size,
                                                      const unsigned lane,
                                                      const unsigned j) noexcept
{
    const auto size32 = static_cast<std::uint32_t>(size); // upper bits dropped
    const auto sample32 = static_cast<std::uint32_t>(lane * Layout::kBlocks + j);

    // Deliberately narrow: this product wraps for large sizes.
    const auto wrapped =
        static_cast<std::uint32_t>((size32 - Layout::kWindowBytes) * sample32);

    std::uint32_t offset32 = 0;
    if (Layout::kDenominator)
        offset32 = wrapped / Layout::kDenominator;

    const std::uint64_t lastStart = size - Layout::kWindowU;
    if (offset32 > lastStart)
        return lastStart;
    return offset32;
}

void computeCrcFromBytes(const std::vector<byte>& data,
                         const bool use64Fix,
                         CRCLanes& lanesHost_out)
{
    for (unsigned li = 0; li < Layout::kLanes; ++li)
    {
        ::mega::HashCRC32 crc;
        for (unsigned j = 0; j < Layout::kBlocks; ++j)
        {
            const auto off = use64Fix ?
                                 sparseOffset64(static_cast<std::uint64_t>(data.size()), li, j) :
                                 sparseOffset32_bug(static_cast<std::uint64_t>(data.size()), li, j);
            crc.add(&data[static_cast<std::size_t>(off)], Layout::kWindowBytes);
        }
        std::int32_t v{0};
        crc.get(reinterpret_cast<byte*>(&v));
        lanesHost_out[li] = htonl_u32(static_cast<std::uint32_t>(v)); // match cloud packing
    }
}

// Value-returning convenience overload: starts from zero-initialized lanes,
// fills them through the out-parameter overload and returns the result.
[[nodiscard]] inline CRCLanes computeCrcFromBytes(const std::vector<byte>& data,
                                                  const bool use64Fix)
{
    CRCLanes result{};
    computeCrcFromBytes(data, use64Fix, result);
    return result;
}

// ---------- Shared helper to compute + check one synthetic case --------------

/// Base64 CRC strings produced by one synthetic case.
struct SynthResult
{
    /// CRC text from the 64-bit reference emulation.
    std::string goodB64{};
    /// CRC text from the 32-bit-overflow emulation.
    std::string bugB64{};
};

// Runs one synthetic fingerprint case and records the resulting CRC strings.
//
// Steps:
//   1. Generate `sizeBytes` deterministic bytes from `seed`.
//   2. Fingerprint them with the production code through both in-memory
//      adapters (MemIA and MemFA) and extract the CRC field of each.
//   3. Compute the reference CRC with the 64-bit emulation and require both
//      production results to equal it.
//   4. Compute the CRC with the 32-bit-overflow emulation, require it to differ
//      from the reference, and require the per-lane equality mask to equal
//      `expectedEqMask`.
//
// `label` is only used in failure messages. `out` is written last, so it is
// left untouched when a fatal assertion (ASSERT_*) makes this function return
// early; callers should check for fatal failures before using it.
void runOneSyntheticCase(const std::uint64_t sizeBytes,
                         const std::uint32_t seed,
                         const std::string_view label,
                         const std::uint32_t expectedEqMask,
                         SynthResult& out)
{
    // Create deterministic data
    std::vector<byte> data(static_cast<std::size_t>(sizeBytes));
    fillDeterministic(data, seed);

    // Production (IA): fingerprint through the stream-style adapter
    std::string goodB64_IA;
    {
        MemIA ia(data);
        ::mega::FileFingerprint fp;
        ASSERT_TRUE(fp.genfingerprint(&ia, /*cmtime*/ kTestMtimeSecs, /*ignoremtime*/ false));
        goodB64_IA = crcB64FromDbg(fp.fingerprintDebugString());
    }

    // Production (FA): fingerprint through the file-access adapter
    std::string goodB64_FA;
    {
        MemFA fa(data, /*mtime*/ kTestMtimeSecs);
        ::mega::FileFingerprint fp;
        ASSERT_TRUE(fp.genfingerprint(&fa, /*ignoremtime*/ false));
        goodB64_FA = crcB64FromDbg(fp.fingerprintDebugString());
    }

    // Reference "good" emulation via helper (64-bit math)
    const auto goodCRClanes = computeCrcFromBytes(data, /*use64Fix=*/true);
    const auto goodB64_ref = lanesToB64(goodCRClanes);

    // Sanity check of the helpers: encoding then decoding must give back the lanes
    {
        const auto goodHostLanesFromB64 = b64ToLanesHost(goodB64_ref);
        ASSERT_THAT(goodCRClanes, ContainerEq(goodHostLanesFromB64));
    }

    // Both production paths must agree with the reference and with each other
    EXPECT_EQ(goodB64_IA, goodB64_ref) << "IA/ref mismatch for " << label;
    EXPECT_EQ(goodB64_FA, goodB64_ref) << "FA/ref mismatch for " << label;
    EXPECT_EQ(goodB64_IA, goodB64_FA) << "IA/FA mismatch for " << label;

    // Buggy emulation (32-bit overflow in the offset computation)
    const auto bugCRClanes = computeCrcFromBytes(data, /*use64Fix=*/false);
    const auto bugB64 = lanesToB64(bugCRClanes);

    // Same encode/decode round-trip check for the buggy lanes
    {
        const auto badHostLanesFromB64 = b64ToLanesHost(bugB64);
        ASSERT_THAT(bugCRClanes, ContainerEq(badHostLanesFromB64));
    }

    EXPECT_NE(goodB64_ref, bugB64) << "Buggy CRC should differ for " << label;

    // Bit i of eqMask is set when lane i is identical in both emulations
    const auto eqMask = laneEqMaskBitset(goodCRClanes, bugCRClanes);
    EXPECT_EQ(eqMask, expectedEqMask) << "Unexpected lane pattern for " << label;

    // Hand both strings back so the caller can compare them with pinned values
    out.goodB64 = goodB64_ref;
    out.bugB64 = bugB64;
}

} // namespace

// 40 MiB synthetic file: runs the shared case with kEqMask40MiB (0b0111, i.e.
// lanes 0-2 expected to match between the 64-bit and the 32-bit-overflow
// emulation), then pins both CRC strings to fixed expected values.
TEST(FileFingerprint, CRC64Fix_Synth_40MiB_GoodVsBuggy)
{
    SynthResult r;
    // Stop here if the helper hit a fatal assertion; `r` would not be filled in.
    ASSERT_NO_FATAL_FAILURE(
        runOneSyntheticCase(40_MiB, kDeterministicSeed, "40MiB", kEqMask40MiB, r));

    // Expected base64 CRC strings for this size and seed
    static constexpr const char* kGood{"6iqpUy7DdAKx5NIRg31i_g"};
    static constexpr const char* kBug{"6iqpUy7DdAKx5NIRGX1AAA"};

    EXPECT_EQ(r.goodB64, kGood);
    EXPECT_EQ(r.bugB64, kBug);
}

// 52 MiB synthetic file: runs the shared case with kEqMask52MiB (0b0011, i.e.
// lanes 0-1 expected to match between the 64-bit and the 32-bit-overflow
// emulation), then pins both CRC strings to fixed expected values.
TEST(FileFingerprint, CRC64Fix_Synth_52MiB_GoodVsBuggy)
{
    SynthResult r;
    // Stop here if the helper hit a fatal assertion; `r` would not be filled in.
    ASSERT_NO_FATAL_FAILURE(
        runOneSyntheticCase(52_MiB, kDeterministicSeed, "52MiB", kEqMask52MiB, r));

    // Expected base64 CRC strings for this size and seed
    static constexpr const char* kGood{"7SMVr_-v9_H7MDsN9yuVGA"};
    static constexpr const char* kBug{"7SMVr_-v9_Gk00B4SWd30g"};

    EXPECT_EQ(r.goodB64, kGood);
    EXPECT_EQ(r.bugB64, kBug);
}

// 88 MiB synthetic file: runs the shared case with kEqMask88MiB (0b0001, i.e.
// only lane 0 expected to match between the 64-bit and the 32-bit-overflow
// emulation), then pins both CRC strings to fixed expected values.
TEST(FileFingerprint, CRC64Fix_Synth_88MiB_GoodVsBuggy)
{
    SynthResult r;
    // Stop here if the helper hit a fatal assertion; `r` would not be filled in.
    ASSERT_NO_FATAL_FAILURE(
        runOneSyntheticCase(88_MiB, kDeterministicSeed, "88MiB", kEqMask88MiB, r));

    // Expected base64 CRC strings for this size and seed
    static constexpr const char* kGood{"3hhTVPVhwzudmjN1odbO6w"};
    static constexpr const char* kBug{"3hhTVIMatxXS_18ZkPyITg"};

    EXPECT_EQ(r.goodB64, kGood);
    EXPECT_EQ(r.bugB64, kBug);
}