File: validate_utf8_brute_force_tests.cpp

package info (click to toggle)
simdutf 7.7.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,244 kB
  • sloc: cpp: 60,074; ansic: 14,226; python: 3,364; sh: 321; makefile: 12
file content (88 lines) | stat: -rw-r--r-- 2,945 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#include "simdutf.h"

#include <tests/helpers/random_utf8.h>
#include <tests/reference/validate_utf8.h>
#include <tests/helpers/test.h>

// Differential corruption test for UTF-8 validation.
//
// Runs 10 rounds. Each round asks `gen_utf8` for a 1000-byte input and
// requires `implementation.validate_utf8` to accept it. It then performs 1000
// single-byte corruptions: one randomly chosen byte is overwritten with a
// random value, the verdict of `implementation` is compared with the verdict
// of simdutf::tests::reference::validate_utf8, and the original byte is put
// back, so at most one byte differs from the generated input at any time.
//
// Any disagreement (or a rejected pristine input) prints "bug" and trips
// ASSERT_TRUE(false).
//
// implementation: object exposing validate_utf8(const char *, size_t).
// seed:           seeds the std::mt19937 that picks positions and byte values.
// gen_utf8:       input generator, taken by value.
template <typename T>
static void test_corrupt(T &implementation, uint32_t seed,
                         simdutf::tests::helpers::random_utf8 gen_utf8) {
  const size_t rounds = 10;
  const size_t input_bytes = 1000;
  const size_t corruptions_per_round = 1000;

  std::mt19937 rng(seed);
  for (size_t round = 0; round != rounds; ++round) {
    auto buffer = gen_utf8.generate(input_bytes);
    // The buffer is never resized below, so the pointer and length stay valid
    // for the whole round.
    const size_t length = buffer.size();
    const char *const text = reinterpret_cast<const char *>(buffer.data());

    const bool pristine_ok = implementation.validate_utf8(text, length);
    if (!pristine_ok) {
      puts("bug");
      ASSERT_TRUE(false);
    }

    std::uniform_int_distribution<size_t> pick_index{0, length - 1};
    for (size_t attempt = 0; attempt != corruptions_per_round; ++attempt) {
      // Draw the position first, then the replacement value.
      const size_t victim = pick_index(rng);
      const uint8_t saved = buffer[victim];
      buffer[victim] = uint8_t(rng());

      const bool got = implementation.validate_utf8(text, length);
      const bool wanted =
          simdutf::tests::reference::validate_utf8(text, length);
      if (got != wanted) {
        puts("bug");
        ASSERT_TRUE(false);
      }

      // Undo the corruption so the next attempt starts from the valid input.
      buffer[victim] = saved;
    }
  }
}

// Runs test_corrupt on inputs from a generator configured as (1, 0, 0, 0).
// NOTE(review): the four trailing arguments are presumably the relative
// weights of 1-, 2-, 3- and 4-byte sequences, i.e. ASCII only here; confirm
// against random_utf8.h. `implementation` is not declared in this file and is
// presumably supplied by the TEST macro.
TEST(corrupt_1byte) {
  using simdutf::tests::helpers::random_utf8;
  const uint32_t seed = 1234;
  test_corrupt(implementation, seed, random_utf8(seed, 1, 0, 0, 0));
}

// Runs test_corrupt twice, with generators configured as (0, 1, 0, 0) and
// (1, 1, 0, 0).
// NOTE(review): the four trailing arguments are presumably the relative
// weights of 1-, 2-, 3- and 4-byte sequences; confirm against random_utf8.h.
// `implementation` is presumably supplied by the TEST macro.
TEST(corrupt_2byte) {
  using simdutf::tests::helpers::random_utf8;
  const uint32_t seed = 1234;
  // Second weight only.
  test_corrupt(implementation, seed, random_utf8(seed, 0, 1, 0, 0));
  // First and second weights together.
  test_corrupt(implementation, seed, random_utf8(seed, 1, 1, 0, 0));
}

// Runs test_corrupt four times; every generator has the third weight set,
// combined with each on/off choice of the first two weights.
// NOTE(review): the four trailing arguments are presumably the relative
// weights of 1-, 2-, 3- and 4-byte sequences; confirm against random_utf8.h.
// `implementation` is presumably supplied by the TEST macro.
TEST(corrupt_3byte) {
  using simdutf::tests::helpers::random_utf8;
  const uint32_t seed = 1234;
  // Third weight only.
  test_corrupt(implementation, seed, random_utf8(seed, 0, 0, 1, 0));
  // Second and third.
  test_corrupt(implementation, seed, random_utf8(seed, 0, 1, 1, 0));
  // First and third.
  test_corrupt(implementation, seed, random_utf8(seed, 1, 0, 1, 0));
  // First, second and third.
  test_corrupt(implementation, seed, random_utf8(seed, 1, 1, 1, 0));
}

// Differential bit-flip test for UTF-8 validation.
//
// For each of 1000 generated inputs (requested length rand() % 256, generator
// configured as (1, 1, 1, 1)), the pristine input must be accepted by
// `implementation.validate_utf8`. Then 1000 single-bit flips are applied at
// random positions. The flips accumulate: the buffer is never restored, so it
// drifts further from the generated input with each step. After every flip
// the verdict of `implementation` must match the verdict of
// simdutf::tests::reference::validate_utf8.
//
// Any disagreement (or a rejected pristine input) prints "bug" and trips
// ASSERT_TRUE(false).
//
// NOTE(review): `implementation` is not declared in this file and is
// presumably supplied by the TEST macro.
TEST(brute_force) {
  uint32_t seed{1234};
  simdutf::tests::helpers::random_utf8 gen_1_2_3_4(seed, 1, 1, 1, 1);
  size_t total = 1000;
  for (size_t i = 0; i < total; i++) {

    auto UTF8 = gen_1_2_3_4.generate(rand() % 256);
    if (!implementation.validate_utf8((const char *)UTF8.data(), UTF8.size())) {
      puts("bug");
      ASSERT_TRUE(false);
    }
    // The requested length can be 0. An empty buffer has no byte to mutate,
    // and `rand() % UTF8.size()` below would be a modulo by zero.
    if (UTF8.empty()) {
      continue;
    }
    for (size_t flip = 0; flip < 1000; ++flip) {
      // Pick the bit first, then the position (same rand() order as before).
      const int bitflip{1 << (rand() % 8)};
      const size_t position = rand() % UTF8.size();
      // XOR toggles exactly one bit of the existing byte. A plain assignment
      // would replace the byte with the mask itself.
      UTF8[position] = uint8_t(UTF8[position] ^ bitflip);
      bool is_ok =
          implementation.validate_utf8((const char *)UTF8.data(), UTF8.size());
      bool is_ok_basic = simdutf::tests::reference::validate_utf8(
          (const char *)UTF8.data(), UTF8.size());
      if (is_ok != is_ok_basic) {
        puts("bug");
        ASSERT_TRUE(false);
      }
    }
  }
}

// Test-program entry point. TEST_MAIN is a macro that is not defined in this
// file (presumably provided by tests/helpers/test.h, expanding to a main()
// that runs the TEST cases above; confirm there).
TEST_MAIN