File: safe_conversion.cpp

package info (click to toggle)
simdutf 8.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,524 kB
  • sloc: cpp: 64,498; ansic: 15,347; python: 3,592; sh: 366; makefile: 12
file content (104 lines) | stat: -rw-r--r-- 3,230 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <span>
#include <type_traits>
#include <vector>

#include "simdutf.h"

// Differential check of the size-limited Latin-1 -> UTF-8 conversion.
//
// input_bytes is converted twice: once with convert_latin1_to_utf8_safe into
// a buffer of output_size bytes, and once with convert_latin1_to_utf8 into a
// buffer sized by utf8_length_from_latin1. The process is aborted if any of
// the consistency checks below fails.
void test_latin1_to_utf8(std::span<const uint8_t> input_bytes,
                         std::size_t output_size) {
  // Size-limited conversion: the reported byte count must fit the buffer.
  std::vector<char> bounded(output_size);
  const auto bounded_count =
      simdutf::convert_latin1_to_utf8_safe(input_bytes, bounded);
  if (!(bounded_count <= output_size)) {
    std::abort();
  }

  // Reference conversion: must fill exactly the predicted number of bytes.
  const auto expected_count = simdutf::utf8_length_from_latin1(input_bytes);
  std::vector<char> full(expected_count);
  const auto full_count = simdutf::convert_latin1_to_utf8(input_bytes, full);
  if (full_count != expected_count) {
    std::abort();
  }

  // The size-limited variant must never report more bytes than the full
  // conversion needed.
  if (!(bounded_count <= expected_count)) {
    std::abort();
  }

  // Whatever the size-limited variant wrote has to be a prefix of the
  // reference output.
  const auto bounded_prefix = std::span(bounded).first(bounded_count);
  const auto full_prefix = std::span(full).first(bounded_count);
  if (!std::ranges::equal(bounded_prefix, full_prefix)) {
    std::abort();
  }
}

// Differential check of the size-limited UTF-16 -> UTF-8 conversion.
//
// input is converted with convert_utf16_to_utf8_safe into a buffer of
// output_size bytes and with convert_utf16_to_utf8 into a buffer sized by
// utf8_length_from_utf16. The bytes produced by both conversions must agree
// on their common prefix; otherwise the process is aborted.
void test_utf16_to_utf8(std::span<const char16_t> input,
                        std::size_t output_size) {
  // Size-limited conversion: the reported byte count must fit the buffer.
  std::vector<char> bounded(output_size);
  const auto bounded_count =
      simdutf::convert_utf16_to_utf8_safe(input, bounded);
  if (!(bounded_count <= output_size)) {
    std::abort();
  }

  // The length estimate is implementation defined for garbage input, so it
  // is only used to size the reference buffer and is not itself verified.
  const auto estimated_count = simdutf::utf8_length_from_utf16(input);
  std::vector<char> full(estimated_count);
  const auto full_count = simdutf::convert_utf16_to_utf8(input, full);

  // Compare only as many bytes as both conversions reported writing.
  const auto common_count = std::min(bounded_count, full_count);
  const bool matches =
      std::ranges::equal(std::span(bounded).first(common_count),
                         std::span(full).first(common_count));
  // The assert gives a diagnostic in debug builds; the explicit abort keeps
  // the check active when asserts are compiled out.
  assert(matches);
  if (!matches) {
    std::abort();
  }
}

// Makes one of the simdutf implementations usable on this machine the active
// one.
//
// index is reduced modulo the number of usable implementations, so any value
// selects a valid entry. The list of usable implementations is computed once,
// on the first call, and reused afterwards.
void select_implementation(auto index) {
  // Available implementations filtered down to those that report
  // supported_by_runtime_system().
  static const auto implementations = []() {
    const auto list = simdutf::get_available_implementations();
    using Impl = std::decay_t<decltype(*list.begin())>;
    std::vector<Impl> ret;
    for (const auto& e : list) {
      if (e->supported_by_runtime_system()) {
        ret.push_back(e);
      }
    }
    return ret;
  }();
  assert(!implementations.empty());
  if (implementations.empty()) {
    // The assert above disappears under NDEBUG; without this guard the
    // modulo below would divide by zero.
    std::abort();
  }
  simdutf::get_active_implementation() =
      implementations.at(index % implementations.size());
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {

  if (size < 4) {
    return 0;
  }

  const auto action = data[0] & 0x1;
  const auto output_size = (data[1] << 8 | data[2]);
  const auto implementation_index = data[3] & 0b0111;
  data += 4;
  size -= 4;

  const std::span<const uint8_t> input_bytes{data, data + size};

  select_implementation(implementation_index);

  switch (action) {
  case 0:
    test_latin1_to_utf8(input_bytes, output_size);
    break;
  case 1: {
    const auto* ptr = reinterpret_cast<const char16_t*>(input_bytes.data());
    test_utf16_to_utf8(std::span(ptr, ptr + input_bytes.size() / 2),
                       output_size);
  } break;
  }

  return 0;
}