File: validate_utf8_puzzler_tests.cpp

package info (click to toggle)
simdutf 7.7.1-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 7,244 kB
  • sloc: cpp: 60,074; ansic: 14,226; python: 3,364; sh: 321; makefile: 12
file content (33 lines) | stat: -rw-r--r-- 1,398 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#include "simdutf.h"

#include <tests/helpers/test.h>

// Attempt to reproduce an issue reported by the UTF-8 fuzzer: a 64-byte
// buffer of mostly NUL bytes that also contains a stray 0x80 byte, which is
// not preceded by any lead byte. Validation must reject it.
TEST(puzzler) {
  const size_t payload_size = 64;
  const char *const payload =
      "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00"
      "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00"
      "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
      "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
  ASSERT_FALSE(implementation.validate_utf8(payload, payload_size));
}

TEST(puzzler2) {
  // 102-byte input whose defect sits inside the first 64 bytes (a three-byte
  // sequence starting with 0xe7 at offset 62 that is cut short by a NUL), yet
  // can only be noticed once the following 64 bytes are examined.
  const size_t payload_size = 102;
  const char *const payload =
      "\x0a\x04\x00\x00\xdb\xa1\xdd\xa1\xf1\xa0\xb6\x95\xe4\xb5\x89\xe7\x8f\x95"
      "\xe4\xa2\x83\xe7\x95\x89\xe7\x95\x91\xe7\x95\x89\x00\x01\x01\x1a\x20\x28"
      "\x00\x00\x60\x00\x00\x23\x00\xf1\xa0\xb6\x95\xe4\xb5\x89\xe7\x8f\x95\xe4"
      "\xa2\x83\xe7\x95\x89\xe7\x95\x91\xe7\x81\x00\x00\x01\x01\x1a\x20\x28\x00"
      "\x00\x60\x00\x00\x23\x00\x2f\x00\x00\x00\x00\x07\x04\x75\xc2\xa0\x34\x2f"
      "\x00\x00\x00\x00\x07\x04\x75\xc2\xa0\x33\x53\x2b";

  // The plain boolean validator has to reject the buffer.
  ASSERT_FALSE(implementation.validate_utf8(payload, payload_size));

  // The error-reporting validator has to classify the failure as TOO_SHORT
  // and report offset 62 as its location.
  const auto outcome =
      implementation.validate_utf8_with_errors(payload, payload_size);
  ASSERT_EQUAL(outcome.error, simdutf::TOO_SHORT);
  ASSERT_EQUAL(outcome.count, 62);
}

// Test-harness macro; presumably expands to the program entry point that runs
// the TEST cases declared above (defined outside this file — confirm in
// tests/helpers/test.h).
TEST_MAIN