1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
|
#pragma once
#include <cstddef>
#include <cstdint>
#include <random>
#include <vector>
namespace simdutf {
namespace tests {
namespace helpers {
/*
Generates valid random UTF-16LE
It might generate streams consisting:
- only single 16-bit code units (random_utf16(..., 1, 0));
- only surrogate pairs, two 16-bit code units (random_utf16(..., 0, 1))
- mixed, depending on given probabilities (random_utf16(..., 1, 1))
*/
class random_utf16 {
std::mt19937 gen;
public:
random_utf16(uint32_t seed, int single_word_prob, int two_words_probability)
: gen{seed},
utf16_length({double(single_word_prob), double(single_word_prob),
double(2 * two_words_probability)}) {}
std::vector<char16_t> generate_le(size_t size);
std::vector<char16_t> generate_be(size_t size);
std::vector<char16_t> generate_le(size_t size, long seed);
std::vector<char16_t> generate_be(size_t size, long seed);
std::pair<std::vector<char16_t>, size_t> generate_counted_le(size_t size);
std::pair<std::vector<char16_t>, size_t> generate_counted_be(size_t size);
static void to_ascii_le(std::vector<char16_t> &output);
static void to_ascii_be(std::vector<char16_t> &output);
private:
std::pair<std::vector<char16_t>, size_t> generate_counted(size_t size);
private:
std::discrete_distribution<> utf16_length;
std::uniform_int_distribution<uint32_t> single_word0{0x00000000, 0x0000d7ff};
std::uniform_int_distribution<uint32_t> single_word1{0x0000e000, 0x0000ffff};
std::uniform_int_distribution<uint32_t> two_words{0x00010000, 0x0010ffff};
uint32_t generate();
};
} // namespace helpers
} // namespace tests
} // namespace simdutf
|