1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
#include "ftest.h"
#define UTF_CPP_CPLUSPLUS 202002L
#include "utf8.h"
#include <string>
using namespace utf8;
using namespace std;
// utf16tou8: converts the same UTF-16 input through both the u16string and
// the u16string_view overloads and checks the length of the UTF-8 output.
TEST(CPP20APITests, test_utf16tou8)
{
    // Three BMP code units followed by one surrogate pair (0xd834 0xdd1e).
    const std::u16string source{0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
    const std::u16string_view source_view{source};

    // Expected length: 1 + 2 + 3 + 4 UTF-8 code units for the four code points.
    const std::u8string from_string = utf16tou8(source);
    EXPECT_EQ (from_string.size(), 10);

    const std::u8string from_view = utf16tou8(source_view);
    EXPECT_EQ (from_view.size(), 10);
}
// utf8to16: decodes UTF-8 text holding U+65E5, U+0448 and U+1D11E and checks
// every resulting UTF-16 code unit. The last code point lies outside the BMP,
// so it has to come out as the surrogate pair 0xd834 0xdd1e.
// NOTE(review): the test name "tes20t_utf8to16" looks like a typo for
// "test_utf8to16"; it is left unchanged here so the registered test name
// stays stable - confirm and rename separately if desired.
TEST(CPP20APITests, tes20t_utf8to16)
{
    const u8string utf8_with_surrogates{ reinterpret_cast<const char8_t*>("\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e") };
    const u16string utf16result = utf8to16(utf8_with_surrogates);

    // Two BMP characters (one code unit each) plus one surrogate pair.
    EXPECT_EQ (utf16result.size(), 4);

    // The BMP characters must be decoded correctly too, not just counted.
    EXPECT_EQ (utf16result[0], 0x65e5);
    EXPECT_EQ (utf16result[1], 0x0448);

    // High and low surrogate of U+1D11E.
    EXPECT_EQ (utf16result[2], 0xd834);
    EXPECT_EQ (utf16result[3], 0xdd1e);
}
// utf32tou8: encodes three code points, passed as a u32string_view, to UTF-8
// and checks the length of the result.
TEST(CPP20APITests, test_utf32tou8)
{
    const std::u32string code_points{0x448, 0x65E5, 0x10346};
    const std::u32string_view code_points_view{code_points};

    // Expected length: 2 + 3 + 4 UTF-8 code units.
    const std::u8string encoded = utf32tou8(code_points_view);
    EXPECT_EQ (encoded.size(), 9);
}
// utf8to32: decodes a five-byte UTF-8 string (a three-byte sequence followed
// by a two-byte sequence) and expects exactly two code points back.
TEST(CPP20APITests, test_utf8to32)
{
    const std::u8string two_chars{reinterpret_cast<const char8_t*>("\xe6\x97\xa5\xd1\x88")};

    const std::u32string decoded = utf8to32(two_chars);
    EXPECT_EQ (decoded.size(), 2);
}
// find_invalid: the input is five bytes of well-formed UTF-8 followed by a
// stray 0xfa byte, so the reported position is expected to be 5.
TEST(CPP20APITests, test_find_invalid)
{
    const std::u8string with_bad_tail{reinterpret_cast<const char8_t*>("\xe6\x97\xa5\xd1\x88\xfa")};

    const auto first_bad_offset = find_invalid(with_bad_tail);
    EXPECT_EQ (first_bad_offset, 5);
}
// is_valid: must reject a string ending in a stray 0xfa byte and accept a
// well-formed string that includes a four-byte sequence.
TEST(CPP20APITests, test_is_valid)
{
    // Negative case: valid prefix followed by the byte 0xfa.
    const std::u8string with_bad_tail{reinterpret_cast<const char8_t*>("\xe6\x97\xa5\xd1\x88\xfa")};
    const bool bad_tail_accepted = is_valid(with_bad_tail);
    EXPECT_FALSE (bad_tail_accepted);

    // Positive case: three-, two- and four-byte sequences, all well formed.
    const std::u8string with_four_byte_char{reinterpret_cast<const char8_t*>("\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e")};
    const bool well_formed_accepted = is_valid(with_four_byte_char);
    EXPECT_TRUE (well_formed_accepted);
}
// replace_invalid: feeds a string with several malformed sequences between
// 'a' and 'z', asks for '?' as the replacement, and checks that the result
// is valid UTF-8 and equal to "a????z".
TEST(CPP20APITests, test_replace_invalid)
{
    const std::u8string malformed{reinterpret_cast<const char8_t*>("a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z")};
    const std::u8string repaired = replace_invalid(malformed, u8'?');

    // The output itself has to pass validation.
    const bool repaired_is_valid = is_valid(repaired);
    EXPECT_TRUE (repaired_is_valid);

    // Each malformed sequence is expected to collapse into a single '?'.
    const std::u8string expected{reinterpret_cast<const char8_t*>("a????z")};
    EXPECT_EQ(expected, repaired);
}
// starts_with_bom: true for a string consisting of the bytes ef bb bf,
// false for a string that begins with an ordinary four-byte sequence.
TEST(CPP20APITests, test_starts_with_bom)
{
    // Positive case: the string is exactly the three bytes ef bb bf.
    const std::u8string bom_only{reinterpret_cast<const char8_t*>("\xef\xbb\xbf")};
    const bool bom_detected = starts_with_bom(bom_only);
    EXPECT_TRUE (bom_detected);

    // Negative case: three encoded characters, none of them a BOM.
    const std::u8string plain_text{reinterpret_cast<const char8_t*>("\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88")};
    const bool bom_in_plain_text = starts_with_bom(plain_text);
    EXPECT_FALSE (bom_in_plain_text);
}
|