1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
// { dg-options "-lstdc++exp" }
// { dg-do run { target c++23 } }
// { dg-require-fileio "" }
#include <print>
#include <system_error>
#include <climits>
#include <cstdio>
#include <cstring>
#include <testsuite_hooks.h>
#include <testsuite_fs.h>
#ifdef _WIN32
#include <io.h>
#endif
namespace std
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
// Redeclaration of a library-internal function so that this test can call it.
// This is an internal implementation detail that must not be used directly.
// We need to use it here to test the behaviour.
//
// The first argument is the output target: this test passes a FILE*, or on
// Windows the OS handle obtained from the FILE. The second argument is the
// text to write. The result is checked as an error_code; this test treats
// errc::illegal_byte_sequence as "the text was not valid UTF-8" and any
// other error as a test failure.
error_code __write_to_terminal(void*, span<char>);
_GLIBCXX_END_NAMESPACE_VERSION
}
// Test the internal __write_to_terminal function that vprintf_unicode uses.
//
// The string parameter is written to a temporary file through
// __write_to_terminal, then S is overwritten with the raw bytes read back
// from that file. On Windows those bytes will be a UTF-16 string.
//
// Returns true if the string was valid UTF-8, i.e. __write_to_terminal
// reported no error. An illegal_byte_sequence error makes this return false.
// Any other error is reported and fails the test.
bool
as_printed_to_terminal(std::string& s)
{
  __gnu_test::scoped_file f;
  FILE* strm = std::fopen(f.path.string().c_str(), "w");
  VERIFY( strm );
#ifdef _WIN32
  // On Windows the function is handed the OS handle, not the FILE*.
  void* handle = (void*)_get_osfhandle(_fileno(strm));
  const auto ec = std::__write_to_terminal(handle, s);
#else
  const auto ec = std::__write_to_terminal(strm, s);
#endif
  // Invalid UTF-8 is an expected outcome that callers check for via the
  // return value; anything else is a genuine failure of the write.
  if (ec && ec != std::make_error_code(std::errc::illegal_byte_sequence))
    {
      std::println("Failed to write to terminal: {}", ec.message());
      VERIFY(!ec);
    }
  // Close the stream before reading so that buffered output reaches the file.
  std::fclose(strm);
  // Read in binary mode: the file holds raw bytes (UTF-16 on Windows), which
  // a text-mode read could alter through newline translation.
  std::ifstream in(f.path, std::ios::binary);
  VERIFY( in.is_open() );
  s.assign(std::istreambuf_iterator<char>(in), {});
  return !ec;
}
// Non-Windows checks of what as_printed_to_terminal produces for UTF-8 input:
// well-formed text must come back unchanged, and ill-formed text must be
// reported as invalid with U+FFFD substituted into the output.
// The body is compiled out on Windows.
void
test_utf8_validation()
{
#ifndef _WIN32
  // UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER, and a run of four.
  const std::string fffd = reinterpret_cast<const char*>(u8"\uFFFD");
  const std::string fffd_x4 = fffd + fffd + fffd + fffd;

  // Valid UTF-8 round-trips byte-for-byte.
  const std::string valid = reinterpret_cast<const char*>(u8"£🇬🇧 €🇪🇺");
  std::string text = valid;
  VERIFY( as_printed_to_terminal(text) );
  VERIFY( text == valid );

  // Appending stray non-UTF-8 bytes makes the call report invalid input and
  // changes the output: each stray byte is expected to become U+FFFD.
  text += " \xa3 10.99 \xee \xdd";
  const std::string malformed = text;
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text != malformed );
  const std::string substituted
    = valid + " " + fffd + " 10.99 " + fffd + " " + fffd;
  VERIFY( text == substituted );

  // Two-byte sequences starting with 0xC0 yield one U+FFFD per byte.
  text = "\xc0\x80";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd + fffd );

  text = "\xc0\xae";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd + fffd );

  // Examples of U+FFFD substitution from Unicode standard.
  text = "\xc0\xaf\xe0\x80\xbf\xf0\x81\x82\x41"; // Table 3-8
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x4 + fffd_x4 + "\x41" );

  text = "\xed\xa0\x80\xed\xbf\xbf\xed\xaf\x41"; // Table 3-9
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x4 + fffd_x4 + "\x41" );

  text = "\xf4\x91\x92\x93\xff\x41\x80\xbf\x42"; // Table 3-10
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x4 + fffd + "\x41" + fffd + fffd + "\x42" );

  text = "\xe1\x80\xe2\xf0\x91\x92\xf1\xbf\x41"; // Table 3-11
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x4 + "\x41" );
#endif
}
// Build a std::u16string whose code units are the bytes of S, copied as-is
// in the platform's native byte order.
std::u16string
utf16_from_bytes(const std::string& s)
{
  // S is expected to hold an even number of bytes. The count is rounded up,
  // so for an odd length the last code unit also takes in S's null
  // terminator, giving a result that will not match the expected value.
  const std::size_t units = (s.size() + 1) / 2;

  // Fills the storage handed over by resize_and_overwrite directly from S
  // and keeps every code unit that was requested.
  const auto copy_bytes = [src = s.data()](char16_t* dest, std::size_t count) {
    std::memcpy(dest, src, count * sizeof(char16_t));
    return count;
  };

  std::u16string result;
  result.resize_and_overwrite(units, copy_bytes);
  return result;
}
// Windows-only checks that UTF-8 input comes out of as_printed_to_terminal
// as the corresponding UTF-16 code units, with U+FFFD substituted for
// ill-formed input. The body is compiled out on other targets.
void
test_utf16_transcoding()
{
#ifdef _WIN32
  // FIXME: We can't test __write_to_terminal for Windows, because it
  // returns an INVALID_HANDLE Windows error when writing to a normal file.

  // U+FFFD REPLACEMENT CHARACTER as UTF-16, and a run of four.
  const std::u16string fffd = u"\uFFFD";
  const std::u16string fffd_x4 = fffd + fffd + fffd + fffd;

  // Valid UTF-8 must be transcoded to the equivalent UTF-16 text.
  const std::u16string valid16 = u"£🇬🇧 €🇪🇺";
  std::string text = reinterpret_cast<const char*>(u8"£🇬🇧 €🇪🇺");
  VERIFY( as_printed_to_terminal(text) );
  VERIFY( utf16_from_bytes(text) == valid16 );

  // Stray non-UTF-8 bytes are reported as invalid and each is expected to
  // become U+FFFD in the output.
  text = reinterpret_cast<const char*>(u8"£🇬🇧 €🇪🇺");
  text += " \xa3 10.99 \xee\xdd";
  VERIFY( ! as_printed_to_terminal(text) );
  const std::u16string substituted
    = valid16 + u" " + fffd + u" 10.99 " + fffd + fffd;
  VERIFY( utf16_from_bytes(text) == substituted );

  // Two-byte sequences starting with 0xC0 yield one U+FFFD per byte.
  text = "\xc0\x80";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( utf16_from_bytes(text) == fffd + fffd );

  text = "\xc0\xae";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( utf16_from_bytes(text) == fffd + fffd );

  // Examples of U+FFFD substitution from Unicode standard.
  text = "\xc0\xaf\xe0\x80\xbf\xf0\x81\x82\x41"; // Table 3-8
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( utf16_from_bytes(text) == fffd_x4 + fffd_x4 + u"\x41" );

  text = "\xed\xa0\x80\xed\xbf\xbf\xed\xaf\x41"; // Table 3-9
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( utf16_from_bytes(text) == fffd_x4 + fffd_x4 + u"\x41" );

  text = "\xf4\x91\x92\x93\xff\x41\x80\xbf\x42"; // Table 3-10
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( utf16_from_bytes(text)
	    == fffd_x4 + fffd + u"\x41" + fffd + fffd + u"\x42" );

  text = "\xe1\x80\xe2\xf0\x91\x92\xf1\xbf\x41"; // Table 3-11
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( utf16_from_bytes(text) == fffd_x4 + u"\x41" );
#endif
}
// Test driver: runs the UTF-8 checks followed by the UTF-16 checks. Each of
// the two has an empty body on the platform it does not apply to.
int
main()
{
  test_utf8_validation();
  test_utf16_transcoding();
  return 0;
}
|