File: 2.cc

package info (click to toggle)
gcc-arm-none-eabi 15%3A14.2.rel1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,099,328 kB
  • sloc: cpp: 3,627,108; ansic: 2,571,498; ada: 834,230; f90: 235,082; makefile: 79,231; asm: 74,984; xml: 51,692; exp: 39,736; sh: 33,298; objc: 15,629; python: 15,069; fortran: 14,429; pascal: 7,003; awk: 5,070; perl: 3,106; ml: 285; lisp: 253; lex: 204; haskell: 135
file content (156 lines) | stat: -rw-r--r-- 4,934 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// { dg-options "-lstdc++exp" }
// { dg-do run { target c++23 } }
// { dg-require-fileio "" }

#include <print>
#include <system_error>
#include <climits>
#include <cstdio>
#include <cstring>
#include <testsuite_hooks.h>
#include <testsuite_fs.h>

#ifdef _WIN32
#include <io.h>
#endif

namespace std
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
  // This is an internal implementation detail that must not be used directly.
  // It is declared here only so that this test can call it and check its
  // behaviour.
  //
  // As called by this test, the first argument is a FILE* (or, on Windows,
  // the OS file handle obtained via _get_osfhandle) and the second argument
  // is the text to write. The test treats a returned
  // errc::illegal_byte_sequence as "the text was not valid UTF-8" and any
  // other non-zero error code as a failure.
  error_code __write_to_terminal(void*, span<char>);
_GLIBCXX_END_NAMESPACE_VERSION
}

// Test the internal __write_to_terminal function that std::vprint_unicode
// uses.
//
// The contents of `s` are written to a temporary file by calling
// __write_to_terminal, then `s` is replaced by the bytes read back from that
// file. On Windows those bytes will be a UTF-16 string.
//
// Returns true if __write_to_terminal reported no error, i.e. the string was
// valid UTF-8, and false if it reported errc::illegal_byte_sequence.
// Any other error code is printed and fails the test.
bool
as_printed_to_terminal(std::string& s)
{
  __gnu_test::scoped_file f;
  FILE* strm = std::fopen(f.path.string().c_str(), "w");
  VERIFY( strm );
#ifdef _WIN32
  // On Windows the function is given the OS file handle, not the FILE*.
  void* handle = (void*)_get_osfhandle(_fileno(strm));
  const auto ec = std::__write_to_terminal(handle, s);
#else
  const auto ec = std::__write_to_terminal(strm, s);
#endif
  // Invalid UTF-8 is an expected outcome that callers check for via the
  // return value; anything else means the write itself went wrong.
  if (ec && ec != std::make_error_code(std::errc::illegal_byte_sequence))
    {
      std::println("Failed to write to terminal: {}", ec.message());
      VERIFY(!ec);
    }
  // Close the stream before reading so that buffered output reaches the file.
  std::fclose(strm);
  // Read the file back in binary mode so that the stored bytes are returned
  // unmodified; a text-mode stream may translate bytes on Windows, where the
  // file is expected to hold UTF-16 code units.
  std::ifstream in(f.path, std::ios::binary);
  s.assign(std::istreambuf_iterator<char>(in), {});
  return !ec;
}

// Check the bytes that as_printed_to_terminal reads back for valid and
// invalid UTF-8 input. Valid input must round-trip unchanged; invalid input
// must be reported as such and come back with U+FFFD in place of the bad
// bytes. The body is only compiled when _WIN32 is not defined.
void
test_utf8_validation()
{
#ifndef _WIN32
  // Well-formed UTF-8 is reported as valid and written unchanged.
  const std::string valid = (const char*)u8"£🇬🇧 €🇪🇺";
  std::string text = valid;
  VERIFY( as_printed_to_terminal(text) );
  VERIFY( text == valid );

  // Append three stray non-UTF-8 bytes; the output must differ from the
  // input, with each stray byte replaced by U+FFFD.
  text += " \xa3 10.99 \xee \xdd";
  const std::string unsanitized = text;
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text != unsanitized );
  const std::string fffd = (const char*)u8"\uFFFD";
  const std::string sanitized
    = valid + " " + fffd + " 10.99 " + fffd + " " + fffd;
  VERIFY( text == sanitized );

  // Two-byte sequences with a 0xC0 lead byte yield one U+FFFD per byte.
  const std::string fffd_x2 = fffd + fffd;
  text = "\xc0\x80";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x2 );
  text = "\xc0\xae";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x2 );

  // Examples of U+FFFD substitution from Unicode standard.
  const std::string fffd_x4 = fffd_x2 + fffd_x2;

  // Table 3-8
  text = "\xc0\xaf\xe0\x80\xbf\xf0\x81\x82\x41";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x4 + fffd_x4 + "\x41" );

  // Table 3-9
  text = "\xed\xa0\x80\xed\xbf\xbf\xed\xaf\x41";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x4 + fffd_x4 + "\x41" );

  // Table 3-10
  text = "\xf4\x91\x92\x93\xff\x41\x80\xbf\x42";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x4 + fffd + "\x41" + fffd + fffd + "\x42" );

  // Table 3-11
  text = "\xe1\x80\xe2\xf0\x91\x92\xf1\xbf\x41";
  VERIFY( ! as_printed_to_terminal(text) );
  VERIFY( text == fffd_x4 + "\x41" );
#endif
}

// Create a std::u16string from the bytes in a std::string.
// The bytes are copied as-is, so each pair of bytes becomes one char16_t
// code unit in the byte order of the machine running the test.
std::u16string
utf16_from_bytes(const std::string& s)
{
  // s should have an even number of bytes. The count is rounded up, so for
  // an odd size the copy below also takes the null terminator of s, giving a
  // result which will not match the expected value.
  const auto units = (s.size() + 1) / 2;
  std::u16string result(units, u'\0');
  std::memcpy(result.data(), s.data(), units * sizeof(char16_t));
  return result;
}

// Check that the bytes as_printed_to_terminal reads back, reinterpreted as
// UTF-16 code units, match the expected transcoding of the UTF-8 input, with
// U+FFFD substituted for invalid bytes. The body is only compiled when
// _WIN32 is defined.
void
test_utf16_transcoding()
{
#ifdef _WIN32
  // FIXME: We can't test __write_to_terminal for Windows, because it
  // returns an INVALID_HANDLE Windows error when writing to a normal file.

  // Well-formed UTF-8 is reported as valid and transcoded to UTF-16.
  const std::string valid_utf8 = (const char*)u8"£🇬🇧 €🇪🇺";
  const std::u16string valid_utf16 = u"£🇬🇧 €🇪🇺";
  std::string bytes = valid_utf8;
  VERIFY( as_printed_to_terminal(bytes) );
  VERIFY( utf16_from_bytes(bytes) == valid_utf16 );

  // Append three stray non-UTF-8 bytes; each must become U+FFFD.
  bytes = valid_utf8;
  bytes += " \xa3 10.99 \xee\xdd";
  VERIFY( ! as_printed_to_terminal(bytes) );
  const std::u16string fffd = u"\uFFFD";
  const std::u16string sanitized
    = valid_utf16 + u" " + fffd + u" 10.99 " + fffd + fffd;
  VERIFY( utf16_from_bytes(bytes) == sanitized );

  // Two-byte sequences with a 0xC0 lead byte yield one U+FFFD per byte.
  const std::u16string fffd_x2 = fffd + fffd;
  bytes = "\xc0\x80";
  VERIFY( ! as_printed_to_terminal(bytes) );
  VERIFY( utf16_from_bytes(bytes) == fffd_x2 );
  bytes = "\xc0\xae";
  VERIFY( ! as_printed_to_terminal(bytes) );
  VERIFY( utf16_from_bytes(bytes) == fffd_x2 );

  // Examples of U+FFFD substitution from Unicode standard.
  const std::u16string fffd_x4 = fffd_x2 + fffd_x2;

  // Table 3-8
  bytes = "\xc0\xaf\xe0\x80\xbf\xf0\x81\x82\x41";
  VERIFY( ! as_printed_to_terminal(bytes) );
  VERIFY( utf16_from_bytes(bytes) == fffd_x4 + fffd_x4 + u"\x41" );

  // Table 3-9
  bytes = "\xed\xa0\x80\xed\xbf\xbf\xed\xaf\x41";
  VERIFY( ! as_printed_to_terminal(bytes) );
  VERIFY( utf16_from_bytes(bytes) == fffd_x4 + fffd_x4 + u"\x41" );

  // Table 3-10
  bytes = "\xf4\x91\x92\x93\xff\x41\x80\xbf\x42";
  VERIFY( ! as_printed_to_terminal(bytes) );
  VERIFY( utf16_from_bytes(bytes)
	    == fffd_x4 + fffd + u"\x41" + fffd + fffd + u"\x42" );

  // Table 3-11
  bytes = "\xe1\x80\xe2\xf0\x91\x92\xf1\xbf\x41";
  VERIFY( ! as_printed_to_terminal(bytes) );
  VERIFY( utf16_from_bytes(bytes) == fffd_x4 + u"\x41" );
#endif
}

// Test driver. Both tests are always called, but each one has a body that
// is conditionally compiled, so only one of them does any work per target.
int main()
{
  test_utf8_validation();    // Body compiled only when _WIN32 is not defined.
  test_utf16_transcoding();  // Body compiled only when _WIN32 is defined.
  return 0;
}