1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/strings/sys_string_conversions.h"
#include <stddef.h>
#include <string.h>
#include <wchar.h>
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
namespace base {
// Returns the UTF-8 encoding of |wide|.
std::string SysWideToUTF8(const std::wstring& wide) {
  // Ideally the platform's own converter would be used here instead of the
  // ICU-backed one, but delegating to WideToUTF8() is sufficient for now.
  std::string utf8 = WideToUTF8(wide);
  return utf8;
}
// Returns the wide-string form of the UTF-8 bytes in |utf8|.
std::wstring SysUTF8ToWide(StringPiece utf8) {
  // Ideally the platform's own converter would be used here instead of the
  // ICU-backed one, but delegating to UTF8ToWide() is sufficient for now.
  // Whatever UTF8ToWide() reports back is not inspected; the caller simply
  // receives the contents left in the output buffer.
  std::wstring wide;
  UTF8ToWide(utf8.data(), utf8.size(), &wide);
  return wide;
}
#if defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
// TODO(port): Consider dropping the IS_ANDROID special case once Android has
// wcrtomb() support and we better understand what calls these routines.
// In this build configuration the native multi-byte encoding is treated as
// UTF-8, so the conversion is delegated to WideToUTF8().
std::string SysWideToNativeMB(const std::wstring& wide) {
  std::string native_mb = WideToUTF8(wide);
  return native_mb;
}
// In this build configuration the native multi-byte encoding is treated as
// UTF-8, so the conversion is delegated to SysUTF8ToWide().
std::wstring SysNativeMBToWide(StringPiece native_mb) {
  std::wstring wide = SysUTF8ToWide(native_mb);
  return wide;
}
#else
// Converts |wide| to a multi-byte string by feeding each wide character to
// wcrtomb(). Embedded NUL characters are carried over as a single zero byte.
// If any character fails to convert, or the result would have no bytes, an
// empty string is returned.
std::string SysWideToNativeMB(const std::wstring& wide) {
  const size_t kConversionFailed = static_cast<size_t>(-1);

  // Pass 1: measure. wcrtomb() does not report the output length when given a
  // null destination, so each character is converted into a scratch buffer
  // purely to learn how many bytes it needs.
  mbstate_t shift_state;
  memset(&shift_state, 0, sizeof(shift_state));
  size_t byte_count = 0;
  for (size_t pos = 0; pos < wide.size(); ++pos) {
    const wchar_t wc = wide[pos];
    char scratch[16];
    // NULs bypass wcrtomb() so that its special handling of them is avoided.
    const size_t produced = wc ? wcrtomb(scratch, wc, &shift_state) : 0;
    if (produced == kConversionFailed)
      return std::string();
    // A zero result stands for an embedded NUL, which occupies one byte.
    byte_count += produced ? produced : 1;
  }

  if (byte_count == 0)
    return std::string();

  // Pass 2: convert for real, writing straight into the zero-filled result.
  std::string native(byte_count, '\0');
  memset(&shift_state, 0, sizeof(shift_state));
  size_t write_at = 0;
  for (const wchar_t wc : wide) {
    // NULs bypass wcrtomb() here as well.
    const size_t produced = wc ? wcrtomb(&native[write_at], wc, &shift_state) : 0;
    if (produced == kConversionFailed)
      return std::string();
    // For an embedded NUL just step over the byte; it is already zero.
    write_at += produced ? produced : 1;
  }
  return native;
}
// Converts the multi-byte string |native_mb| to a wide string by repeatedly
// calling mbrtowc(). Embedded NUL bytes each yield one wide NUL character.
// If mbrtowc() reports an invalid (-1) or incomplete (-2) sequence, or the
// result would have no characters, an empty string is returned.
std::wstring SysNativeMBToWide(StringPiece native_mb) {
  const size_t kInvalidSequence = static_cast<size_t>(-1);
  const size_t kIncompleteSequence = static_cast<size_t>(-2);

  // Pass 1: measure. Walk the input without storing anything, counting how
  // many wide characters the conversion will produce.
  mbstate_t shift_state;
  memset(&shift_state, 0, sizeof(shift_state));
  size_t wide_count = 0;
  size_t offset = 0;
  while (offset < native_mb.size()) {
    const size_t consumed = mbrtowc(nullptr, native_mb.data() + offset,
                                    native_mb.size() - offset, &shift_state);
    if (consumed == kInvalidSequence || consumed == kIncompleteSequence)
      return std::wstring();
    // A zero result marks an embedded NUL byte; step over that single byte.
    offset += consumed ? consumed : 1;
    ++wide_count;
  }

  if (wide_count == 0)
    return std::wstring();

  // Pass 2: convert for real. |offset| indexes the multi-byte input while
  // |slot| indexes the wide output.
  std::wstring wide(wide_count, L'\0');
  memset(&shift_state, 0, sizeof(shift_state));  // Clear the shift state.
  offset = 0;
  for (size_t slot = 0; offset < native_mb.size(); ++slot) {
    const size_t consumed = mbrtowc(&wide[slot], native_mb.data() + offset,
                                    native_mb.size() - offset, &shift_state);
    if (consumed == kInvalidSequence || consumed == kIncompleteSequence)
      return std::wstring();
    // Skip exactly one byte for an embedded NUL.
    offset += consumed ? consumed : 1;
  }
  return wide;
}
#endif // defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
} // namespace base
|