1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
// Copyright Vladimir Prus 2002-2004.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <cassert>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <boost/progress.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <boost/program_options/detail/convert.hpp>
#include <boost/program_options/detail/utf8_codecvt_facet.hpp>
#include "minitest.hpp"
using namespace std;
// Reads the whole file named 'filename' and returns its content as a
// single string.
//
// Throws std::runtime_error if the file cannot be opened. An exception is
// used rather than assert() because assert() compiles to nothing when NDEBUG
// is defined; in such a build a missing input file would silently produce an
// empty string instead of being reported.
std::string file_content(const std::string& filename)
{
    std::ifstream ifs(filename.c_str());
    if (!ifs)
        throw std::runtime_error("cannot open file '" + filename + "'");

    // Copy the entire stream buffer in one go.
    std::stringstream ss;
    ss << ifs.rdbuf();

    return ss.str();
}
// Variant of from_8_bit written as a plain loop, without a function object,
// so that the two implementations can be compared for performance.
//
// Converts the narrow string 's' to a wide string using the 'cvt' facet.
// Throws std::logic_error("character conversion failed") if the facet
// reports an error, or if a conversion step yields no output characters.
std::wstring from_8_bit_2(const std::string& s,
                          const std::codecvt<wchar_t, char, std::mbstate_t>& cvt)
{
    // The codecvt interface cannot report the required output size up
    // front, and std::basic_string gives no writable access to its storage,
    // so the input is converted chunk by chunk through a small local buffer.
    enum { chunk_size = 32 };
    wchar_t chunk[chunk_size];

    std::wstring converted;
    std::mbstate_t shift_state = std::mbstate_t();

    const char* cursor = s.data();
    const char* const input_end = cursor + s.size();

    while (cursor != input_end) {
        wchar_t* chunk_end = chunk;

        // Convert as much of the remaining input as fits into the chunk;
        // 'cursor' is advanced past the consumed source characters.
        const std::codecvt_base::result status =
            cvt.in(shift_state, cursor, input_end, cursor,
                   chunk, chunk + chunk_size, chunk_end);

        // A 'partial' result is acceptable as long as progress was made.
        // Producing nothing means the remaining source data is incomplete,
        // and there is no further data to supply, so that is a failure too.
        if (status == std::codecvt_base::error || chunk_end == chunk)
            throw std::logic_error("character conversion failed");

        converted.append(chunk, chunk_end);
    }

    return converted;
}
// Converts 'input' to a wide string with utf8_codecvt_facet and verifies the
// result.
//
// input           - narrow string handed to the converters.
// expected_output - reference data holding two bytes per expected wide
//                   character: the low byte at index 2*i and the high byte
//                   at index 2*i+1.
//
// The conversion is run 10000 times with boost::from_8_bit and 10000 times
// with from_8_bit_2, each loop inside the scope of its own
// boost::progress_timer. The last result is compared byte by byte against
// 'expected_output', and then converted back with boost::to_8_bit and
// compared against 'input'.
void test_convert(const std::string& input,
                  const std::string& expected_output)
{
    boost::program_options::detail::utf8_codecvt_facet facet;
    std::wstring output;
    {
        boost::progress_timer t;
        for (int i = 0; i < 10000; ++i)
            output = boost::from_8_bit(
                input,
                facet);
    }
    {
        boost::progress_timer t;
        for (int i = 0; i < 10000; ++i)
            output = from_8_bit_2(
                input,
                facet);
    }

    BOOST_CHECK(output.size()*2 == expected_output.size());

    // Compare only the characters that have both reference bytes available,
    // so that a size mismatch (already reported by the check above) cannot
    // lead to reads past the end of 'expected_output' if BOOST_CHECK does
    // not stop execution.
    const std::string::size_type expected_chars = expected_output.size() / 2;
    const std::string::size_type comparable =
        output.size() < expected_chars ? output.size() : expected_chars;

    for (std::string::size_type i = 0; i < comparable; ++i) {
        const unsigned code = static_cast<unsigned>(output[i]);
        const unsigned low = code & 0xFF;
        const unsigned high = (code >> 8) & 0xFF;

        // Go through unsigned char so that bytes >= 0x80 are not
        // sign-extended where plain char is signed; both reference bytes
        // must be reduced to the 0..255 range to be comparable with the
        // masked values above.
        const unsigned low2 = static_cast<unsigned char>(expected_output[2*i]);
        const unsigned high2 = static_cast<unsigned char>(expected_output[2*i + 1]);

        BOOST_CHECK(low == low2);
        BOOST_CHECK(high == high2);
    }

    // Round trip: converting back must reproduce the original input.
    std::string ref = boost::to_8_bit(output, facet);
    BOOST_CHECK(ref == input);
}
// Program entry point.
//
// Runs test_convert on the contents of "utf8.txt" and "ucs2.txt". If at
// least one command line argument is given, additionally switches the global
// locale to the environment's locale, converts the first argument with
// boost::from_local_8_bit and prints the numeric value of every resulting
// wide character, one per line.
int main(int ac, char* av[])
{
    const std::string utf8_text = file_content("utf8.txt");
    const std::string ucs2_text = file_content("ucs2.txt");
    test_convert(utf8_text, ucs2_text);

    // Nothing more to do without a command line argument.
    if (ac <= 1)
        return 0;

    std::cout << "Trying to convert the command line argument\n";
    std::locale::global(std::locale(""));
    const std::wstring wide = boost::from_local_8_bit(av[1]);

    std::cout << "Got something, printing decimal code point values\n";
    for (std::wstring::const_iterator it = wide.begin(); it != wide.end(); ++it)
        std::cout << static_cast<unsigned>(*it) << "\n";

    return 0;
}
|