1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
|
// SPDX-FileCopyrightText: Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
// SPDX-FileCopyrightText: Copyright 2010 Sandia Corporation
// SPDX-License-Identifier: LicenseRef-BSD-3-Clause-Sandia-USGov
#include "vtkUTF16TextCodec.h"
#include "vtkObjectFactory.h"
#include "vtkTextCodecFactory.h"
#include <stdexcept>
// Opens the VTK ABI namespace; it is closed by VTK_ABI_NAMESPACE_END at the
// bottom of this file.
VTK_ABI_NAMESPACE_BEGIN
// NOTE(review): presumably expands to the definition of
// vtkUTF16TextCodec::New() -- confirm against vtkObjectFactory.h.
vtkStandardNewMacro(vtkUTF16TextCodec);
namespace
{
//////////////////////////////////////////////////////////////////////////////
// utf16_to_unicode
vtkTypeUInt32 utf16_to_unicode_next(const bool big_endian, istream& InputStream)
{
vtkTypeUInt8 first_byte = InputStream.get();
if (InputStream.eof())
{
throw std::runtime_error("Premature end-of-sequence extracting UTF-16 code unit.");
}
vtkTypeUInt8 second_byte = InputStream.get();
vtkTypeUInt32 returnCode =
big_endian ? first_byte << 8 | second_byte : second_byte << 8 | first_byte;
if (returnCode >= 0xd800 && returnCode <= 0xdfff)
{
if (InputStream.eof())
{
throw std::runtime_error(
"Premature end-of-sequence extracting UTF-16 trail surrogate first byte.");
}
vtkTypeUInt8 third_byte = InputStream.get();
if (InputStream.eof())
{
throw std::runtime_error(
"Premature end-of-sequence extracting UTF-16 trail surrogate second byte.");
}
vtkTypeUInt8 fourth_byte = InputStream.get();
const vtkTypeUInt32 second_code_unit =
big_endian ? third_byte << 8 | fourth_byte : fourth_byte << 8 | third_byte;
if (second_code_unit >= 0xdc00 && second_code_unit <= 0xdfff)
{
returnCode = vtkTypeUInt32(vtkTypeInt32(returnCode << 10) + vtkTypeInt32(second_code_unit) +
(0x10000 - (0xd800 << 10) - 0xdc00));
}
else
{
throw std::runtime_error("Invalid UTF-16 trail surrogate.");
}
}
return returnCode;
}
} // end anonymous namespace
vtkUTF16TextCodec::vtkUTF16TextCodec()
: _endianExplicitlySet(false)
, _bigEndian(true)
{
}
// Defaulted destructor: this file defines no custom teardown for the codec.
vtkUTF16TextCodec::~vtkUTF16TextCodec() = default;
// Returns the generic name of this codec, "UTF-16" (no byte-order suffix).
const char* vtkUTF16TextCodec::Name()
{
  const char* const codecName = "UTF-16";
  return codecName;
}
// Report whether this codec accepts the encoding named by NameString, and
// configure the byte-order state to match the name that was recognised:
//  - a name accepted by vtkTextCodec::CanHandle clears the "explicitly set"
//    flag, so ToUnicode() will run FindEndianness() on the input;
//  - "UTF-16BE" selects big-endian explicitly;
//  - "UTF-16LE" selects little-endian explicitly.
// Returns false, leaving the state untouched, for any other name.
bool vtkUTF16TextCodec::CanHandle(const char* NameString)
{
  const bool acceptedByBase = vtkTextCodec::CanHandle(NameString);
  if (acceptedByBase)
  {
    _endianExplicitlySet = false;
    return true;
  }

  if (strcmp(NameString, "UTF-16BE") == 0)
  {
    SetBigEndian(true);
    return true;
  }

  if (strcmp(NameString, "UTF-16LE") == 0)
  {
    SetBigEndian(false);
    return true;
  }

  return false;
}
void vtkUTF16TextCodec::SetBigEndian(bool state)
{
_endianExplicitlySet = true;
_bigEndian = state;
}
// Determine the byte order from the first two bytes of InputStream, which are
// consumed: 0xFE 0xFF selects big-endian, 0xFF 0xFE selects little-endian.
// The "explicitly set" flag is cleared.
//
// Throws std::runtime_error when
//  - the stream ends before two bytes could be read
//    ("End of Input reached while reading header."), or
//  - the two bytes are neither of the patterns above, or any other kind of
//    exception escapes the stream
//    ("Cannot detect UTF-16 endianness. ...").
void vtkUTF16TextCodec::FindEndianness(istream& InputStream)
{
  _endianExplicitlySet = false;
  try
  {
    const int first = InputStream.get();
    if (InputStream.fail())
    {
      // Thrown as std::runtime_error rather than as a string literal: a
      // literal is a `const char*`, which a `catch (char*)` handler never
      // matches, so this message used to be lost.
      throw std::runtime_error("End of Input reached while reading header.");
    }
    const int second = InputStream.get();
    if (InputStream.fail())
    {
      throw std::runtime_error("End of Input reached while reading header.");
    }

    // A successful get() yields the byte as a value in 0..255.
    if (first == 0xfe && second == 0xff)
    {
      _bigEndian = true;
    }
    else if (first == 0xff && second == 0xfe)
    {
      _bigEndian = false;
    }
    else
    {
      throw std::runtime_error(
        "Cannot detect UTF-16 endianness. Try 'UTF-16BE' or 'UTF-16LE' instead.");
    }
  }
  catch (const std::runtime_error&)
  {
    // Already the exception type callers expect; keep its specific message.
    throw;
  }
  catch (...)
  {
    // Anything else is reported as a detection failure.
    throw std::runtime_error(
      "Cannot detect UTF-16 endianness. Try 'UTF-16BE' or 'UTF-16LE' instead.");
  }
}
// Decode InputStream into output. When no byte order has been chosen
// explicitly, FindEndianness() is run on the stream first; the decoding
// itself is delegated to vtkTextCodec::ToUnicode.
void vtkUTF16TextCodec::ToUnicode(istream& InputStream, vtkTextCodec::OutputIterator& output)
{
  const bool mustDetectByteOrder = !_endianExplicitlySet;
  if (mustDetectByteOrder)
  {
    this->FindEndianness(InputStream);
  }

  this->vtkTextCodec::ToUnicode(InputStream, output);
}
// Decode and return the next code point from inputStream, using the byte
// order currently stored in this codec.
vtkTypeUInt32 vtkUTF16TextCodec::NextUTF32CodePoint(istream& inputStream)
{
  const bool bigEndian = this->_bigEndian;
  return utf16_to_unicode_next(bigEndian, inputStream);
}
// Write a one-line header identifying this object to os, then let the
// superclass print itself two indentation levels deeper than `indent`.
void vtkUTF16TextCodec::PrintSelf(ostream& os, vtkIndent indent)
{
  os << indent << "vtkUTF16TextCodec (" << this << ") \n";

  vtkIndent memberIndent = indent.GetNextIndent();
  vtkIndent superclassIndent = memberIndent.GetNextIndent();
  this->Superclass::PrintSelf(os, superclassIndent);
}
VTK_ABI_NAMESPACE_END
|