File: vtkUTF16TextCodec.cxx

package info (click to toggle)
paraview 5.13.2%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 544,220 kB
  • sloc: cpp: 3,374,605; ansic: 1,332,409; python: 150,381; xml: 122,166; sql: 65,887; sh: 7,317; javascript: 5,262; yacc: 4,417; java: 3,977; perl: 2,363; lex: 1,929; f90: 1,397; makefile: 170; objc: 153; tcl: 59; pascal: 50; fortran: 29
file content (170 lines) | stat: -rw-r--r-- 4,158 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
// SPDX-FileCopyrightText: Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
// SPDX-FileCopyrightText: Copyright 2010 Sandia Corporation
// SPDX-License-Identifier: LicenseRef-BSD-3-Clause-Sandia-USGov
#include "vtkUTF16TextCodec.h"

#include "vtkObjectFactory.h"
#include "vtkTextCodecFactory.h"

#include <stdexcept>

VTK_ABI_NAMESPACE_BEGIN
vtkStandardNewMacro(vtkUTF16TextCodec);

namespace
{
//////////////////////////////////////////////////////////////////////////////
// utf16_to_unicode

// Decode the next Unicode code point from a UTF-16 byte stream.
//
// Reads one 16-bit code unit (two bytes) from InputStream. When that unit is a
// lead surrogate (0xd800-0xdbff) a second code unit is read, which must be a
// trail surrogate (0xdc00-0xdfff), and the pair is combined into a single code
// point in the range 0x10000-0x10ffff.
//
// big_endian   true when the first byte of every code unit is the most
//              significant one, false when it is the least significant one.
// InputStream  byte stream positioned at the start of a code unit.
//
// Returns the decoded code point.
//
// Throws std::runtime_error when the stream ends in the middle of a code unit
// or of a surrogate pair, when a lead surrogate is not followed by a trail
// surrogate, or when the sequence starts with a trail surrogate.
vtkTypeUInt32 utf16_to_unicode_next(const bool big_endian, istream& InputStream)
{
  // Fetch one byte, raising `message` when the stream has nothing left.
  // The value returned by get() is tested before it is narrowed so that the
  // end-of-file marker can never be mistaken for a 0xff data byte.
  const auto read_byte = [&InputStream](const char* message) -> vtkTypeUInt32 {
    const istream::int_type value = InputStream.get();
    if (istream::traits_type::eq_int_type(value, istream::traits_type::eof()))
    {
      throw std::runtime_error(message);
    }
    return static_cast<vtkTypeUInt32>(value) & 0xffu;
  };

  // Assemble a 16-bit code unit from two bytes given in stream order.
  const auto make_code_unit = [big_endian](
                                const vtkTypeUInt32 first, const vtkTypeUInt32 second) -> vtkTypeUInt32 {
    return big_endian ? ((first << 8) | second) : ((second << 8) | first);
  };

  const vtkTypeUInt32 first_byte =
    read_byte("Premature end-of-sequence extracting UTF-16 code unit.");
  const vtkTypeUInt32 second_byte =
    read_byte("Premature end-of-sequence extracting UTF-16 code unit.");
  const vtkTypeUInt32 lead = make_code_unit(first_byte, second_byte);

  // Anything outside the surrogate block is a complete code point already.
  if (lead < 0xd800u || lead > 0xdfffu)
  {
    return lead;
  }

  // A trail surrogate cannot open a pair; combining it with whatever follows
  // would produce a meaningless value.
  if (lead >= 0xdc00u)
  {
    throw std::runtime_error("Invalid UTF-16 lead surrogate.");
  }

  const vtkTypeUInt32 third_byte =
    read_byte("Premature end-of-sequence extracting UTF-16 trail surrogate first byte.");
  const vtkTypeUInt32 fourth_byte =
    read_byte("Premature end-of-sequence extracting UTF-16 trail surrogate second byte.");
  const vtkTypeUInt32 trail = make_code_unit(third_byte, fourth_byte);

  if (trail < 0xdc00u || trail > 0xdfffu)
  {
    throw std::runtime_error("Invalid UTF-16 trail surrogate.");
  }

  // Each surrogate carries 10 payload bits; the pair encodes (code point - 0x10000).
  return 0x10000u + ((lead - 0xd800u) << 10) + (trail - 0xdc00u);
}

} // end anonymous namespace

vtkUTF16TextCodec::vtkUTF16TextCodec()
  : _endianExplicitlySet(false)
  , _bigEndian(true)
{
}

// Nothing to release explicitly in this file; the compiler-generated
// destructor body is used.
vtkUTF16TextCodec::~vtkUTF16TextCodec() = default;

// Returns the name of this codec, "UTF-16".
const char* vtkUTF16TextCodec::Name()
{
  const char* const codecName = "UTF-16";
  return codecName;
}

// Reports whether this codec can decode the encoding called NameString.
//
// Besides answering the question, the call configures the codec:
//  - when the base-class check accepts the name, the byte order is marked as
//    not explicitly set;
//  - "UTF-16BE" selects big-endian and "UTF-16LE" selects little-endian, both
//    through SetBigEndian().
// Any other name yields false and leaves the codec untouched.
bool vtkUTF16TextCodec::CanHandle(const char* NameString)
{
  if (vtkTextCodec::CanHandle(NameString))
  {
    _endianExplicitlySet = false;
    return true;
  }

  // The two suffixed names differ only in the byte order they request.
  const bool bigEndianRequested = (strcmp(NameString, "UTF-16BE") == 0);
  if (bigEndianRequested || strcmp(NameString, "UTF-16LE") == 0)
  {
    SetBigEndian(bigEndianRequested);
    return true;
  }

  return false;
}

void vtkUTF16TextCodec::SetBigEndian(bool state)
{
  _endianExplicitlySet = true;
  _bigEndian = state;
}

// Determines the byte order from the two-byte header at the start of the
// stream and stores it in _bigEndian. The two header bytes are consumed.
//
//   0xfe 0xff -> big-endian
//   0xff 0xfe -> little-endian
//
// The byte order is marked as not explicitly set.
//
// Throws std::runtime_error when the stream fails before two bytes could be
// read ("End of Input reached while reading header.") or when the two bytes
// are not one of the sequences above. Any other exception raised while
// reading is converted to std::runtime_error as well, so that is the only
// exception type that leaves this function.
void vtkUTF16TextCodec::FindEndianness(istream& InputStream)
{
  _endianExplicitlySet = false;

  try
  {
    // get() yields the byte as a non-negative value, so it can be compared
    // with 0xfe / 0xff directly.
    const istream::int_type c1 = InputStream.get();
    if (InputStream.fail())
    {
      throw std::runtime_error("End of Input reached while reading header.");
    }

    const istream::int_type c2 = InputStream.get();
    if (InputStream.fail())
    {
      throw std::runtime_error("End of Input reached while reading header.");
    }

    if (c1 == 0xfe && c2 == 0xff)
    {
      _bigEndian = true;
    }
    else if (c1 == 0xff && c2 == 0xfe)
    {
      _bigEndian = false;
    }
    else
    {
      throw std::runtime_error(
        "Cannot detect UTF-16 endianness.  Try 'UTF-16BE' or 'UTF-16LE' instead.");
    }
  }
  catch (const std::runtime_error&)
  {
    // Keep the specific message chosen above instead of overwriting it.
    throw;
  }
  catch (...)
  {
    throw std::runtime_error(
      "Cannot detect UTF-16 endianness.  Try 'UTF-16BE' or 'UTF-16LE' instead.");
  }
}

// Decodes InputStream into `output`. When no byte order has been set
// explicitly, it is first detected from the stream via FindEndianness(); the
// actual decoding is delegated to vtkTextCodec::ToUnicode().
void vtkUTF16TextCodec::ToUnicode(istream& InputStream, vtkTextCodec::OutputIterator& output)
{
  const bool byteOrderChosen = this->_endianExplicitlySet;
  if (!byteOrderChosen)
  {
    this->FindEndianness(InputStream);
  }

  vtkTextCodec::ToUnicode(InputStream, output);
}

// Reads the next code point from inputStream using the codec's current byte
// order; errors surface as exceptions from utf16_to_unicode_next().
vtkTypeUInt32 vtkUTF16TextCodec::NextUTF32CodePoint(istream& inputStream)
{
  const vtkTypeUInt32 codePoint = utf16_to_unicode_next(this->_bigEndian, inputStream);
  return codePoint;
}

// Writes a one-line header identifying this object, then lets the superclass
// print itself two indentation levels below that header.
void vtkUTF16TextCodec::PrintSelf(ostream& os, vtkIndent indent)
{
  os << indent << "vtkUTF16TextCodec (" << this << ") \n";

  vtkIndent nested = indent.GetNextIndent();
  this->Superclass::PrintSelf(os, nested.GetNextIndent());
}
VTK_ABI_NAMESPACE_END