File: UnicodeCodingSystem.cxx

package info (click to toggle)
opensp 1.5.1.0-2
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 7,976 kB
  • ctags: 9,416
  • sloc: cpp: 65,473; ansic: 13,242; sh: 9,762; makefile: 716; perl: 564; yacc: 316
file content (143 lines) | stat: -rw-r--r-- 3,150 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// Copyright (c) 1994 James Clark, 2000 Matthias Clasen
// See the file COPYING for copying permission.

#include "splib.h"

#ifdef SP_MULTI_BYTE

#include "UnicodeCodingSystem.h"
#include "UTF16CodingSystem.h"
#include "macros.h"
#include "Owner.h"

#include <stddef.h>
#include <string.h>
#ifdef DECLARE_MEMMOVE
extern "C" {
  void *memmove(void *, const void *, size_t);
}
#endif

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

// 16-bit value of a byte order mark when its two bytes are read in the
// expected order.
const unsigned short byteOrderMark = 0xFEFF;
// The same mark with its two bytes exchanged.
const unsigned short swappedByteOrderMark = 0xFFFE;

// Decoder that inspects the start of its input for a byte order mark and
// then forwards all decoding work to a sub-decoder.
class UnicodeDecoder : public Decoder {
public:
  // subCodingSystem may be null.
  UnicodeDecoder(const InputCodingSystem *subCodingSystem);
  size_t decode(Char *to,
		const char *from,
		size_t fromLen,
		const char **rest);
  Boolean convertOffset(unsigned long &n) const;
private:
  // Set once a byte order mark, in either byte order, has been skipped.
  PackedBoolean hadByteOrderMark_;
  // Set when the byte order mark was seen with its bytes exchanged.
  PackedBoolean swapBytes_;
  // Empty until decode() has been given at least two bytes of input.
  Owner<Decoder> subDecoder_;
  // Coding system asked to create the sub-decoder; may be null.
  const InputCodingSystem *subCodingSystem_;
};

// Encoder that writes a byte order mark at the start of a file and passes
// all character output on to a UTF-16 sub-encoder.
class UnicodeEncoder : public Encoder {
public:
  UnicodeEncoder();
  void output(const Char *s, size_t n, OutputByteStream *sb);
  void startFile(OutputByteStream *sb);
private:
  // Created in the constructor; performs the actual character output.
  Owner<Encoder> subEncoder_;
};

// Remember the sub coding system (possibly null) that is handed to every
// decoder this coding system creates.
UnicodeCodingSystem::UnicodeCodingSystem(const InputCodingSystem *sub)
  : sub_(sub)
{
}

// Create a new decoder bound to this coding system's sub coding system.
// The caller receives a freshly allocated object.
Decoder *UnicodeCodingSystem::makeDecoder() const
{
  Decoder *decoder = new UnicodeDecoder(sub_);
  return decoder;
}

// Create a new encoder.  The caller receives a freshly allocated object.
Encoder *UnicodeCodingSystem::makeEncoder() const
{
  Encoder *encoder = new UnicodeEncoder();
  return encoder;
}

// Start with no byte order mark seen and no sub-decoder.  The base class
// receives 1 when a sub coding system is supplied and 2 otherwise
// (presumably the initial minimum number of bytes per character).
UnicodeDecoder::UnicodeDecoder(const InputCodingSystem *subCodingSystem)
  : Decoder(subCodingSystem != 0 ? 1 : 2),
    hadByteOrderMark_(0),
    swapBytes_(0),
    subCodingSystem_(subCodingSystem)
{
}


// Decode input, checking for a byte order mark on first use.
//
// The first call that receives at least two bytes examines them for a byte
// order mark, skips the mark if present, creates the sub-decoder and passes
// the remaining input to it.  Every later call goes straight to that
// sub-decoder.
//
//   to       buffer handed to the sub-decoder for the decoded characters
//   from     input bytes
//   fromLen  number of bytes available at from
//   rest     set to from when fewer than two bytes are available on the
//            first use; otherwise filled in by the sub-decoder
//
// Returns 0 when there is too little input to examine, otherwise whatever
// the sub-decoder returns.
size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen,
			      const char **rest)
{
  // The byte order question has already been settled: just delegate.
  if (subDecoder_)
    return subDecoder_->decode(to, from, fromLen, rest);

  // Too little input to look for a byte order mark; consume nothing.
  if (fromLen < 2) {
    *rest = from;
    return 0;
  }
  // Provisional value; replaced below by what the sub-decoder reports.
  minBytesPerChar_ = 2;

  // Read the first two bytes as a 16-bit value in the host's byte order.
  // memcpy is used because reading a union member other than the one last
  // written is not well-defined in C++.
  unsigned short firstWord = 0;
  memcpy(&firstWord, from, 2);

  if (firstWord == byteOrderMark) {
    hadByteOrderMark_ = 1;
    from += 2;
    fromLen -= 2;
  }
  else if (firstWord == swappedByteOrderMark) {
    hadByteOrderMark_ = 1;
    swapBytes_ = 1;
    from += 2;
    fromLen -= 2;
  }

  if (hadByteOrderMark_ || !subCodingSystem_) {
    // A single function-local instance is shared by every decoder.  The
    // previous code allocated a new coding system here with nothing to
    // delete it, leaking one object per decoder.
    static UTF16CodingSystem utf16CodingSystem;
    subCodingSystem_ = &utf16CodingSystem;
  }
  subDecoder_ = subCodingSystem_->makeDecoder(swapBytes_);
  minBytesPerChar_ = subDecoder_->minBytesPerChar();
  return subDecoder_->decode(to, from, fromLen, rest);
}

// Convert the offset n in place by passing it through the sub-decoder and
// then adding 2 when a byte order mark was skipped at the start of the
// input.
//
// Returns false when no sub-decoder exists yet, true otherwise.
Boolean UnicodeDecoder::convertOffset(unsigned long &n) const
{
  // The sub-decoder is only created once decode() has been given at least
  // two bytes; before that there is nothing to delegate to, and
  // dereferencing the empty owner would be a null access.
  // NOTE(review): false is taken to mean "offset could not be converted";
  // confirm against the Decoder::convertOffset contract.
  if (subDecoder_)
    subDecoder_->convertOffset(n);
  else
    return false;
  // NOTE(review): the sub-decoder's own result is discarded, as it was
  // before this change; confirm whether it should be propagated.
  if (hadByteOrderMark_)
    n += 2;
  return true;
}

// Obtain the sub-encoder from a short-lived UTF-16 coding system, which is
// used here purely as a factory.
UnicodeEncoder::UnicodeEncoder()
{
  subEncoder_ = UTF16CodingSystem().makeEncoder();
}

// Write the byte order mark to sb as two bytes, in the byte order in which
// the host stores an unsigned short.
void UnicodeEncoder::startFile(OutputByteStream *sb)
{
  char markBytes[2];
  memcpy(markBytes, &byteOrderMark, 2);
  sb->sputn(markBytes, 2);
}

// Pass the n characters at s, together with the destination stream sb,
// unchanged to the sub-encoder created in the constructor.
void UnicodeEncoder::output(const Char *s,
			    size_t n,
			    OutputByteStream *sb)
{
  subEncoder_->output(s, n, sb);
}

#ifdef SP_NAMESPACE
}
#endif

#else /* not SP_MULTI_BYTE */

#ifndef __GNUG__
// Dummy definition so that this file still declares something when
// SP_MULTI_BYTE is not defined and all of the code above is compiled out.
// It is only emitted when __GNUG__ is not defined.
static char non_empty_translation_unit;	// sigh
#endif

#endif /* not SP_MULTI_BYTE */