// Copyright (c) 1994 James Clark, 2000 Matthias Clasen
// See the file COPYING for copying permission.
#include "splib.h"
#ifdef SP_MULTI_BYTE
#include "UnicodeCodingSystem.h"
#include "UTF16CodingSystem.h"
#include "macros.h"
#include "Owner.h"
#include <stddef.h>
#include <string.h>
#ifdef DECLARE_MEMMOVE
extern "C" {
void *memmove(void *, const void *, size_t);
}
#endif
#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif
// Value of the byte order mark (U+FEFF) when the first two input bytes are
// loaded into an unsigned short and already match the host's byte order.
const unsigned short byteOrderMark = 0xfeff;
// Value seen when the same two bytes arrive in the opposite order; the
// decoder treats this as a request to swap bytes.
const unsigned short swappedByteOrderMark = 0xfffe;
// Decoder that looks at the first two input bytes for a byte order mark and
// then delegates all decoding to a sub-decoder selected from that result.
class UnicodeDecoder : public Decoder {
public:
  // sub is the coding system used when the input carries no byte order
  // mark; it may be null, in which case UTF-16 is used.
  UnicodeDecoder(const InputCodingSystem *sub);
  size_t decode(Char *to, const char *from, size_t fromLen,
                const char **rest);
  Boolean convertOffset(unsigned long &offset) const;

private:
  // Set once a leading byte order mark (in either order) has been consumed.
  PackedBoolean hadByteOrderMark_;
  // Set when the mark was seen in swapped byte order.
  PackedBoolean swapBytes_;
  // Created by the first decode() call that sees at least two bytes.
  Owner<Decoder> subDecoder_;
  // Coding system the sub-decoder is obtained from; not deleted here.
  const InputCodingSystem *subCodingSystem_;
};
// Encoder that emits a byte order mark at the start of a file and forwards
// all character output to a UTF-16 sub-encoder.
class UnicodeEncoder : public Encoder {
public:
  UnicodeEncoder();
  void output(const Char *, size_t, OutputByteStream *);
  void startFile(OutputByteStream *);

private:
  // Encoder obtained from a UTF16CodingSystem in the constructor.
  Owner<Encoder> subEncoder_;
};
// Stores the coding system that decoders made by this object use for input
// without a byte order mark. The pointer is kept as given and may be null.
UnicodeCodingSystem::UnicodeCodingSystem(const InputCodingSystem *sub)
  : sub_(sub)
{
}
// Allocates a UnicodeDecoder bound to this object's sub coding system and
// returns it as a raw pointer.
Decoder *UnicodeCodingSystem::makeDecoder() const
{
  Decoder *decoder = new UnicodeDecoder(sub_);
  return decoder;
}
// Allocates a UnicodeEncoder and returns it as a raw pointer.
Encoder *UnicodeCodingSystem::makeEncoder() const
{
  Encoder *encoder = new UnicodeEncoder;
  return encoder;
}
// Starts with no byte order mark seen and no sub-decoder. The value passed
// to the Decoder base is 1 when a sub coding system is supplied and 2
// otherwise (presumably the minimum bytes per character -- confirm against
// the Decoder constructor).
// Initializers are listed in member declaration order, which is the order
// they run in regardless of how they are written.
UnicodeDecoder::UnicodeDecoder(const InputCodingSystem *subCodingSystem)
  : Decoder(subCodingSystem ? 1 : 2),
    hadByteOrderMark_(0),
    swapBytes_(0),
    subCodingSystem_(subCodingSystem)
{
}
// Decodes fromLen bytes starting at from into to and returns the result of
// the sub-decoder's decode(); *rest is set by the sub-decoder.
//
// On the first call that supplies at least two bytes, the leading two bytes
// are compared with the byte order mark in both byte orders. A mark, if
// present, is skipped and not passed to the sub-decoder. The sub-decoder is
// then created once and every later call is forwarded straight to it:
//   - mark present, or no sub coding system given: UTF-16 is used;
//   - otherwise: the sub coding system given at construction is used.
//
// While no sub-decoder exists and fewer than two bytes are available,
// nothing is consumed: *rest is set to from and 0 is returned.
size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen,
                              const char **rest)
{
  // Lets the first two bytes be read as one word in the host's byte order.
  union U {
    unsigned short word;
    char bytes[2];
  };

  if (subDecoder_)
    return subDecoder_->decode(to, from, fromLen, rest);
  if (fromLen < 2) {
    // Not enough input yet to tell whether a byte order mark is present.
    *rest = from;
    return 0;
  }
  minBytesPerChar_ = 2;
  U u;
  u.bytes[0] = from[0];
  u.bytes[1] = from[1];
  if (u.word == byteOrderMark) {
    hadByteOrderMark_ = 1;
    from += 2;
    fromLen -= 2;
  }
  else if (u.word == swappedByteOrderMark) {
    hadByteOrderMark_ = 1;
    from += 2;
    fromLen -= 2;
    swapBytes_ = 1;
  }
  if (hadByteOrderMark_ || !subCodingSystem_) {
    // subCodingSystem_ is a non-owning pointer and nothing in this class
    // deletes it, so allocating the UTF-16 coding system with new leaked
    // one object per decoder. A single function-local instance is shared
    // instead and lives until program exit.
    static UTF16CodingSystem utf16CodingSystem;
    subCodingSystem_ = &utf16CodingSystem;
  }
  subDecoder_ = subCodingSystem_->makeDecoder(swapBytes_);
  minBytesPerChar_ = subDecoder_->minBytesPerChar();
  return subDecoder_->decode(to, from, fromLen, rest);
}
// Adjusts n in place: first lets the sub-decoder convert it, then adds 2 if
// a byte order mark was consumed at the start of the input. Always returns
// true; the sub-decoder's own return value is not propagated.
Boolean UnicodeDecoder::convertOffset(unsigned long &n) const
{
  // The sub-decoder is only created once decode() has seen at least two
  // bytes. Before that there is nothing to delegate to, and no byte order
  // mark can have been recorded, so n is left unchanged.
  if (subDecoder_)
    subDecoder_->convertOffset(n);
  if (hadByteOrderMark_)
    n += 2;
  return true;
}
// Obtains the sub-encoder from a temporary UTF16CodingSystem; the coding
// system object itself is discarded when the constructor returns.
UnicodeEncoder::UnicodeEncoder()
{
  UTF16CodingSystem codingSystem;
  subEncoder_ = codingSystem.makeEncoder();
}
// Writes the byte order mark to sb as two bytes, taken directly from the
// in-memory representation of an unsigned short (i.e. host byte order).
void UnicodeEncoder::startFile(OutputByteStream *sb)
{
  unsigned short mark = byteOrderMark;
  sb->sputn(reinterpret_cast<char *>(&mark), 2);
}
// Forwards n characters starting at s to the sub-encoder, which writes
// them to sb. No processing is done here.
void UnicodeEncoder::output(const Char *s, size_t n, OutputByteStream *sb)
{
  subEncoder_->output(s, n, sb);
}
#ifdef SP_NAMESPACE
}
#endif
#else /* not SP_MULTI_BYTE */
#ifndef __GNUG__
// When SP_MULTI_BYTE is not defined nothing else in this file is compiled.
// This unused definition keeps the translation unit from being empty on
// compilers other than GNU C++.
static char non_empty_translation_unit; // sigh
#endif
#endif /* not SP_MULTI_BYTE */