1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
|
/*
* General purpose encoding and decoding.
* Copyright (C) 2007 - 2023 Rene Rebe, ExactCODE GmbH
* Copyright (c) 2008 Valentin Ziegle, ExactCODE GmbH
* Copyright (C) 2007 Susanne Klaus, ExactCODE GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2. A copy of the GNU General
* Public License can be found in the file LICENSE.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANT-
* ABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
* Public License for more details.
*
* Alternatively, commercial licensing options are available from the
* copyright holder ExactCODE GmbH Germany.
*/
#ifndef ENCODINGS_HH
#define ENCODINGS_HH
#include <stdint.h>
#include <string.h> // strleN
#include <vector>
#include <iostream>
#include <sstream>
/*
All encoding functions have signature
void Encode... (std::ostream& stream, const T& data, size_t length ....)
Type T has to implement const CH& operator[] (int) const
where CH can be cast to uint8_t. Most likely T will be one of
uint8_t*, std::vector<uint8_t>, std::string.
*/
// Writes the first `length` bytes of `data` to `stream` as lowercase
// hexadecimal, two digits per byte. A line break is inserted after every
// 40 input bytes (80 characters); no trailing newline is written.
template <typename T>
void EncodeHex(std::ostream& stream, const T& data, size_t length)
{
  static const char digits[] = "0123456789abcdef";
  const size_t bytesPerLine = 40;

  size_t pos = 0;
  while (pos < length) {
    // start a new line before every full row except the very first one
    if (pos != 0 && pos % bytesPerLine == 0)
      stream.put('\n');

    const uint8_t value = data[pos];
    stream.put(digits[value >> 4]);    // high nibble first
    stream.put(digits[value & 0x0f]);
    ++pos;
  }
}
// Writes the first `length` bytes of `data` to `stream` in ASCII85 form,
// followed by the end marker "~>".
//
// Input is consumed in groups of four bytes. A complete group of four zero
// bytes is written as the single character 'z'; every other group is written
// as base-85 digits offset by '!'. A trailing partial group of k bytes is
// zero padded and written as k+1 characters. A newline is inserted after
// every 80 output characters, and one more before "~>" if the marker would
// not fit on the current line.
template <typename T>
void EncodeASCII85(std::ostream& stream, const T& data, size_t length)
{
  const int limit = 80;
  int column = 0; // characters already written on the current line

  for (size_t pos = 0; pos < length; pos += 4) {
    // number of real input bytes in this group (1..4)
    const size_t remaining = length - pos;
    const int present = remaining < 4 ? (int)remaining : 4;

    // assemble the big-endian 32 bit value, missing bytes count as zero
    uint32_t quad = 0;
    for (int k = 0; k < 4; ++k) {
      quad <<= 8;
      if (k < present)
        quad |= (uint8_t) data[pos + k];
    }

    if (present == 4 && quad == 0) {
      // shorthand is only allowed for a complete all-zero group
      stream.put('z');
      if (++column == limit) {
        stream.put('\n');
        column = 0;
      }
      continue;
    }

    // split into five base-85 digits, most significant first
    char seq[5];
    for (int k = 4; k >= 0; --k) {
      const uint32_t rem = quad % 85;
      quad /= 85;
      seq[k] = '!' + rem;
    }

    // a group of `present` bytes is represented by present+1 characters
    for (int k = 0; k <= present; ++k) {
      stream.put(seq[k]);
      if (++column == limit) {
        stream.put('\n');
        column = 0;
      }
    }
  }

  // keep the two character end marker together on one line
  if (column >= limit - 1)
    stream.put('\n');
  stream << "~>";
}
// Writes the first `length` bytes of `data` to `stream` as Base64 using the
// standard alphabet (A-Z, a-z, 0-9, '+', '/') with '=' padding.
//
// A line break is inserted after every 57 input bytes (76 output
// characters); no trailing newline is written.
template <typename T>
void EncodeBase64(std::ostream& stream, const T& data, size_t length)
{
  const size_t limit = 57; // input bytes per output line
  static const char base64lookup[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  for (size_t i = 0; i < length; i += 3) {
    if (i % limit == 0 && i != 0)
      stream.put('\n');

    int pad = 0; // number of '=' characters needed for this group

    // Every element is converted to uint8_t first: with a signed char
    // element type (e.g. std::string) a direct conversion to uint32_t would
    // sign-extend bytes >= 0x80 and the shifted value would overwrite the
    // bits of the preceding bytes.
    uint32_t v = (uint32_t)(uint8_t)data[i + 0] << 16;
    if (i + 1 < length) {
      v |= (uint32_t)(uint8_t)data[i + 1] << 8;
      if (i + 2 < length)
        v |= (uint32_t)(uint8_t)data[i + 2] << 0;
      else
        pad = 1;
    } else
      pad = 2;

    stream.put(base64lookup[v >> 18 & 63]);
    stream.put(base64lookup[v >> 12 & 63]);
    stream.put(pad > 1 ? '=' : base64lookup[(v >> 6 & 63)]);
    stream.put(pad > 0 ? '=' : base64lookup[(v >> 0 & 63)]);
  }
}
// Decodes the first `length` characters of Base64 text in `data` and writes
// the resulting bytes to `stream`.
//
// Characters outside the Base64 alphabet (line breaks, blanks, ...) are
// skipped. A '=' or the end of the input closes the current group: two
// collected characters yield one byte, three yield two bytes, and a single
// left-over character cannot form a byte and is dropped. Decoding continues
// after padding, so concatenated padded streams are decoded as well.
// Nothing is written for input that contains no Base64 characters.
template <typename T>
void DecodeBase64(std::ostream& stream, const T& data, size_t length)
{
  uint32_t acc = 0; // bits of the sextets collected for the current group
  int have = 0;     // number of sextets currently held in acc (0..3)

  // The position one past the end is treated like a padding character so a
  // trailing, unpadded group is flushed by the same code path.
  for (size_t i = 0; i <= length; ++i) {
    const char c = i < length ? (char)data[i] : '=';

    if (c == '=') {
      if (have == 2) {
        // 12 bits collected: 8 data bits + 4 filler bits
        stream.put((char)((acc >> 4) & 0xFF));
      } else if (have == 3) {
        // 18 bits collected: 16 data bits + 2 filler bits
        stream.put((char)((acc >> 10) & 0xFF));
        stream.put((char)((acc >> 2) & 0xFF));
      }
      acc = 0;
      have = 0;
      continue;
    }

    // map the character to its 6 bit value, skipping anything else
    uint32_t k;
    if (c >= 'A' && c <= 'Z')
      k = c - 'A';
    else if (c >= 'a' && c <= 'z')
      k = c - 'a' + 26;
    else if (c >= '0' && c <= '9')
      k = c - '0' + 52;
    else if (c == '+')
      k = 62;
    else if (c == '/')
      k = 63;
    else
      continue;

    acc = (acc << 6) | k;
    if (++have == 4) {
      stream.put((char)((acc >> 16) & 0xFF));
      stream.put((char)((acc >> 8) & 0xFF));
      stream.put((char)((acc >> 0) & 0xFF));
      acc = 0;
      have = 0;
    }
  }
}
#ifdef WITHZLIB
#include <zlib.h>
// TODO: make template, likewise
// windowBits should be in the range [8,15]
// negative values indicate no header and CRC
template <typename T>
bool DecodeZlib(std::ostream& stream,
const T& data, size_t length,
const int windowBits = 15, z_stream* z = 0)
{
static const unsigned CHUNK = 16 * 1024;
static const bool trace = false;
int ret;
z_stream _z;
char out[CHUNK];
// allocate deflate state if not supplied
if (!z) {
z = &_z;
z->zalloc = Z_NULL;
z->zfree = Z_NULL;
z->opaque = Z_NULL;
ret = inflateInit2(z, windowBits);
if (ret != Z_OK)
return false;
}
z->next_in = (Bytef*)&data[0];
z->avail_in = length;
// decompress until end of file
while (z->avail_in && ret != Z_STREAM_END)
{
if (trace)
std::cerr << " @ " << z->next_in - (Bytef*)&data[0]
<< " avail in: " << z->avail_in << std::endl;
/* run inflate() on input until output buffer is full, finish
decompression if all of source has been read in */
z->next_out = (Bytef*)out;
z->avail_out = CHUNK;
ret = inflate(z, Z_SYNC_FLUSH);
if (trace)
std::cerr << " ret: " << ret
<< ", avail_out: " << z->avail_out
<< ", next_in: " << (void*)z->next_in
<< ", next_out: " << (void*)z->next_out
<< std::endl;
unsigned have = CHUNK - z->avail_out;
stream.write(out, have);
if (!stream) {
if (z == &_z) (void)inflateEnd(z);
return false;
}
if (ret != Z_OK && ret != Z_STREAM_END) {
if (z == &_z) (void)inflateEnd(z);
return false;
}
}
// clean up and return
if (z == &_z) (void)inflateEnd(z);
return true;
}
struct ZlibEncoder {
static const unsigned CHUNK = 16 * 1024;
static const bool trace = false;
std::ostream& stream;
z_stream z;
char out[CHUNK];
ZlibEncoder(std::ostream& stream)
: stream(stream)
{
// allocate deflate state
z.zalloc = Z_NULL;
z.zfree = Z_NULL;
z.opaque = Z_NULL;
}
~ZlibEncoder() {
z.next_in = 0;
z.avail_in = 0;
for (;;) {
z.next_out = (Bytef*)out;
z.avail_out = CHUNK;
int ret = deflate(&z, Z_FINISH);
const unsigned have = CHUNK - z.avail_out;
if (trace)
std::cerr << " finish: " << ret << ", have: " << have
<< ", avail_in: " << z.avail_in
<< ", avail_out: " << z.avail_out
<< ", next_in: " << (void*)z.next_in
<< ", next_out: " << (void*)z.next_out
<< std::endl;
if (have)
stream.write(out, have);
if (ret == Z_OK && z.avail_out == 0)
continue;
break;
}
// clean up and return
(void)deflateEnd(&z);
}
bool init(int level = 9) {
int ret = deflateInit(&z, level);
return ret == Z_OK;
}
bool write(const char* data, int length) {
// compress until end of file
z.next_in = (Bytef*)data;
z.avail_in = length;
int ret;
for (;;) {
z.next_out = (Bytef*)out;
z.avail_out = CHUNK;
ret = deflate(&z, Z_NO_FLUSH);
const unsigned have = CHUNK - z.avail_out;
if (trace)
std::cerr << " write: " << ret << ", have: " << have
<< ", avail_in: " << z.avail_in
<< ", avail_out: " << z.avail_out
<< ", next_in: " << (void*)z.next_in
<< ", next_out: " << (void*)z.next_out
<< std::endl;
if (have)
stream.write(out, have);
if (ret == Z_OK && z.avail_out == 0)
continue;
break;
}
return ret == Z_OK;
}
};
// Compresses `length` bytes from `data` with zlib at the given compression
// level and writes the result to `stream`.
//
// Returns false if the encoder could not be initialised or a write failed.
// The compressed stream is completed when the local encoder goes out of
// scope. Empty input is valid and returns true.
inline bool EncodeZlib (std::ostream& stream, const char* data,
                        size_t length, int level = 9)
{
  ZlibEncoder encoder(stream);
  if (!encoder.init(level))
    return false;

  // ZlibEncoder::write() takes an int length, so large buffers are fed in
  // pieces that are guaranteed to fit instead of being narrowed
  const size_t maxPiece = (size_t)1 << 30;
  while (length > 0) {
    const size_t piece = length < maxPiece ? length : maxPiece;
    if (!encoder.write(data, (int)piece))
      return false;
    data += piece;
    length -= piece;
  }
  return true;
}
#endif
/* Some miscellaneous de- and encoders */
// TODO: make more generic
// Writes the UTF-8 encoding of the code point `glyph_code` to `stream`.
//
// Code points up to 0x10FFFF are encoded in one to four bytes. Larger values
// are reported on std::cerr and nothing is written to `stream`.
// The bytes are written as a NUL terminated string, so code point 0 produces
// no output.
inline void EncodeUtf8 (std::ostream& stream, uint32_t glyph_code)
{
  char utf8[5]; // up to four bytes plus terminator

  if (glyph_code <= 0x7f) {
    utf8[0] = glyph_code;
    utf8[1] = 0;
  }
  else if (glyph_code <= 0x7FF) {
    utf8[0] = 0xC0 | (glyph_code >> 6);
    utf8[1] = 0x80 | ((glyph_code >> 0) & 0x3F);
    utf8[2] = 0;
  }
  else if (glyph_code <= 0xFFFF) {
    utf8[0] = 0xE0 | (glyph_code >> 12);
    utf8[1] = 0x80 | ((glyph_code >> 6) & 0x3F);
    utf8[2] = 0x80 | ((glyph_code >> 0) & 0x3F);
    utf8[3] = 0;
  }
  else if (glyph_code <= 0x10FFFF) {
    utf8[0] = 0xF0 | (glyph_code >> 18);
    utf8[1] = 0x80 | ((glyph_code >> 12) & 0x3F);
    utf8[2] = 0x80 | ((glyph_code >> 6) & 0x3F);
    utf8[3] = 0x80 | ((glyph_code >> 0) & 0x3F);
    utf8[4] = 0;
  }
  else {
    std::cerr << "invalid glyph_code: " << glyph_code << std::endl;
    // the buffer was never filled in, so there is nothing to write
    return;
  }

  stream << utf8;
}
// Decodes `length` bytes of UTF-8 text from `data` into code points.
//
// Decoding is lenient: malformed input is reported on std::cerr but still
// yields one value per lead byte. A lead byte with an invalid length
// (1 or more than 4 leading one bits) and continuation bytes without the
// 10xxxxxx mark are reported and decoded anyway. A sequence cut off by the
// end of the buffer is reported and contributes the bits decoded so far.
// No byte at or beyond `length` is ever read.
inline std::vector<uint32_t>
DecodeUtf8(const char* data, size_t length)
{
  std::vector<uint32_t> store;

  for (size_t i = 0; i < length;)
  {
    const uint8_t lead = (uint8_t)data[i++];
    uint32_t g = lead;

    if (lead & (1<<7)) // utf-8 multi-byte-sequence?
    {
      // the number of leading one bits is the byte count of the sequence
      int n = 0;
      for (uint8_t bits = lead; bits & (1<<7); bits <<= 1)
        ++n;

      if (n < 2 || n > 4) {
        std::cerr << "invalid utf-8 count: " << n;
        // print exactly the buffer, it need not be NUL terminated
        std::cerr.write(data, length);
        std::cerr << std::endl;
      }

      // the first byte carries a variable number of payload bits
      g = lead & (0xff >> n);

      for (--n; n > 0; --n) {
        const bool truncated = i >= length;

        // check the 10 MSB marks for sanity and security
        if (truncated || (data[i] & 0xC0) != 0x80) {
          std::cerr << "incorrect utf-8 multi-byte mark: ";
          std::cerr.write(data, length);
          std::cerr << std::endl;
        }
        if (truncated)
          break; // never read past the end of the buffer

        g = (g << 6) | ((data[i++]) & (0xff >> 2));
      }
    }

    store.push_back(g);
  }
  return store;
}
// Converts UTF-16 code units stored in a wchar_t array to a UTF-8 string.
//
// len >= 0: size of the input, counted as 2 per code unit; conversion stops
//           once it is used up and does not stop at a zero unit.
// len <  0: (default) the input is terminated by a zero unit.
//
// A high surrogate (0xD800..0xDBFF) directly followed by a low surrogate
// (0xDC00..0xDFFF) is combined into one code point. Any other surrogate is
// unpaired and converted to U+FFFD; the unit following it is not consumed.
static inline std::string utf16toutf8(const wchar_t* str, int len = -1)
{
  std::stringstream s;
  const bool counted = len >= 0;

  // `len > 0` rather than `len != 0`: an odd count drops below zero
  while (counted ? len > 0 : *str != 0) {
    uint32_t u1 = *str++;
    if (counted)
      len -= 2;

    if (u1 >= 0xd800 && u1 < 0xdc00) {
      // high surrogate: peek at the next unit, if there is one
      const bool more = counted ? len > 0 : *str != 0;
      const uint32_t u2 = more ? (uint32_t)*str : 0;

      if (u2 >= 0xdc00 && u2 < 0xe000) {
        ++str;
        if (counted)
          len -= 2;
        u1 = 0x10000 + (((u1 - 0xd800) << 10) | (u2 - 0xdc00));
      } else {
        u1 = 0xfffd; // high surrogate without its partner
      }
    }
    else if (u1 >= 0xdc00 && u1 < 0xe000) {
      u1 = 0xfffd; // low surrogate without a preceding high surrogate
    }

    EncodeUtf8(s, u1);
  }

  return s.str();
}
// Converts a NUL terminated UTF-8 string to UTF-16 code units stored in a
// std::wstring. Code points below 0x10000 become a single unit, all others
// are split into a surrogate pair (0xD800 based unit first, then the
// 0xDC00 based one).
static inline std::wstring utf8toutf16(const char* str)
{
  const std::vector<uint32_t> glyphs = DecodeUtf8(str, strlen(str));

  std::wstring result;
  for (std::vector<uint32_t>::const_iterator it = glyphs.begin();
       it != glyphs.end(); ++it) {
    if (*it >= 0x10000) {
      // 20 bit offset, upper 10 bits go first
      const uint32_t offset = *it - 0x10000;
      result.push_back((wchar_t)(offset >> 10) + 0xd800);
      result.push_back((wchar_t)(offset & 0x3ff) + 0xdc00);
    } else {
      result.push_back((wchar_t)*it);
    }
  }
  return result;
}
// Replaces every occurrence of `find` in `source` with `replace`, in place.
//
// Matches are processed left to right and do not overlap; text inserted by a
// replacement is not searched again. An empty `find` matches at every
// position, so `replace` is inserted before each character and at the end.
template<typename T>
static inline void sed(T& source, const T& find, const T& replace)
{
  for (typename T::size_type i = 0; (i = source.find(find, i)) != T::npos;)
  {
    source.replace(i, find.length(), replace);
    // Continue right after the inserted text. For an empty pattern step one
    // character further, otherwise the same position would match forever.
    i += replace.length() + (find.empty() ? 1 : 0);
  }
}
#endif
|