File: utf8.hh

package info (click to toggle)
goldendict 1.5.0~rc2%2Bgit20221126%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 17,376 kB
  • sloc: cpp: 60,569; ansic: 11,511; xml: 529; makefile: 74; sh: 42
file content (43 lines) | stat: -rw-r--r-- 1,800 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#include <cstdio>
#include <string>
#include "cpp_features.hh"
#include "ex.hh"
#include "wstring.hh"

/// A simple UTF-8 encoder/decoder. Some dictionary backends only require
/// utf8, so we have this separately, removing the iconv dependency for them.
/// Besides, utf8 is quite ubiquitous now, and its use is spreaded over many
/// places.
namespace Utf8 {

using std::string;
using gd::wstring;
using gd::wchar;

DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )

/// Encodes the given UCS-4 into UTF-8. The inSize specifies the number
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
/// at least inSize * 4 bytes long. The function returns the number of chars
/// stored in the 'out' buffer. The result is not 0-terminated.
size_t encode( wchar const * in, size_t inSize, char * out );
/// Decodes the given UTF-8 into UCS-32. The inSize specifies the number
/// of bytes the 'in' pointer points to. The 'out' buffer must be at least
/// inSize wide characters long. If the given UTF-8 is invalid, the decode
/// function returns -1, otherwise it returns the number of wide characters
/// stored in the 'out' buffer. The result is not 0-terminated.
long decode( char const * in, size_t inSize, wchar * out );

/// Versions for non time-critical code.
string encode( wstring const & ) throw();
wstring decode( string const & ) THROW_SPEC( exCantDecode );

/// Since the standard isspace() is locale-specific, we need something
/// that would never mess up our utf8 input. The stock one worked fine under
/// Linux but was messing up strings under Windows.
bool isspace( int c );

}