File: iconv.cc

package info (click to toggle)
goldendict 1.5.0~rc2%2Bgit20221126%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 17,376 kB
  • sloc: cpp: 60,569; ansic: 11,511; xml: 529; makefile: 74; sh: 42
file content (152 lines) | stat: -rw-r--r-- 4,037 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#include "iconv.hh"
#include <vector>
#include <errno.h>
#include <string.h>

// Name of the iconv encoding used for gd::wchar data; toWstring() passes it to
// iconv as the target encoding. Builds with __WIN32 defined use "UCS-4LE",
// all others use "WCHAR_T".
#ifdef __WIN32
char const * const Iconv::GdWchar = "UCS-4LE";
#else
char const * const Iconv::GdWchar = "WCHAR_T";
#endif

// iconv encoding names for little-endian UTF-16 and for UTF-8. Utf8 is the
// target encoding used by toUtf8().
char const * const Iconv::Utf16Le = "UTF-16LE";
char const * const Iconv::Utf8 = "UTF-8";

// Lets the rest of this file refer to gd::wchar as plain 'wchar'.
using gd::wchar;

/// Opens an iconv conversion descriptor that converts text from the encoding
/// named by 'from' into the encoding named by 'to'.
/// Throws exCantInit, built from the strerror() text for errno, when
/// iconv_open() reports failure.
Iconv::Iconv( char const * to, char const * from ) THROW_SPEC( exCantInit ):
  state( iconv_open( to, from ) )
{
  // iconv_open() signals failure by returning (iconv_t) -1
  iconv_t const openFailed = (iconv_t) -1;

  if ( state != openFailed )
    return;

  throw exCantInit( strerror( errno ) );
}

/// Drops the current conversion descriptor and opens a new one that converts
/// from the encoding named by 'from' into the encoding named by 'to'.
/// Throws exCantInit, built from the strerror() text for errno, when
/// iconv_open() reports failure. In that case state is left holding
/// (iconv_t) -1, i.e. the object no longer has a usable descriptor.
void Iconv::reinit( char const * to, char const * from ) THROW_SPEC( exCantInit )
{
  // An earlier failed reinit() leaves state at (iconv_t) -1. That value is not
  // a descriptor obtained from iconv_open(), so it must not be handed to
  // iconv_close().
  if ( state != (iconv_t) -1 )
    iconv_close( state );

  state = iconv_open( to, from );

  if ( state == (iconv_t) -1 )
    throw exCantInit( strerror( errno ) );
}

/// Releases the conversion descriptor held in state, if there is one.
Iconv::~Iconv() throw()
{
  // reinit() stores (iconv_t) -1 in state when iconv_open() fails; that value
  // is not a valid descriptor and must not be passed to iconv_close().
  if ( state != (iconv_t) -1 )
    iconv_close( state );
}

/// Runs one iconv() call on the given buffers. The four arguments are passed
/// to iconv() by address, so they are updated in place by that call.
///
/// Returns Success when iconv() does not report an error, NeedMoreIn when it
/// fails with EINVAL, and NeedMoreOut when it fails with E2BIG.
/// Throws exIncorrectSeq on EILSEQ and exOther (built from the strerror()
/// text) on any other errno value.
Iconv::Result Iconv::convert( void const * & inBuf, size_t  & inBytesLeft,
                              void * & outBuf, size_t & outBytesLeft )
  THROW_SPEC( exIncorrectSeq, exOther )
{
  // iconv() is called here with char ** for both buffers, so the constness of
  // the input pointer is cast away. NOTE(review): the previously commented-out
  // variant suggests some __WIN32 builds expected char const ** instead.
  char ** const source = (char **)&inBuf;
  char ** const target = (char **)&outBuf;

  size_t const converted = iconv( state, source, &inBytesLeft,
                                  target, &outBytesLeft );

  if ( converted != (size_t) -1 )
    return Success;

  // Capture errno once and map it onto our result codes / exceptions
  int const failure = errno;

  if ( failure == EILSEQ )
    throw exIncorrectSeq();

  if ( failure == EINVAL )
    return NeedMoreIn;

  if ( failure == E2BIG )
    return NeedMoreOut;

  throw exOther( strerror( failure ) );
}

/// Converts dataSize bytes at fromData, encoded as fromEncoding, into a
/// gd::wstring (target encoding GdWchar).
///
/// Returns an empty string when dataSize is 0.
/// Throws exCantInit if the converter cannot be created, exPrematureEnd if the
/// conversion reports NeedMoreIn, and lets exIncorrectSeq / exOther from
/// convert() propagate.
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData,
                              size_t dataSize )
  THROW_SPEC( exCantInit, exIncorrectSeq, exPrematureEnd, exOther )
{
  /// Special-case the dataSize == 0 to avoid any kind of iconv-specific
  /// behaviour in that regard.

  if ( !dataSize )
    return gd::wstring();

  Iconv ic( GdWchar, fromEncoding );

  /// Start with one wchar per input byte. This size is usually enough, but may
  /// be enlarged during the conversion
  std::vector< wchar > outBuf( dataSize );

  void * outBufPtr = &outBuf.front();

  /// Free space in outBuf, counted in bytes (not in wchars)
  size_t outBufLeft = outBuf.size() * sizeof( wchar );

  /// Number of wchar elements added each time the output buffer runs out
  size_t const growBy = 256;

  for( ; ; )
  {
    switch( ic.convert( fromData, dataSize, outBufPtr, outBufLeft ) )
    {
      case Success:
        // Converted length = total elements minus the still-unused ones
        return gd::wstring( &outBuf.front(),
                            outBuf.size() - outBufLeft / sizeof( wchar ) );
      case NeedMoreIn:
        throw exPrematureEnd();
      case NeedMoreOut:
      {
        // Grow the buffer and retry
        // The pointer may get invalidated so we save the diff and restore it
        size_t offset = (wchar *)outBufPtr - &outBuf.front();
        outBuf.resize( outBuf.size() + growBy );
        outBufPtr = &outBuf.front() + offset;
        // outBufLeft is in bytes, so account for the full size of the new
        // elements; otherwise the length computed on Success is too large.
        outBufLeft += growBy * sizeof( wchar );
      }
    }
  }
}

/// Converts dataSize bytes at fromData, encoded as fromEncoding, into a UTF-8
/// std::string. Works the same way as toWstring(), with a byte buffer.
///
/// Returns an empty string when dataSize is 0.
/// Throws exCantInit if the converter cannot be created, exPrematureEnd if the
/// conversion reports NeedMoreIn, and lets exIncorrectSeq / exOther from
/// convert() propagate.
std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData,
                           size_t dataSize )
  THROW_SPEC( exCantInit, exIncorrectSeq, exPrematureEnd, exOther )
{
  // Nothing to convert - don't involve iconv at all
  if ( dataSize == 0 )
    return std::string();

  Iconv ic( Utf8, fromEncoding );

  // Initial guess: one output byte per input byte; enlarged on demand below
  std::vector< char > outBuf( dataSize );

  void * writePos = &outBuf[ 0 ];
  size_t bytesFree = outBuf.size();

  // Number of bytes added each time the output buffer runs out
  size_t const growBy = 256;

  for( ; ; )
  {
    Result const status = ic.convert( fromData, dataSize, writePos, bytesFree );

    if ( status == Success )
      return std::string( &outBuf[ 0 ], outBuf.size() - bytesFree );

    if ( status == NeedMoreIn )
      throw exPrematureEnd();

    if ( status == NeedMoreOut )
    {
      // Resizing may relocate the storage, so remember how far we have
      // written and rebuild the write pointer afterwards, then retry.
      size_t const written = static_cast< char * >( writePos ) - &outBuf[ 0 ];

      outBuf.resize( outBuf.size() + growBy );

      writePos = &outBuf[ 0 ] + written;
      bytesFree += growBy;
    }
  }
}