File: iconvtc.c

package info (click to toggle)
tidy 20080116cvs-2
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 7,296 kB
  • ctags: 3,259
  • sloc: ansic: 30,069; sh: 9,060; xml: 317; makefile: 41
file content (105 lines) | stat: -rw-r--r-- 2,674 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/* iconvtc.c -- Interface to iconv transcoding routines

  (c) 1998-2003 (W3C) MIT, ERCIM, Keio University
  See tidy.h for the copyright notice.

  $Id: iconvtc.c,v 1.1 2003/04/28 22:59:41 hoehrmann Exp $
*/

#include <tidy.h>
#include "forward.h"
#include "streamio.h"

#ifdef TIDY_ICONV_SUPPORT

#include <iconv.h>

/* Capacity, in bytes, of the input staging buffer: IconvGetChar gives up */
/* once this many bytes have been collected without iconv() producing a   */
/* complete character.                                                    */
#define TC_INBUFSIZE  16

/* Size, in bytes, of the buffer that receives iconv()'s converted output. */
/* IconvGetChar only reads the first four octets of it (one code point).   */
#define TC_OUTBUFSIZE 16

Bool IconvInitInputTranscoder(void)
{
    return no;
}

/* Tear-down hook for the iconv input transcoder.
 *
 * Intentionally a no-op: this file keeps no state of its own to release.
 */
void IconvUninitInputTranscoder(void)
{
    /* nothing to do */
}

/* Decode one character from an input stream through iconv.
 *
 * Starting with firstByte, bytes are accumulated one at a time (pulled
 * from in->source via its getByte callback) and handed to iconv() until
 * the conversion descriptor stored in in->iconvptr reports a complete
 * character.  The first four output octets are then assembled as a
 * little-endian 32-bit value (the descriptor is expected to have been
 * opened for UTF-32LE output by whoever set in->iconvptr -- not visible
 * in this file).
 *
 *   firstByte  first byte of the sequence, already read by the caller
 *   in         input stream; in->iconvptr must hold a valid iconv_t
 *   bytesRead  out: number of input bytes collected for this call,
 *              including firstByte; written on every return path
 *
 * Returns the decoded code point, or EndOfStream if the source ran dry,
 * the sequence is invalid, or no character could be formed from
 * TC_INBUFSIZE bytes.
 */
int IconvGetChar(byte firstByte, StreamIn * in, uint * bytesRead)
{
    iconv_t cd;
    TidyInputSource * source;
    char inbuf[TC_INBUFSIZE] = { 0 };
    char outbuf[TC_OUTBUFSIZE] = { 0 };
    size_t inbufsize = 0;

    assert( in != NULL );
    assert( bytesRead != NULL );
    assert( in->iconvptr != 0 );

    cd = (iconv_t)in->iconvptr;
    source = &in->source;

    inbuf[inbufsize++] = (char)firstByte;
    
    while(inbufsize < TC_INBUFSIZE)
    {
        /* iconv() advances these, so reset them to the buffer starts */
        /* and retry the whole accumulated sequence on every pass.    */
        char * outbufptr = (char*)outbuf;
        char * inbufptr = (char*)inbuf;
        size_t readNow = inbufsize;
        size_t writeNow = TC_OUTBUFSIZE;
        size_t result = 0;
        int iconv_errno = 0;
        int nextByte = EndOfStream;

        result = iconv(cd, (const char**)&inbufptr, &readNow, (char**)&outbufptr, &writeNow);
        iconv_errno = errno; /* save before anything else can clobber it */

        if (result != (size_t)(-1))
        {
            unsigned int c;

            /* create codepoint from UTF-32LE octets; done in unsigned */
            /* arithmetic so the top octet never shifts into a sign bit */
            c  = (unsigned int)(unsigned char)outbuf[0];
            c |= (unsigned int)(unsigned char)outbuf[1] << 8;
            c |= (unsigned int)(unsigned char)outbuf[2] << 16;
            c |= (unsigned int)(unsigned char)outbuf[3] << 24;

            /* set number of read bytes */
            *bytesRead = (uint)inbufsize;

            return (int)c;
        }

        assert( iconv_errno != EILSEQ ); /* broken multibyte sequence */
        assert( iconv_errno != E2BIG );  /* not enough memory         */
        assert( iconv_errno == EINVAL ); /* incomplete sequence       */

        if (iconv_errno != EINVAL)
        {
            /* Only reachable when asserts are compiled out: appending */
            /* more bytes cannot repair an invalid sequence, so stop   */
            /* instead of draining the source.                         */

            /* todo: error message for broken stream? */
            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        /* we need more bytes */
        nextByte = source->getByte(source->sourceData);

        if (nextByte == EndOfStream)
        {
            /* todo: error message for broken stream? */

            *bytesRead = (uint)inbufsize;
            return EndOfStream;
        }

        inbuf[inbufsize++] = (char)nextByte;
    }

    /* No full character found after reading TC_INBUFSIZE bytes, */
    /* give up to read this stream, it's obviously unreadable.   */

    /* todo: error message for broken stream? */
    *bytesRead = (uint)inbufsize;
    return EndOfStream;
}

#endif /* TIDY_ICONV_SUPPORT */