1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
/*
* virt_mbsnrtowcs.c
*
* $Id: virt_mbsnrtowcs.c,v 1.1.1.1 2006/04/11 17:56:16 source Exp $
*
* This file is part of the OpenLink Software Virtuoso Open-Source (VOS)
* project.
*
* Copyright (C) 1998-2006 OpenLink Software
*
* This project is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; only version 2 of the License, dated June 1991.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*
*/
#include "libutil.h"
/*
 * Convert at most NMC bytes of the UTF-8 encoded input at *SRC into wide
 * characters.
 *
 * dst - destination buffer for the decoded wide characters, or NULL to
 *       only count them (LEN is then ignored).
 * src - in: address of the first input byte; out: on success, address of
 *       the first byte that was not consumed.  Left untouched on error.
 * nmc - maximum number of input bytes to examine.
 * len - maximum number of wide characters to store into DST.
 * ps  - conversion state; when NULL a function-local static object is
 *       substituted.  The state is not otherwise read or written here.
 *
 * Returns the number of wide characters produced (a NUL that is decoded
 * from the very last input byte is stored but not counted), or
 * (size_t) -1 if the input contains an invalid lead byte, an invalid
 * continuation byte, or a multibyte sequence that is cut off by the NMC
 * limit.
 */
size_t
virt_mbsnrtowcs (wchar_t * dst, unsigned char ** src, size_t nmc, size_t len, virt_mbstate_t * ps)
{
  size_t written = 0;
  unsigned char *run = *src;
  unsigned char *last = run + nmc;
  static virt_mbstate_t internal;

  if (ps == NULL)
    ps = &internal;

  if (dst == NULL)
    {
      /* The LEN parameter has to be ignored if we don't actually write
         anything.  */
      len = ~(size_t) 0;
    }

  /* Decode one character per iteration.  */
  while (written < len && run < last)
    {
      wchar_t value;
      size_t count;		/* number of continuation bytes still expected */
      unsigned char byte = *run++;

      /* We expect a start of a new multibyte character.  */
      if (byte < 0x80)
	{
	  /* One byte sequence.  */
	  count = 0;
	  value = byte;
	}
      else if ((byte & 0xe0) == 0xc0)
	{
	  /* We expect two bytes.  */
	  count = 1;
	  value = byte & 0x1f;
	}
      else if ((byte & 0xf0) == 0xe0)
	{
	  /* We expect three bytes.  */
	  count = 2;
	  value = byte & 0x0f;
	}
      else if ((byte & 0xf8) == 0xf0)
	{
	  /* We expect four bytes.  */
	  count = 3;
	  value = byte & 0x07;
	}
      else if ((byte & 0xfc) == 0xf8)
	{
	  /* We expect five bytes.  */
	  count = 4;
	  value = byte & 0x03;
	}
      else if ((byte & 0xfe) == 0xfc)
	{
	  /* We expect six bytes.  */
	  count = 5;
	  value = byte & 0x01;
	}
      else
	{
	  /* This is an illegal encoding.  */
	  /* errno = (EILSEQ); */
	  return (size_t) -1;
	}

      /* Read the possible remaining bytes.  */
      while (count-- > 0)
	{
	  /* Never read beyond the NMC bytes the caller handed us: a
	     sequence that is cut off by the limit is reported as an
	     illegal encoding instead of being completed from memory
	     past the end of the input.  */
	  if (run >= last)
	    {
	      /* errno = (EILSEQ); */
	      return (size_t) -1;
	    }

	  byte = *run++;
	  if ((byte & 0xc0) != 0x80)
	    {
	      /* This is an illegal encoding.  */
	      /* errno = (EILSEQ); */
	      return (size_t) -1;
	    }
	  value <<= 6;
	  value |= byte & 0x3f;
	}

      /* Store value if required.  */
      if (dst != NULL)
	*dst++ = value;

      /* The whole sequence is read.  Check whether end of string is
         reached.  A NUL only terminates the conversion when it is the
         last character of the input: stopping unconditionally on any
         NUL (and setting *src to NULL) failed on reading wide blobs like
         concat(UnicodeGammaSeq(128,3,65536), UnicodeGammSeq(64,3,127)).  */
      if (value == L'\0' && run == last)
	{
	  /* Found the end of the string.  */
	  *src = run;
	  return written;
	}

      /* Increment counter of produced words.  */
      ++written;
    }

  /* Store address of next byte to process.  */
  *src = run;
  return written;
}
|