1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
/* Look at first character in UTF-8 string.
Copyright (C) 1999-2000, 2002, 2006-2007, 2009-2012 Free Software
Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include "unistr.h"
int
u8_strmbtouc (ucs4_t *puc, const uint8_t *s)
{
/* Keep in sync with unistr.h and u8-mbtouc-aux.c. */
uint8_t c = *s;
if (c < 0x80)
{
*puc = c;
return (c != 0 ? 1 : 0);
}
if (c >= 0xc2)
{
if (c < 0xe0)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40)
#else
if (s[1] != 0)
#endif
{
*puc = ((unsigned int) (c & 0x1f) << 6)
| (unsigned int) (s[1] ^ 0x80);
return 2;
}
}
else if (c < 0xf0)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (c >= 0xe1 || s[1] >= 0xa0)
&& (c != 0xed || s[1] < 0xa0))
#else
if (s[1] != 0 && s[2] != 0)
#endif
{
*puc = ((unsigned int) (c & 0x0f) << 12)
| ((unsigned int) (s[1] ^ 0x80) << 6)
| (unsigned int) (s[2] ^ 0x80);
return 3;
}
}
else if (c < 0xf8)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40
&& (c >= 0xf1 || s[1] >= 0x90)
#if 1
&& (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
#endif
)
#else
if (s[1] != 0 && s[2] != 0 && s[3] != 0)
#endif
{
*puc = ((unsigned int) (c & 0x07) << 18)
| ((unsigned int) (s[1] ^ 0x80) << 12)
| ((unsigned int) (s[2] ^ 0x80) << 6)
| (unsigned int) (s[3] ^ 0x80);
return 4;
}
}
#if 0
else if (c < 0xfc)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
&& (c >= 0xf9 || s[1] >= 0x88))
#else
if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0)
#endif
{
*puc = ((unsigned int) (c & 0x03) << 24)
| ((unsigned int) (s[1] ^ 0x80) << 18)
| ((unsigned int) (s[2] ^ 0x80) << 12)
| ((unsigned int) (s[3] ^ 0x80) << 6)
| (unsigned int) (s[4] ^ 0x80);
return 5;
}
}
else if (c < 0xfe)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
&& (s[5] ^ 0x80) < 0x40
&& (c >= 0xfd || s[1] >= 0x84))
#else
if (s[1] != 0 && s[2] != 0 && s[3] != 0 && s[4] != 0 && s[5] != 0)
#endif
{
*puc = ((unsigned int) (c & 0x01) << 30)
| ((unsigned int) (s[1] ^ 0x80) << 24)
| ((unsigned int) (s[2] ^ 0x80) << 18)
| ((unsigned int) (s[3] ^ 0x80) << 12)
| ((unsigned int) (s[4] ^ 0x80) << 6)
| (unsigned int) (s[5] ^ 0x80);
return 6;
}
}
#endif
}
/* invalid or incomplete multibyte character */
return -1;
}
|