File: utf8.c

package info (click to toggle)
kbd 2.0.4-4
  • links: PTS, VCS
  • area: main
  • in suites: buster, sid
  • size: 14,136 kB
  • sloc: sh: 12,838; ansic: 12,531; pascal: 472; lex: 466; makefile: 394; yacc: 356; perl: 126; sed: 16
file content (53 lines) | stat: -rw-r--r-- 1,058 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/* utf8.c - collected routines for conversion to/from utf8 */
#include "config.h"
#include "utf8.h"

/*
 * Convert one utf8 sequence to long.
 *
 * inptr: in/out - points at the first byte of the sequence to decode.
 * cnt:   either 0 (length unknown, no length check is made) or the
 *        number of bytes available at *inptr.
 * err:   out - receives 0 on success, a nonzero error value otherwise.
 *
 * On success: update *inptr to be the first nonread character,
 *   set *err to 0, and return the obtained value.
 * On failure: leave *inptr unchanged, set *err to some nonzero error value:
 *   UTF8_BAD: bad utf8, UTF8_SHORT: input too short
 *   and return 0.
 *
 * Sequences of 1 to 6 bytes are accepted (values of up to 31 bits).
 * Lead bytes 0xfe and 0xff are rejected as UTF8_BAD.
 * Overlong encodings are NOT rejected here.
 */
unsigned long
from_utf8(char **inptr, int cnt, int *err)
{
	/* Longest sequence whose payload (31 bits) still fits in uc. */
	enum { max_seq_len = 6 };

	unsigned char *in;
	unsigned int uc, uc2;
	int need, bit;

	in = (unsigned char *)(*inptr);
	uc = *in++;

	/*
	 * Count the leading 1 bits of the first byte: 0 for a plain
	 * 7-bit character, otherwise the total length of the sequence.
	 * The loop ends on its own for 0xff because bit becomes 0.
	 */
	need = 0;
	bit  = 0x80;
	while (uc & bit) {
		need++;
		bit >>= 1;
	}

	/*
	 * 0xfe / 0xff (need == 7 or 8) never start a valid sequence.
	 * Decoding them would shift more than 32 bits into uc and return
	 * a truncated value, and for 0xff the mask below would be all ones.
	 */
	if (need > max_seq_len) {
		*err = UTF8_BAD;
		return 0;
	}

	if (cnt && cnt < need) {
		*err = UTF8_SHORT;
		return 0;
	}

	/* A continuation byte (10xxxxxx) cannot start a sequence. */
	if (need == 1) {
		*err = UTF8_BAD;
		return 0;
	}

	/* Keep only the payload bits of the lead byte. */
	uc &= (bit - 1);

	/* Each of the remaining need-1 bytes must be 10xxxxxx. */
	while (need > 1) {
		uc2 = *in++;
		if ((uc2 & 0xc0) != 0x80) {
			*err = UTF8_BAD;
			return 0;
		}
		uc = ((uc << 6) | (uc2 & 0x3f));
		need--;
	}

	*inptr = (char *)in;
	*err   = 0;
	return uc;
}