File: utf8.c

package info (click to toggle)
epic5 1.1.10-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 4,624 kB
  • ctags: 6,288
  • sloc: ansic: 68,826; makefile: 700; ruby: 227; sh: 178; perl: 30
file content (77 lines) | stat: -rw-r--r-- 2,381 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/*
 * Here's the plan:
 * You type a UTF-8 string on stdin, and it prints each of the Unicode
 * code points (in hex, one per line) on output.
 */
#include <stdio.h>

/*
 * Decode the single UTF-8 sequence that begins at 'seq' and store the
 * resulting code point in '*code_point'.
 *
 * 'seq' must point into a NUL-terminated buffer.  Every trailing byte is
 * checked to be a continuation byte (10xxxxxx) before it is used; since
 * neither NUL nor newline has that form, a truncated sequence can never
 * make us read past the end of the buffer.
 *
 * The historical 5- and 6-byte forms are accepted as well as the 1- to
 * 4-byte forms.  Overlong encodings and surrogates are NOT rejected; the
 * raw decoded value is reported as-is.
 *
 * Returns the number of bytes consumed (1 to 6) on success.  On failure
 * returns 0 and sets '*bad' to the address of the offending byte.
 */
static unsigned int	decode_sequence (const unsigned char *seq, unsigned long *code_point, const unsigned char **bad)
{
	unsigned long	value;
	unsigned int	numbytes;
	unsigned int	i;

	/* The lead byte tells us the sequence length and holds the top bits. */
	if ((seq[0] & 0x80) == 0x00) {
		numbytes = 1;
		value = seq[0] & 0x7F;
	} else if ((seq[0] & 0xE0) == 0xC0) {
		numbytes = 2;
		value = seq[0] & 0x1F;
	} else if ((seq[0] & 0xF0) == 0xE0) {
		numbytes = 3;
		value = seq[0] & 0x0F;
	} else if ((seq[0] & 0xF8) == 0xF0) {
		numbytes = 4;
		value = seq[0] & 0x07;
	} else if ((seq[0] & 0xFC) == 0xF8) {
		numbytes = 5;
		value = seq[0] & 0x03;
	} else if ((seq[0] & 0xFE) == 0xFC) {
		numbytes = 6;
		value = seq[0] & 0x01;
	} else {
		/* A stray continuation byte, or 0xFE / 0xFF. */
		*bad = seq;
		return 0;
	}

	/* Each continuation byte contributes its low six bits. */
	for (i = 1; i < numbytes; i++) {
		if ((seq[i] & 0xC0) != 0x80) {
			*bad = seq + i;
			return 0;
		}
		value = (value << 6) | (unsigned long)(seq[i] & 0x3F);
	}

	*code_point = value;
	return numbytes;
}

/*
 * Read lines from stdin and print every code point of each line in hex,
 * one per line.  Stops at end of input (or on a read error), or after
 * reporting the first malformed byte with "Huh?".  The exit status is 0
 * in every case, as it always has been.
 *
 * Lines are read in chunks of at most 255 bytes, so a multi-byte sequence
 * that straddles a chunk boundary on a very long line is reported as
 * malformed.
 */
int main (void) 
{
	char			string[256];
	const unsigned char *	this_point;
	const unsigned char *	bad;
	unsigned long		code_point;
	unsigned int		numbytes;

    for (;;)
    {
	/*
	 * Test fgets() itself rather than feof(): that way a final line
	 * with no trailing newline is still decoded, and a read error
	 * ends the program instead of re-processing stale data.
	 */
	if (fgets(string, sizeof string, stdin) == NULL)
		return 0;

	this_point = (const unsigned char *)string;

	/* A chunk may end at NUL without a newline (EOF or long line). */
	while (*this_point != '\n' && *this_point != '\0')
	{
		numbytes = decode_sequence(this_point, &code_point, &bad);
		if (numbytes == 0) {
			printf("Huh? %#x\n", (unsigned int)*bad);
			printf("%d\n", *bad & 0x80);
			return 0;
		}

		/* code_point is an unsigned long, so it needs %lx, not %x. */
		printf("%#lx\n", code_point);
		this_point += numbytes;
	}
    }
}