File: utf8.c

package info (click to toggle)
wp2latex 3.97%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 5,228 kB
  • sloc: cpp: 45,091; ansic: 8,998; asm: 2,435; makefile: 529; sh: 19
file content (53 lines) | stat: -rw-r--r-- 1,283 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#include <stdio.h>

#include "typedfs.h"

/* When compiled as C++, declare utf8_fgetc with C linkage so its name is not mangled. */
#ifdef __cplusplus
extern "C" {
DWORD utf8_fgetc(FILE *F);
}
#endif


/*
 * Count of bytes that follow a given first byte of a UTF-8 sequence.
 * Only bytes above 0x7f are covered, so the index is (first byte - 0x80).
 * utf8_fgetc() treats a zero entry as an invalid first byte.
 */
static const BYTE utf8_length[128] =
{
    /* 0x80-0xbf: nothing follows */
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0xc0-0xdf: one byte follows */
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,  1,1,1,1,1,1,1,1,
    /* 0xe0-0xef: two bytes follow */
    2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,
    /* 0xf0-0xf7 */ 3,3,3,3,3,3,3,3,
    /* 0xf8-0xfb */ 4,4,4,4,
    /* 0xfc-0xfd */ 5,5,
    /* 0xfe-0xff */ 0,0
};

/*
 * Mask applied to the first byte of a sequence to keep only its payload
 * bits; the index is the number of bytes that follow the first byte.
 */
static const unsigned char utf8_mask[6] =
{
    0x7f,   /* 0 following bytes */
    0x1f,   /* 1 following byte  */
    0x0f,   /* 2 following bytes */
    0x07,   /* 3 following bytes */
    0x03,   /* 4 following bytes */
    0x01    /* 5 following bytes */
};


DWORD utf8_fgetc(FILE *F)
{
BYTE buffer[7],*b;
BYTE len;
DWORD d;

 d = fgetc(F);
 if(d<0x80) return(d);
 if(d>0xFF) return(0xFFFFFFFF);	//end of file

 len = utf8_length[d-0x80];
 if(len==0) return(0xFFFFFFFF);
 d=d & utf8_mask[len];
 fread(buffer,len,1,F);
 buffer[len]=0;
 b=buffer;
 while(*b>0)
   {
   if( (*b=*b ^ 0x80) >= 0x40 ) return(0xFFFFFFFF); //error
   d=(d<<6) | *b;
   b++;
   }
 return(d);
}