1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
|
#include <stdio.h>
#include "typedfs.h"
/* Ensure that the prototype is declared with C linkage when compiled as C++ */
#ifdef __cplusplus
extern "C" {
DWORD utf8_fgetc(FILE *F);
}
#endif
/*
 * Number of continuation bytes that follow a lead byte, for bytes above 0x7f.
 * The table is indexed by (lead byte - 0x80).
 * An entry of 0 marks a byte that cannot start a sequence: 0x80-0xbf
 * (continuation bytes) and 0xfe-0xff. utf8_fgetc() reports those as errors.
 * Lead bytes 0xf8-0xfd (4 or 5 continuation bytes) are accepted by this table.
 */
static const BYTE utf8_length[128] =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
3,3,3,3,3,3,3,3,4,4,4,4,5,5,0,0 /* 0xf0-0xff */
};
/*
 * Mask selecting the payload bits of the first byte of a UTF-8 sequence.
 * The table is indexed by the number of continuation bytes (0..5), i.e. by
 * the value taken from utf8_length. Index 0 is the single-byte (ASCII) mask.
 */
static const unsigned char utf8_mask[6] = { 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
DWORD utf8_fgetc(FILE *F)
{
BYTE buffer[7],*b;
BYTE len;
DWORD d;
d = fgetc(F);
if(d<0x80) return(d);
if(d>0xFF) return(0xFFFFFFFF); //end of file
len = utf8_length[d-0x80];
if(len==0) return(0xFFFFFFFF);
d=d & utf8_mask[len];
fread(buffer,len,1,F);
buffer[len]=0;
b=buffer;
while(*b>0)
{
if( (*b=*b ^ 0x80) >= 0x40 ) return(0xFFFFFFFF); //error
d=(d<<6) | *b;
b++;
}
return(d);
}
|