1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
#This script extracts a table of Unicode character names from the
#UnicodeData file. It also generates the ancillary offset table.
BEGIN{
FS=";";
printf("char * unames[]={\n");
printf("#include \"unicode.h\"\n#include \"offsettbl.h\"\nstruct oft Offset_Table[]={\n") > "offsettbl.c";
prev=-1;
cur=-1;
Offset = 0;
PreviousLineType = 0;
}
{
hone = "0x" $1; cur = strtonum(hone);
# if(cur > 0x2FA1D) nextfile; # That is, stop (after executing the END action).
#Deal with undefined ranges.
if(cur > prev+1){
if(PreviousLineType == 1){ #We just ended a defined range.
printf("0x%X,0x%X,%d,\n",DefinedRangeStart,prev,Offset) > "offsettbl.c";
}
printf("0x%X,0x%X,OFT_UNDEF,\n",prev+1,cur-1) > "offsettbl.c";
Offset += (cur - prev -1);
PreviousLineType = 0;
}
#Deal with specified ranges
if(match($2,/First>$/)){
RangeStart=cur;
getline;
if(!match($2,/Last>$/)){
printf(stderr,"Error: first and last range specs not adjacent at line %d.\n",NR) > "/dev/stderr";
exit;
}
hone = "0x" $1; cur = strtonum(hone);
RangeEnd = cur;
if(PreviousLineType == 1){ #We just ended a defined range.
printf("0x%X,0x%X,%d,\n",DefinedRangeStart,RangeStart-1,Offset) > "offsettbl.c";
}
RangeName = substr($2,2,RSTART-4);
Offset += (RangeEnd - RangeStart+1);
printf("0x%X,0x%X,OFT_NOINF,\n",RangeStart,RangeEnd) > "offsettbl.c";
PreviousLineType = 0;
prev=cur;
next;
}
#The Latin-1 control characters all have the official Unicode name "control", so we
#use the real name in the comment field for them when it exists.
if(PreviousLineType == 0) DefinedRangeStart = cur;
if(match($2,"<control>")){
if(length($11) > 0) Name = $11;
else Name = "unspecified control character";
}
else Name = $2;
printf("\"%s\",\t/* 0x%04X */\n",Name,cur);
PreviousLineType = 1;
prev=cur;
}
END{
if(PreviousLineType == 1){ #We just ended a defined range.
printf("0x%X,0x%X,%d,\n",DefinedRangeStart,prev,Offset) > "offsettbl.c";
}
printf("};\n");
printf("};\nint Offset_Entries = sizeof(Offset_Table)/sizeof(struct oft);\n") > "offsettbl.c";
}
|