1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
|
#!/usr/bin/perl -w
use warnings;
use strict;
my $HASHBUCKETS1_16 = 256;
my $HASHBUCKETS1_32 = 16;
my $HASHBUCKETS2_16 = 16;
my $HASHBUCKETS3_16 = 4;
print <<__EOF__;
/*
* This file is part of PhysicsFS (https://icculus.org/physfs/)
*
* This data generated by physfs/extras/makecasefoldhashtable.pl ...
* Do not manually edit this file!
*
* Please see the file LICENSE.txt in the source's root directory.
*/
#ifndef _INCLUDE_PHYSFS_CASEFOLDING_H_
#define _INCLUDE_PHYSFS_CASEFOLDING_H_
#ifndef __PHYSICSFS_INTERNAL__
#error Do not include this header from your applications.
#endif
/* We build three simple hashmaps here: one that maps Unicode codepoints to
a one, two, or three lowercase codepoints. To retrieve this info: look at
case_fold_hashX, where X is 1, 2, or 3. Most foldable codepoints fold to one,
a few dozen fold to two, and a handful fold to three. If the codepoint isn't
in any of these hashes, it doesn't fold (no separate upper and lowercase).
Almost all these codepoints fit into 16 bits, so we hash them as such to save
memory. If a codepoint is > 0xFFFF, we have separate hashes for them,
since there are (currently) only about 120 of them and (currently) all of them
map to a single lowercase codepoint. */
typedef struct CaseFoldMapping1_32
{
PHYSFS_uint32 from;
PHYSFS_uint32 to0;
} CaseFoldMapping1_32;
typedef struct CaseFoldMapping1_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
} CaseFoldMapping1_16;
typedef struct CaseFoldMapping2_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
PHYSFS_uint16 to1;
} CaseFoldMapping2_16;
typedef struct CaseFoldMapping3_16
{
PHYSFS_uint16 from;
PHYSFS_uint16 to0;
PHYSFS_uint16 to1;
PHYSFS_uint16 to2;
} CaseFoldMapping3_16;
typedef struct CaseFoldHashBucket1_16
{
const CaseFoldMapping1_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket1_16;
typedef struct CaseFoldHashBucket1_32
{
const CaseFoldMapping1_32 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket1_32;
typedef struct CaseFoldHashBucket2_16
{
const CaseFoldMapping2_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket2_16;
typedef struct CaseFoldHashBucket3_16
{
const CaseFoldMapping3_16 *list;
const PHYSFS_uint8 count;
} CaseFoldHashBucket3_16;
__EOF__
my @foldPairs1_16;
my @foldPairs2_16;
my @foldPairs3_16;
my @foldPairs1_32;
for (my $i = 0; $i < $HASHBUCKETS1_16; $i++) {
$foldPairs1_16[$i] = '';
}
for (my $i = 0; $i < $HASHBUCKETS1_32; $i++) {
$foldPairs1_32[$i] = '';
}
for (my $i = 0; $i < $HASHBUCKETS2_16; $i++) {
$foldPairs2_16[$i] = '';
}
for (my $i = 0; $i < $HASHBUCKETS3_16; $i++) {
$foldPairs3_16[$i] = '';
}
open(FH,'<','casefolding.txt') or die("failed to open casefolding.txt: $!\n");
while (<FH>) {
chomp;
# strip comments from textfile...
s/\#.*\Z//;
# strip whitespace...
s/\A\s+//;
s/\s+\Z//;
next if not /\A([a-fA-F0-9]+)\;\s*(.)\;\s*(.+)\;/;
my ($code, $status, $mapping) = ($1, $2, $3);
my $hexxed = hex($code);
#print("// code '$code' status '$status' mapping '$mapping'\n");
if (($status eq 'C') or ($status eq 'F')) {
my ($map1, $map2, $map3) = (undef, undef, undef);
$map1 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
$map2 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
$map3 = $1 if $mapping =~ s/\A([a-fA-F0-9]+)(\s*|\Z)//;
die("mapping space too small for '$code'\n") if ($mapping ne '');
die("problem parsing mapping for '$code'\n") if (not defined($map1));
if ($hexxed < 128) {
# Just ignore these, we'll handle the low-ASCII ones ourselves.
} elsif ($hexxed > 0xFFFF) {
# We just need to add the 32-bit 2 and/or 3 codepoint maps if this die()'s here.
die("Uhoh, a codepoint > 0xFFFF that folds to multiple codepoints! Fixme.") if defined($map2);
my $hashed = (($hexxed ^ ($hexxed >> 8)) & ($HASHBUCKETS1_32-1));
#print("// hexxed '$hexxed' hashed1 '$hashed'\n");
$foldPairs1_32[$hashed] .= " { 0x$code, 0x$map1 },\n";
} elsif (not defined($map2)) {
my $hashed = (($hexxed ^ ($hexxed >> 8)) & ($HASHBUCKETS1_16-1));
#print("// hexxed '$hexxed' hashed1 '$hashed'\n");
$foldPairs1_16[$hashed] .= " { 0x$code, 0x$map1 },\n";
} elsif (not defined($map3)) {
my $hashed = (($hexxed ^ ($hexxed >> 8)) & ($HASHBUCKETS2_16-1));
#print("// hexxed '$hexxed' hashed2 '$hashed'\n");
$foldPairs2_16[$hashed] .= " { 0x$code, 0x$map1, 0x$map2 },\n";
} else {
my $hashed = (($hexxed ^ ($hexxed >> 8)) & ($HASHBUCKETS3_16-1));
#print("// hexxed '$hexxed' hashed3 '$hashed'\n");
$foldPairs3_16[$hashed] .= " { 0x$code, 0x$map1, 0x$map2, 0x$map3 },\n";
}
}
}
close(FH);
for (my $i = 0; $i < $HASHBUCKETS1_16; $i++) {
$foldPairs1_16[$i] =~ s/,\n\Z//;
my $str = $foldPairs1_16[$i];
next if $str eq '';
my $num = '000' . $i;
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
my $sym = "case_fold1_16_${num}";
print("static const CaseFoldMapping1_16 ${sym}[] = {\n$str\n};\n\n");
}
for (my $i = 0; $i < $HASHBUCKETS1_32; $i++) {
$foldPairs1_32[$i] =~ s/,\n\Z//;
my $str = $foldPairs1_32[$i];
next if $str eq '';
my $num = '000' . $i;
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
my $sym = "case_fold1_32_${num}";
print("static const CaseFoldMapping1_32 ${sym}[] = {\n$str\n};\n\n");
}
for (my $i = 0; $i < $HASHBUCKETS2_16; $i++) {
$foldPairs2_16[$i] =~ s/,\n\Z//;
my $str = $foldPairs2_16[$i];
next if $str eq '';
my $num = '000' . $i;
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
my $sym = "case_fold2_16_${num}";
print("static const CaseFoldMapping2_16 ${sym}[] = {\n$str\n};\n\n");
}
for (my $i = 0; $i < $HASHBUCKETS3_16; $i++) {
$foldPairs3_16[$i] =~ s/,\n\Z//;
my $str = $foldPairs3_16[$i];
next if $str eq '';
my $num = '000' . $i;
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
my $sym = "case_fold3_16_${num}";
print("static const CaseFoldMapping3_16 ${sym}[] = {\n$str\n};\n\n");
}
print("static const CaseFoldHashBucket1_16 case_fold_hash1_16[] = {\n");
for (my $i = 0; $i < $HASHBUCKETS1_16; $i++) {
my $str = $foldPairs1_16[$i];
if ($str eq '') {
print(" { NULL, 0 },\n");
} else {
my $num = '000' . $i;
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
my $sym = "case_fold1_16_${num}";
print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
}
}
print("};\n\n");
print("static const CaseFoldHashBucket1_32 case_fold_hash1_32[] = {\n");
for (my $i = 0; $i < $HASHBUCKETS1_32; $i++) {
my $str = $foldPairs1_32[$i];
if ($str eq '') {
print(" { NULL, 0 },\n");
} else {
my $num = '000' . $i;
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
my $sym = "case_fold1_32_${num}";
print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
}
}
print("};\n\n");
print("static const CaseFoldHashBucket2_16 case_fold_hash2_16[] = {\n");
for (my $i = 0; $i < $HASHBUCKETS2_16; $i++) {
my $str = $foldPairs2_16[$i];
if ($str eq '') {
print(" { NULL, 0 },\n");
} else {
my $num = '000' . $i;
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
my $sym = "case_fold2_16_${num}";
print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
}
}
print("};\n\n");
print("static const CaseFoldHashBucket3_16 case_fold_hash3_16[] = {\n");
for (my $i = 0; $i < $HASHBUCKETS3_16; $i++) {
my $str = $foldPairs3_16[$i];
if ($str eq '') {
print(" { NULL, 0 },\n");
} else {
my $num = '000' . $i;
$num =~ s/\A.*?(\d\d\d)\Z/$1/;
my $sym = "case_fold3_16_${num}";
print(" { $sym, __PHYSFS_ARRAYLEN($sym) },\n");
}
}
print("};\n\n");
print <<__EOF__;
#endif /* _INCLUDE_PHYSFS_CASEFOLDING_H_ */
/* end of physfs_casefolding.h ... */
__EOF__
exit 0;
# end of makecashfoldhashtable.pl ...
|