1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
|
#!/usr/bin/perl
# Filter: reads HTML from the files named on the command line (or STDIN),
# rewrites an iso-8859-1 charset declaration inside a CONTENT-TYPE META tag
# to utf-8, and replaces every decimal character reference (&#NNN;) with
# the text returned by sgml().  The result is written to STDOUT.
while (my $line = <>) {
    chomp $line;

    # HTML tag/attribute names and charset labels are case-insensitive, so
    # match with /i; otherwise <meta http-equiv="Content-Type" ...> would
    # keep declaring iso-8859-1 after the references have been converted.
    $line =~ s/(.*META HTTP-EQUIV="CONTENT-TYPE".*)iso-8859-1(.*)/$1utf-8$2/i;

    # Repeat until nothing is left to replace, so a reference that only
    # appears after an earlier pass (e.g. "&#38;#65;") is expanded as well.
    1 while $line =~ s/&#(\d+);/sgml($1)/ge;

    # Every output line is newline-terminated, including a final input line
    # that had no trailing newline.
    print $line, "\n";
}
# sgml($code) - encode one numeric character reference as UTF-8.
#
#   $code   decimal code point, e.g. the digits captured from "&#NNN;"
#
# Returns the UTF-8 encoding of the code point as a string of one to four
# characters, each with an ordinal below 0x100 (i.e. raw bytes).  Code
# points that have no valid UTF-8 encoding (UTF-16 surrogates and values
# above U+10FFFF) are replaced by U+FFFD REPLACEMENT CHARACTER.
sub sgml {
    my ($code) = @_;

    # Always return something other than the reference itself: a caller
    # that re-scans its output for "&#NNN;" must not see it again.
    $code = 0xFFFD
        if $code > 0x10FFFF || ($code >= 0xD800 && $code <= 0xDFFF);

    # One byte: plain ASCII.
    return chr $code if $code < 0x80;

    # Two bytes: 110xxxxx 10xxxxxx
    if ($code < 0x800) {
        return chr(0xC0 | ($code >> 6))
             . chr(0x80 | ($code & 0x3F));
    }

    # Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($code < 0x10000) {
        return chr(0xE0 | ($code >> 12))
             . chr(0x80 | (($code >> 6) & 0x3F))
             . chr(0x80 | ($code & 0x3F));
    }

    # Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code >> 18))
         . chr(0x80 | (($code >> 12) & 0x3F))
         . chr(0x80 | (($code >> 6) & 0x3F))
         . chr(0x80 | ($code & 0x3F));
}
|