1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
|
#! /usr/bin/perl
#
# Last modification: Mon, 28 Oct 1996 08:54:50 +0200
#
# j_html2latin - Copyright (c)1996 by Fabrizio Polacco <fpolacco@debian.org>.
# All rights reserved. This program is free software; you can redistribute it
# and/or modify it under the same terms as Perl itself.
# This script is intended to be used to build the Debian package of the HTML
# on-line magazine Pluto Journal, but it can also be used to build other
# on-line magazines easily.
#
# Usage: j_html2latin <source-file> <dest-file>
#
# The script copies <source-file> to <dest-file>, replacing each HTML entity
# with the corresponding Latin-1 character (except the entities amp, gt, lt
# and quot, which are left as they are).
#
# Requires libwww-perl.
# Copy <source-file> to <dest-file>, replacing HTML character references with
# the Latin-1 character they stand for.  References to the markup-significant
# characters & < > " are left escaped, in both their named and their numeric
# form, so that escaped text in the input is never turned into live markup.
use strict;
use warnings;

# Nothing is imported: the entity table is read below through its fully
# qualified name, %HTML::Entities::entity2char.
use HTML::Entities ();

die "Usage: j_html2latin <source-file> <dest-file>\n" if @ARGV != 2;
my ($src_path, $dst_path) = @ARGV;

# Open the input first, so that a bad source path does not leave behind a
# freshly truncated destination file.
open(my $in, '<', $src_path)
    or die "Cannot open input file $src_path: $!\n";
open(my $out, '>', $dst_path)
    or die "Cannot open output file $dst_path: $!\n";

# Named entities that must stay escaped, and the code points of the same
# characters (& > < ") so that their numeric references stay escaped too.
my %keep_name = map { $_ => 1 } qw(amp gt lt quot);
my %keep_code = map { $_ => 1 } (38, 62, 60, 34);

# Local name => character table holding only what may be substituted.
my %latin1_for;
while (my ($entity, $char) = each %HTML::Entities::entity2char) {
    next if $keep_name{$entity};

    # The table may also list characters beyond Latin-1; skip them so that
    # only single-byte characters can ever be written.
    next if ord($char) > 255;

    $latin1_for{$entity} = $char;
}

# Return the replacement text for one matched character reference.
#   $reference - the whole matched text, e.g. "&eacute;" or "&#233;"
#   $code      - the decimal digits of a numeric reference, else undef
#   $name      - the name of a named reference, else undef
# The reference itself is returned when it must not be converted: an unknown
# name, a code above 255, or one of the preserved markup characters.
sub replacement_for {
    my ($reference, $code, $name) = @_;

    if (defined $code) {
        # "+ 0" normalises digit strings with leading zeros such as "038".
        return $reference if $code > 255 or $keep_code{ $code + 0 };
        return chr $code;
    }

    return exists $latin1_for{$name} ? $latin1_for{$name} : $reference;
}

while (my $line = <$in>) {
    # One pass handles both the numeric and the named form, so text produced
    # by a substitution is never scanned again and cannot be decoded twice.
    $line =~ s/(&(?:\#(\d+)|(\w+));?)/replacement_for($1, $2, $3)/eg;

    print {$out} $line
        or die "Cannot write to output file $dst_path: $!\n";
}

close($in);

# Buffered write errors only surface when the handle is closed.
close($out)
    or die "Cannot close output file $dst_path: $!\n";
|