File: j_html2latin

package info (click to toggle)
journal-dev 1-4
  • links: PTS
  • area: main
  • in suites: hamm, slink
  • size: 72 kB
  • ctags: 9
  • sloc: perl: 135; makefile: 50
file content (48 lines) | stat: -rwxr-xr-x 1,386 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#! /usr/bin/perl
#
# Last modification: Mon, 28 Oct 1996 08:54:50 +0200
#
# j_html2latin - Copyright (c)1996 by Fabrizio Polacco <fpolacco@debian.org>.
# All rights reserved.  This program is free software; you can redistribute it
# and/or modify it under the same terms as Perl itself.

# This script is intended to be used to build the Debian package of the HTML
# on-line magazine Pluto Journal, but it can also be used to easily build
# other online magazines.

# Usage: j_html2latin <source-file> <dest-file>
# The script copies <source-file> into <dest-file>, replacing each HTML entity
# with the corresponding latin1 char.

# require libwww-perl
use HTML::Entities %entity2char;

# Expect exactly two arguments: the HTML source file and the destination file.
die "Usage: j_html2latin <source-file> <dest-file>\n" if scalar(@ARGV) != 2;
$IN = $ARGV[0];
$OUT = $ARGV[1];

# Open the input first, so that a missing or unreadable source file does not
# leave behind an empty (or truncated) destination file.  The three-argument
# form of open keeps characters such as '>', '|' or surrounding blanks in a
# file name from being interpreted as part of the open mode.
# The bareword handles IN and OUT are the ones used by the rest of the script.
open(IN, '<', $IN) or die "Cannot open input file $IN: $!\n";
open(OUT, '>', $OUT) or die "Cannot open output file $OUT: $!\n";

# Build the script's own entity table: start from a full copy of the table
# shipped with HTML::Entities, then drop the entities that have to stay
# encoded in the output.
my %entity2char = %HTML::Entities::entity2char;
delete @entity2char{
	'amp',	# ampersand
	'gt',	# greater than
	'lt',	# less than
	'quot',	# double quote
};

# Code points of the markup-significant characters " & < >.  Numeric
# references to them are left alone, just like the named entities quot, amp,
# lt and gt, so that the converted text remains valid HTML.
my %markup_code = map { $_ => 1 } (34, 38, 60, 62);

# Return the replacement text for one matched entity reference.
#   $whole - the complete matched text (e.g. "&#233;" or "&eacute;")
#   $code  - the decimal digits of a numeric reference, undef otherwise
#   $name  - the name of a named reference, undef otherwise
# A reference that is not converted is returned unchanged.
sub latin1_for_reference
{
	my ($whole, $code, $name) = @_;

	if (defined $code)
	{
		# only latin1 code points are converted; "+ 0" normalises leading
		# zeroes, so that "&#038;" is still recognised as an ampersand
		return $whole if $code >= 256 || $markup_code{$code + 0};
		return chr($code);
	}
	return exists $entity2char{$name} ? $entity2char{$name} : $whole;
}

# Copy the input to the output line by line, converting the references.
while (defined(my $line = <IN>))
{
	# A single pass handles numeric and named references together, so the
	# text produced by one replacement is never scanned a second time.
	$line =~ s/(&(?:\#(\d+)|(\w+));?)/latin1_for_reference($1, $2, $3)/eg;
	print OUT $line or die "Cannot write to output file $OUT: $!\n";
}

close IN;
# buffered write errors (e.g. a full disk) only show up when closing
close OUT or die "Cannot close output file $OUT: $!\n";