1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
#!/usr/bin/perl
#
# fixlinks 0.2
# Copyright (c) 1997 by Christian Schwarz <schwarz@monet.m.isar.de>
# May by distributed under GPL 2.
#
# Specification:
#
# Processes HTML files specified on the command line and removes
# all references to non-existing local files. The output is written
# to the same files, while the old are renamed.
#
# A link <a href="foo.html">Foos and Bars</a> is changed to
# <!--REF:foo.html a href="foo.html"-->Foos and Bars<!--/a--> if the
# file foo.html does not exist, and vice versa.
#
# Note, that this program only handles HTML files correctly, that do
# comply with HTML syntax.
use Cwd;
use File::Basename;
package Parser; #-------------------------------
use HTML::Parser;
@ISA = qw(HTML::Parser);
sub declaration {
my ($self, $decl) = @_;
print ::OUT "<!$decl>";
}
sub start {
my ($self, $tag, $attr, $attrseq, $origtext) = @_;
my ($href);
if (($tag eq 'a') and defined($href = $$attr{'href'}) and
(not ($href =~ s/^(\S+:)//o) or ($1 =~ /file:/i))) {
my $type = $1;
$href =~ s/(\#.*)$//o;
my $anchor = $1;
#print "href: ($type,$href,$anchor)\n";
# file exists?
if (-f $href) {
# yes, no change
} else {
# no! Change URL
$origtext = "<!--REF:$href a";
for $tag (@$attrseq) {
if ($$attr{$tag}) {
$origtext .= " $tag=\"$$attr{$tag}\"";
} else {
$origtext .= " $tag";
}
}
$origtext .= "-->";
$comment_next = 1;
}
}
print ::OUT $origtext;
}
sub end {
my ($self, $tag) = @_;
if (($tag eq 'a') and ($comment_next)) {
print ::OUT "<!--ENDREF a-->";
$comment_next = 0;
} else {
print ::OUT "</$tag>";
}
}
sub text {
my ($self, $text) = @_;
print ::OUT $text;
}
sub comment {
my ($self, $comment) = @_;
if ($comment =~ /^REF:(\S+)\s+(.*)$/o) {
if (-f $1) {
# file exists!
print ::OUT "<$2>";
$uncomment_next = 1;
return;
} else {
# file still doesn't exist, no change
}
} elsif ($uncomment_next and ($comment =~ s/^ENDREF\s+(.+)$//o)) {
print ::OUT "</$1>";
$uncomment_next = 0;
return;
}
print ::OUT "<!--$comment-->";
}
#########################################################################
package main;
if ($#ARGV == -1) {
print "usage: fixlinks <html file> ...\n";
exit 1;
}
my $starting_dir = cwd;
my $p = Parser->new;
my $filename;
while ($filename = shift) {
if ( ! -f $filename ) {
print "error: file $filename not found, skipping.\n";
next;
}
rename($filename,"$filename~") or die "cannot rename $filename: $!";
open(OUT,">$filename") or die "cannot open output file $filename: $!";
$rpath = dirname($filename);
chdir($rpath) if $rpath;
$p->parse_file(basename($filename)."~");
close(OUT);
unlink(basename($filename)."~");
chdir($starting_dir);
}
exit 0;
|