File: fixlinks

package info (click to toggle)
lg-base 28-1
  • links: PTS
  • area: main
  • in suites: hamm
  • size: 1,192 kB
  • ctags: 30
  • sloc: perl: 85; makefile: 30; sh: 1
file content (133 lines) | stat: -rw-r--r-- 2,858 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/perl
#
# fixlinks 0.2
# Copyright (c) 1997 by Christian Schwarz <schwarz@monet.m.isar.de>
# May by distributed under GPL 2.
#

# Specification:
#
# Processes HTML files specified on the command line and removes
# all references to non-existing local files. The output is written
# to the same files, while the old are renamed. 
#
# A link     <a href="foo.html">Foos and Bars</a>    is changed to
#         <!--REF:foo.html a href="foo.html"-->Foos and Bars<!--/a--> if the
# file foo.html does not exist, and vice versa.
#
# Note, that this program only handles HTML files correctly, that do
# comply with HTML syntax.

use Cwd;
use File::Basename;

package Parser; #-------------------------------
use HTML::Parser;
@ISA = qw(HTML::Parser);

sub declaration {
  my ($self, $decl) = @_;
  print ::OUT "<!$decl>";
}

sub start {
  my ($self, $tag, $attr, $attrseq, $origtext) = @_;

  my ($href);
  if (($tag eq 'a') and defined($href = $$attr{'href'}) and
      (not ($href =~ s/^(\S+:)//o) or ($1 =~ /file:/i))) {
    my $type = $1;
    $href =~ s/(\#.*)$//o;
    my $anchor = $1;
    #print "href: ($type,$href,$anchor)\n";

    # file exists?
    if (-f $href) {
      # yes, no change
    } else {
      # no! Change URL
      $origtext = "<!--REF:$href a";
      for $tag (@$attrseq) {
	if ($$attr{$tag}) {
	  $origtext .= " $tag=\"$$attr{$tag}\"";
	} else {
	  $origtext .= " $tag";
	}
      }
      $origtext .= "-->";
      $comment_next = 1;
    }
  }

  print ::OUT $origtext;
}

sub end {
  my ($self, $tag) = @_;

  if (($tag eq 'a') and ($comment_next)) {
    print ::OUT "<!--ENDREF a-->";
    $comment_next = 0;
  } else {
    print ::OUT "</$tag>";
  }
}

sub text {
  my ($self, $text) = @_;
  print ::OUT $text;
}

sub comment {
  my ($self, $comment) = @_;

  if ($comment =~ /^REF:(\S+)\s+(.*)$/o) {
    if (-f $1) {
      # file exists!
      print ::OUT "<$2>";
      $uncomment_next = 1;
      return;
    } else {
      # file still doesn't exist, no change
    }
  } elsif ($uncomment_next and ($comment =~ s/^ENDREF\s+(.+)$//o)) {
    print ::OUT "</$1>";
    $uncomment_next = 0;
    return;
  }

  print ::OUT "<!--$comment-->";
}

#########################################################################

package main;

if ($#ARGV == -1) {
  print "usage: fixlinks <html file> ...\n";
  exit 1;
}

my $starting_dir = cwd;

my $p = Parser->new;

my $filename;
while ($filename = shift) {
  if ( ! -f $filename ) {
    print "error: file $filename not found, skipping.\n";
    next;
  }

  rename($filename,"$filename~") or die "cannot rename $filename: $!";

  open(OUT,">$filename") or die "cannot open output file $filename: $!";
  $rpath = dirname($filename);
  chdir($rpath) if $rpath;
  $p->parse_file(basename($filename)."~");
  close(OUT);
  unlink(basename($filename)."~");
  chdir($starting_dir);
}

exit 0;