1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
|
#! /usr/bin/perl -w
use strict;
# Extract the relevant parts of the LDP OMF file, and transform identifier
# URLs to match the installed locations.
my ($prefix, $index, $omf) = @ARGV;
open INDEX, "< $index" or die "Couldn't open $index: $!";
my %good_url;
map { chomp; $good_url{$_} = 1; } <INDEX>;
close INDEX;
open OMF, "< $omf" or die "Couldn't open $omf: $!";
my @lines = <OMF>;
close OMF;
my @blocks;
for (@lines) {
if (/^</ and not m#</resource>#) {
push @blocks, $_;
} else {
$blocks[$#blocks] .= $_;
}
}
my $format = qq{<format mime="text/html"/>};
# Put everything in this category for now.
my $subject = qq{<subject><category>General|Other</category></subject>};
my $resid = 1;
for (my $id = 0; $id < @blocks; $id++) {
next unless defined $blocks[$id]; # if we spliced away the last element
next unless $blocks[$id] =~
m# ^\s* <identifier> (.*?) </identifier> \s*$ #mx;
my $url = $1;
$url =~ s#http://.*?/##;
my $good = 0;
if ($url =~ m#^HOWTO/#) {
if ($good_url{$url}) {
$good = 1;
} elsif ($url =~ m#/$# and $good_url{"${url}index.html"}) {
$good = 1;
} elsif ($good_url{"$url/index.html"}) {
$good = 1;
}
}
if ($good) {
$url =~ s/HOWTO/$prefix/;
$blocks[$id] =~ s/<resource id=.*?>/<resource id='$resid'>/;
$resid++;
$blocks[$id] =~
s{^(\s*)<identifier>.*?</identifier>\s*$}
{$1<identifier url="$url"/>\n$1$format\n$1$subject}m;
$blocks[$id] =~ s#<language>(.*?)</language>#<language code="$1"/>#;
} else {
splice @blocks, $id, 1;
redo;
}
}
print @blocks;
|