1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
#!/usr/bin/perl
use HTML::TreeBuilder;
use HTML::Element;
# Convert an HTML document into a man page (man macros) on standard output.
#
# Usage: pass the HTML file as the first argument; when omitted,
# "doc/babel101/babelcmd.html" is read.
#
# Output layout:
#   * a ".TH BABEL 1" header carrying the Babel version (taken from the
#     first <h1>, when it matches "Babel <version> ") and the file's
#     last-modified date;
#   * one ".SH" section per <h2> element, titled with the upper-cased
#     heading text;
#   * each <p> starts a new ".PP" paragraph; every other element is
#     flattened, i.e. only its text content is emitted.
#
# Pragmas are lexically scoped, so enabling them here covers the whole script
# body below.
use strict;
use warnings;

# Check if file exists and get last modified time for header
my $filename = defined $ARGV[0] ? $ARGV[0] : "doc/babel101/babelcmd.html";
die "Cannot read $filename" unless ( -r $filename );
my $mtime = ( stat $filename )[9];

# Parse html file; parse_file returns undef when the file cannot be opened.
my $tree = HTML::TreeBuilder->new();
$tree->parse_file( $filename )
    or die "Cannot parse $filename: $!";

# Get Babel version from <h1>...</h1>
my $body = $tree->look_down( "_tag", "body" );
die "No <body> element found in $filename" unless ( $body );
my $h1 = $body->look_down( "_tag", "h1" );
my $babel_version;
$babel_version = $1 if ( $h1 && ( $h1->as_text =~ /Babel ([0-9.x]+) / ) );

# Write man page header
my @date   = localtime $mtime;
my @months = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
print ".TH BABEL 1 \"";
print "Version $babel_version: " if ( $babel_version );
printf "%d %s %d\"\n", $date[3], $months[ $date[4] ], 1900 + $date[5];

# Scan up to first <h2> tag.  The scan is bounded by the list length so a
# document without any top-level <h2> ends the loop instead of spinning
# forever on an empty list.
my @doclist = $body->content_list();
while ( @doclist and not( ref( $doclist[0] ) and $doclist[0]->tag eq "h2" ) )
{
    shift @doclist;
}
warn "No <h2> section found in $filename; only the header was written\n"
    unless ( @doclist );

# Interpret remainder as <h2> section name </h2> section body <h2> ...
SECTION:
while ( @doclist )
{
    my $heading = shift @doclist;
    die "Internal Error" if ( $heading->tag ne "h2" );

    # Escape every hyphen in the section title as "\-" for the man output.
    my $label = $heading->as_text;
    $label =~ s/-/\\-/g;
    printf "\n.SH %s\n", uc( $label );

    NODE:
    while ( @doclist )
    {
        my $elem = shift @doclist;
        if ( !ref( $elem ) )
        {
            # Elem is just text (not a tag); drop non-breaking spaces.
            $elem =~ s/\xA0//g;
            print "$elem";
            next NODE;
        }

        my $tag = $elem->tag;
        if ( $tag eq "h2" )
        {
            # Start of the next section: push it back for the outer loop.
            unshift @doclist, $elem;
            last NODE;
        }
        # elsif ($tag eq "br")
        # {
        # print "\n";
        # next;
        # }
        print "\n.PP\n" if ( $tag eq "p" );

        # Flatten the element: queue its children ahead of the remaining
        # nodes so they are processed next, in document order.
        @doclist = ( $elem->content_list, @doclist );
    }
}

# Release the parse tree now that all output has been written.
$tree->delete;
|