1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
|
#!/usr/bin/perl -w
# Script parses the XML file for the appendix on preseeding and extracts
# example snippets to form the raw preseed example file. Section titles are
# added as headers.
# The script will include all text between <informalexample> tags that have
# the attribute 'role="example"' set, except if a 'condition' attribute is
# in force that does not match the specified release or if an 'arch' attribute
# is in force that does not match the specified architecture.
# Define module to use
use HTML::Parser();
use Getopt::Std;
# Parser state shared with the handler subs (start_rtn, text_rtn, end_rtn).
# These are package globals because the handlers access them by name.
our %tagstatus;         # per tag: nesting 'count' and pending section 'title'
our %ignore;            # 'tag' and 'depth' of the element that started an ignored region
our $prevtag  = '';     # name of the most recently opened tag
our $titletag;          # section tag whose title is being captured
our $settitle = 0;      # true while the next text chunk is a section title
our %example  = (
    'print'   => 0,     # currently inside a printable example
    'in_sect' => 0,     # example text has been printed in the current section
    'first'   => 1,     # no example text has been printed yet
    'new'     => 0,     # the next text chunk is the start of an example
);

# Getopt::Std stores each option in the package variable $opt_<letter>.
our ($opt_h, $opt_d, $opt_a, $opt_r);
getopts('hda:r:') || die "Unknown command line arguments! Try $0 -h\n";

if ($opt_h) {
    print <<END;
preseed.pl: parses preseed appendix xml file to extract preseed example file
Usage: $0 [-hdar] <xml-file>
Options:
-h display this help information
-d debug mode
-a <arch> architecture for which to generate the example
(default: i386)
-r <release> release for which to generate the example (required)
END
    exit 0;
}

die "Must specify release for which to generate example." if ! $opt_r;

# The first non-option argument is the XML file to process.
my $xmlfile = shift;
die "Must specify XML file to parse!" if ! $xmlfile;
die "Specified XML file \"$xmlfile\" not found." if ! -f $xmlfile;

my $arch    = $opt_a ? "$opt_a" : "i386";
my $release = $opt_r;

# Create instance
my $p = HTML::Parser->new(
    start_h => [\&start_rtn, 'tagname, text, attr'],
    text_h  => [\&text_rtn,  'text'],
    end_h   => [\&end_rtn,   'tagname']);

# Start parsing the specified file. parse_file returns undef (with $! set)
# when the file cannot be opened, e.g. it exists but is not readable.
defined $p->parse_file($xmlfile)
    or die "Cannot read XML file \"$xmlfile\": $!\n";
# Return true when a profiling attribute value applies to $wanted.
# The value is either a single token or a ';'-separated list of tokens
# (the DocBook profiling convention); it applies if any token equals $wanted.
sub _profile_matches {
    my ($value, $wanted) = @_;
    return scalar grep { $_ eq $wanted } split /\s*;\s*/, $value;
}

# Execute when start tag is encountered
#
# Parameters (as requested by the 'tagname, text, attr' argspec):
#   $tagname - name of the tag being opened
#   (text)   - original text of the tag; not used here
#   $attr    - hash reference with the tag's attributes
#
# Updates the script-wide state: nesting counts in %tagstatus, the ignored
# region in %ignore, title capture ($settitle, $titletag, $prevtag) and the
# example flags in %example. Reads $release, $arch and $opt_d.
sub start_rtn {
    my ($tagname, undef, $attr) = @_;
    print STDERR "\nStart: $tagname\n" if $opt_d;

    # NOTE: the pattern is unanchored, so it also matches tag names that merely
    # contain one of these words. end_rtn uses the same pattern; the two must
    # stay identical or the nesting counts go out of balance.
    if ( $tagname =~ /appendix|sect1|sect2|sect3|para/ ) {
        $tagstatus{$tagname}{'count'} += 1;
        print STDERR "$tagname $tagstatus{$tagname}{'count'}\n" if $opt_d;

        # Only look for a new reason to ignore when not already ignoring.
        if ( ! exists $ignore{'tag'} ) {
            my @checks = (
                [ 'condition', 'Condition',    $release, 'condition'    ],
                [ 'arch',      'Architecture', $arch,    'architecture' ],
            );
            for my $check (@checks) {
                my ($key, $label, $wanted, $reason) = @$check;
                next if ! exists $attr->{$key};
                print STDERR "$label: $attr->{$key}\n" if $opt_d;
                next if _profile_matches($attr->{$key}, $wanted);

                # Remember which tag, at which nesting depth, started the
                # ignored region so that end_rtn can find its closing tag.
                $ignore{'tag'}   = $tagname;
                $ignore{'depth'} = $tagstatus{$tagname}{'count'};
                print STDERR "Start ignore because of $reason\n" if $opt_d;
            }
        }
    }

    # Assumes that <title> is the first tag after a section tag
    if ( $prevtag =~ /sect1|sect2|sect3/ ) {
        $settitle = ( $tagname eq 'title' );
        $titletag = $prevtag;
        $example{'in_sect'} = 0;
    }
    $prevtag = $tagname;

    # Examples are only printed when marked role="example" and not ignored.
    if ( $tagname eq 'informalexample' && ! exists $ignore{'tag'} ) {
        if ( exists $attr->{role} && $attr->{role} eq "example" ) {
            $example{'print'} = 1;
            $example{'new'}   = 1;
        }
    }
}
# Execute when text is encountered
#
# Parameters:
#   $text - the text chunk reported by the parser
#
# When a section title is expected ($settitle), the trimmed text is stored as
# the pending title of $titletag in %tagstatus. While an example is being
# printed ($example{'print'}), any pending section titles are written as
# '#'-prefixed header lines, followed by the text itself. Output goes to the
# currently selected filehandle.
sub text_rtn {
    my ($text) = @_;

    if ( $settitle ) {
        # Clean leading and trailing whitespace for titles
        $text =~ s/^[[:space:]]*//;
        $text =~ s/[[:space:]]*$//;
        $tagstatus{$titletag}{'title'} = $text;
        $settitle = 0;
    }

    if ( $example{'print'} ) {
        # Print section headers: sect1 gets four '#', sect2 three, sect3 two.
        for my $level ( 1 .. 3 ) {
            my $sect = "sect$level";
            next if ! $tagstatus{$sect}{'title'};

            # Separate top-level sections with a blank line, except before
            # the very first output.
            print "\n" if $level == 1 && ! $example{'first'};
            print '#' x ( 5 - $level ), " $tagstatus{$sect}{'title'}\n";

            # A title is printed once only.
            delete $tagstatus{$sect}{'title'};
        }

        # Clean leading whitespace
        if ( $example{'new'} ) {
            $text =~ s/^[[:space:]]*//;
        }

        # Replace entities in examples; /g so that every occurrence in the
        # chunk is replaced, not just the first one.
        # FIXME: should maybe be extracted from entity definition
        $text =~ s/&archive-mirror;/http.us.debian.org/g;

        print "$text";

        $example{'first'}   = 0;
        $example{'new'}     = 0;
        $example{'in_sect'} = 1;
    }
}
# Execute when the end tag is encountered
#
# Parameters:
#   $tagname - name of the tag being closed
#
# Closing an <informalexample> switches example printing off. For the
# structural tags the handler drops any pending title, ends an ignored region
# when its opening tag closes at the recorded depth, terminates the printed
# example text of the section with a newline and lowers the nesting count.
# Dies when a tag's count becomes negative.
sub end_rtn {
    my ($tagname) = @_;

    print STDERR "\nEnd: $tagname\n" if $opt_d;

    $example{'print'} = 0 if $tagname eq 'informalexample';

    # Everything below is bookkeeping for structural tags only.
    return unless $tagname =~ /appendix|sect1|sect2|sect3|para/;

    # Work on the tag's own record, creating it if this tag was never opened.
    my $status = ( $tagstatus{$tagname} ||= {} );

    # A title that was never printed is dropped with its section.
    delete $status->{'title'};

    # The ignored region ends with the tag that started it, at the same depth.
    if (   exists $ignore{'tag'}
        && $ignore{'tag'} eq $tagname
        && $ignore{'depth'} == $status->{'count'} )
    {
        delete $ignore{'tag'};
    }

    # Finish the example text printed in this section with a newline.
    if ( $example{'in_sect'} ) {
        print "\n";
        $example{'in_sect'} = 0;
    }

    $status->{'count'} -= 1;
    print STDERR "$tagname $status->{'count'}\n" if $opt_d;
    die "Invalid XML file: negative count for tag <$tagname>!" if $status->{'count'} < 0;
    return;
}
|