File: html2docs

package info (click to toggle)
doc-linux 2008.08-1
  • links: PTS, VCS
  • area: main
  • in suites: lenny, squeeze
  • size: 21,252 kB
  • ctags: 127
  • sloc: perl: 420; sh: 161; makefile: 147
file content (178 lines) | stat: -rw-r--r-- 4,290 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/perl

use strict;
use warnings;

# Copyright (c) 1998 by Marco Budde (Budde@tu-harburg.de)
# Copyright (c) 2001, 2002, 2003 by Colin Watson (cjwatson@debian.org)
# Copyright (c) 2008 Frank Lichtenheld (djpig@debian.org)
# GNU General Public License

################################################################
# HOWTO-INDEX -> doc-base                                      #
#                                                              #
# usage:  html2docs <package name> <doc-base section>          #
#                   <unpacked HOWTOs directory> <files>        #
################################################################

# Positional arguments, consumed in order.  Whatever remains in @ARGV
# afterwards is the list of index files to process.
my $package       = shift;
my $section       = shift;
my $unpacked_dir  = shift;

# These values are interpolated into output file names and control fields
# further down, so refuse to run with a short argument list instead of
# writing files such as "debian/.prerm".  The arguments are shifted in
# order, so testing the last one covers all three.
die "usage: html2docs <package name> <doc-base section> "
    . "<unpacked HOWTOs directory> <files>\n"
    unless defined $unpacked_dir;

# Directory prefix written into the Index: and Files: fields.
my $root = '/usr/share/doc/HOWTO/en-html';

##############################
#  get abstract of document  #
##############################

# Parse a HOWTO index HTML file and collect a title and abstract for every
# document it links to.
#
# Parameters:
#   $filename - path of the index file; it is read as ISO-8859-1.
#
# Returns a hash (as a flat list) keyed by the link target with its leading
# "../" removed.  Each value is a hash reference with the keys "title" and
# "abstract".
#
# Dies if the file cannot be opened.
#
# The ($) prototype is kept so that existing paren-less call sites keep
# parsing exactly as before.
sub get_abstracts ($)
{
    my $filename = shift;
    my %docs;

    open my $in, '<:encoding(iso-8859-1)', $filename
        or die "can't open $filename: $!\n";
    my $index = do { local $/ = undef; <$in> };
    close $in;
    $index = '' unless defined $index;

    # Transform silly DocBook-generated HTML into something more easily
    # parseable: pull a ">" that starts a line back onto the previous line,
    # then flatten the remaining newlines so a paragraph is a single line.
    $index =~ s/\n>/>/g;
    $index =~ s/\n/ /g;

    # A link/title paragraph is followed by a separate paragraph holding
    # the abstract, so the most recent link and title are carried across
    # loop iterations.
    my ($link, $title);
    for my $paragraph ($index =~ m!<P>.*?</P>!g) {
        if ($paragraph =~ m!<A\s*HREF="\.\./(.*?)" .*?          # link
                            <I\s*CLASS="CITETITLE">(.*?)</I>    # title
                            !x) {
            ($link, $title) = ($1, $2);
            next;
        }

        next unless $paragraph =~ m!<I.*?>.*?</I>\.\s*          # skip date
                                    (.*?)                       # abstract
                                    </P>!x;
        my $abstract = $1;

        # An abstract that shows up before any link paragraph has nothing
        # to attach to.  Storing it would create an entry under an empty
        # key, which later passes the "file exists" test because it names
        # the unpacked directory itself.
        next unless defined $link;

        # Clean up whitespace.
        $abstract =~ s/^\s+//;
        $abstract =~ s/\s+$//;
        $abstract =~ s/\s\s+/ /g;

        # Dispose of some HTMLisms: curly double quotes become plain ones,
        # any other numeric entity becomes the character it names.
        for my $text ($title, $abstract) {
            $text =~ s/&#822[01];/"/g;
            $text =~ s/&#(\d+);/chr $1/eg;
        }

        # Strip inline markup.  /g matters here: an abstract may contain
        # several emphasised words or links.
        $abstract =~ s!</?EM>!!g;
        $abstract =~ s!<I .*?>!!g;
        $abstract =~ s!</I>!!g;
        $abstract =~ s!<A .*?>!!g;
        $abstract =~ s!</A>!!g;

        # Improve sorting.
        $title =~ s/^the //i;
        $docs{$link} = {title => $title, abstract => $abstract};
    }

    return %docs;
}


####################
#  write doc-base  #
####################

# Write the doc-base control file for a single HTML document.
#
# Parameters:
#   $filename - document path relative to $root; used for Index: and Files:
#   $docid    - value of the Document: field and suffix of the output file
#   $linkname - value of the Title: field
#   $abstract - value of the Abstract: field (a few common misspellings
#               are corrected before it is written)
#
# Reads the file-level $package, $section and $root.  The output goes to
# debian/$package.doc-base.$docid and is written through the :utf8 layer.
#
# Dies if the file cannot be opened, written or closed.  Buffered write
# errors only surface at close time, so the close is checked as well.
sub write_doc_base ($$$$)
{
    my ($filename, $docid, $linkname, $abstract) = @_;
    my $target = "debian/$package.doc-base.$docid";

    # Fix spelling errors since lintian complains otherwise
    $abstract =~ s/\blinux\b/Linux/g;
    $abstract =~ s/\busefull\b/useful/g;
    $abstract =~ s/\bseperate\b/separate/g;

    open my $docbase, '>:utf8', $target
        or die "can't write to $target: $!";
    print {$docbase} <<EOF or die "can't write to $target: $!";
Document: $docid
Title: $linkname
Abstract: $abstract
Section: $section

Format: HTML
Index: $root/$filename
Files: $root/$filename

EOF
    close $docbase or die "can't close $target: $!";
}

################
#     main     #
################

# Walk every index file named on the command line and emit one doc-base
# control file per listed document that is present under $unpacked_dir.
# The generated document IDs are collected in @docids for later use.
my @docids;

# Neither of these depends on the document being processed.
my $debug      = defined($ENV{DEB_BUILD_OPTIONS})
    && $ENV{DEB_BUILD_OPTIONS} =~ /debug/;
my $is_nonfree = $package =~ /nonfree/;

foreach my $index_file (@ARGV) {
    my %abstracts = get_abstracts($index_file);

    # Only documents that exist in the unpacked tree get a control file.
    my @present = grep { -e "$unpacked_dir/$_" } keys %abstracts;

    foreach my $docfile (sort @present) {
        # Document ID: lower-cased path without ".html" or "/index.html".
        (my $docid = lc $docfile) =~ s!(?:/index)?\.html!!;
        next if $is_nonfree && $docid eq 'howto-index';
        $docid = "ldp-en-$docid";

        my ($title, $abstract) = @{ $abstracts{$docfile} }{qw(title abstract)};
        if ($debug) {
            print "$docid: $title\n";
            print "$abstract\n\n";
        }

        write_doc_base($docfile, $docid, $title, $abstract);
        push @docids, $docid;
    }
}

# Unless the package name matches /nonfree/, two fixed document IDs are put
# in front of the generated ones.
# NOTE(review): presumably their doc-base files are maintained by hand
# elsewhere in the package -- confirm.
unshift @docids, "linux-howtos", "linux-faq"
    unless $package =~ /nonfree/;

# Generate the prerm maintainer script.  It unregisters every document ID
# with install-docs on remove or upgrade, provided install-docs is found.
# Writes and the final close are checked so that a truncated script cannot
# go unnoticed.
my $prerm_path = "debian/$package.prerm";
open my $prerm, '>', $prerm_path
    or die "can't open $prerm_path: $!\n";
print {$prerm} <<EOF or die "can't write to $prerm_path: $!\n";
#!/bin/sh

set -e

#DEBHELPER#

if [ "\$1" = remove ] || [ "\$1" = upgrade ] && which install-docs >/dev/null 2>&1; then
        install-docs -r @docids
fi

exit 0
EOF

close $prerm or die "can't close $prerm_path: $!\n";

# Generate the postinst maintainer script.  On configure it registers the
# control file of every document ID with install-docs, provided install-docs
# is found.  Writes and the final close are checked so that a truncated
# script cannot go unnoticed.
my @docbasefiles = map { "/usr/share/doc-base/$_" } @docids;
my $postinst_path = "debian/$package.postinst";
open my $postinst, '>', $postinst_path
    or die "can't open $postinst_path: $!\n";
print {$postinst} <<EOF or die "can't write to $postinst_path: $!\n";
#!/bin/sh

set -e

if [ "\$1" = configure ] && which install-docs >/dev/null 2>&1; then
        install-docs -i @docbasefiles
fi

#DEBHELPER#

exit 0
EOF

close $postinst or die "can't close $postinst_path: $!\n";