File: txt2html.pl

package info (click to toggle)
dssi 0.9.1-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 1,852 kB
  • ctags: 499
  • sloc: sh: 8,531; ansic: 3,241; cpp: 1,042; makefile: 211; perl: 191
file content (163 lines) | stat: -rw-r--r-- 3,772 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/perl -w
# 
# A plain-text documentation processor.
# Copyright Chris Cannam 1998

use strict;

my $hbase = 2; # main heading is h$hbase, next is h($hbase+1) etc

my @p;
my $laststyle = "none";

my $title = $ARGV[0];
if (defined $title) { $title =~ s/\.txt//; }
else { $title = "Untitled"; }

my $filename = $ARGV[0];
if (!defined $filename) { $filename = "(stdin)"; }

print '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd">';
print "\n<html><head>\n";
print '<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">';
print "\n<title>$title</title></head><body>\n<!-- Do not edit! Automatically generated from $filename by $0 -->\n";
#print "<contents>\n"; # for automatic TOC generator

my $hadtoc = 0;

# Returned strings here are symbolic -- they only resemble HTML.
# Don't change them, change the interpretation of them in write_para

sub style_para
{
    # heading?
    if ($#p == 1) {
	$p[1] =~ s/^=+$// and return "h1";
	$p[1] =~ s/^-+$// and return "h2";
	$p[1] =~ s/^~+$// and return "h3";
    }

    # something wholly indented?
    my @nni = grep { /^\s/ } @p;
    if (@p and $#nni == $#p) {
	$p[0] =~ s/^\s+[^\w\s\$\%\#]\s+([\w\"&])/$1/ and return "uli";
	$p[0] =~ s/^\s+\d+[^\w\s]\s+([\w\"&])/$1/ and return "oli";
	$p[0] =~ s/^\s+[a-z][^\w\s]\s+([\w\"&])/$1/ and return "cli";
	$p[0] =~ s/^(\s+[^\w\s\d])/$1/ and return "pre";
	return "cont";
    }
    
    # anything else
    return "p";
}

sub write_head
{
    my $n = shift;
    my $content = shift;
    chop $content;
    my $m = $hbase + $n - 1;
    print "<h$m>$content</h$m>\n";
} 

sub write_para
{
    my $style = shift;

    if ($style eq "cont" and
	!($laststyle eq "uli" or $laststyle eq "oli" or $laststyle eq "cli")) {
	$style = "pre";
    }

    if (!($style eq $laststyle or $style eq "cont")) {

	if ($laststyle eq "uli") {
	    print "</ul>";
	} elsif ($laststyle eq "oli") {
	    print "</ol>";
	} elsif ($laststyle eq "cli") {
	    print "</ol>";
	} elsif ($laststyle eq "pre") {
	    print "</pre>";
	}

	if ($style eq "uli") {
	    print "<ul>";
	} elsif ($style eq "oli") {
	    print "<ol>";
	} elsif ($style eq "cli") {
	    print "<ol type='a'>";
	} elsif ($style eq "pre") {
	    print "<pre>";
	}
    }

    print "\n";

    if (!$hadtoc and $style ne "h1") {
        print "<CONTENTS>\n";
        $hadtoc = 1;
    }

    if ($style eq "h1") {
        write_head 1, $p[0];
    } elsif ($style eq "h2") {
        write_head 2, $p[0];
    } elsif ($style eq "h3") {
        write_head 3, $p[0];
    } elsif ($style eq "uli" or $style eq "oli" or $style eq "cli") {
	print "<br/><br/></li>" if ($laststyle eq $style);
	$p[0] =~ s/^\s+(?:\d+)?[^\w\s]\s+//;
	print "<li>@p";
    } elsif ($style eq "cont") {
	$style = $laststyle;
	if ($style eq "uli" or $style eq "oli" or $style eq "cli") {
	  print "<br/><br/>@p";
	} else {
	  print "<p>@p";
	}
    } elsif ($style eq "p") {
	print "<p>@p</p>";
    } else {
	print @p;
    }

    if (!$hadtoc and $style eq "h1") {
        print "<CONTENTS>\n";
        $hadtoc = 1;
    }

    $laststyle = $style;
}

while (<>)
{
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;

    s, &lt; URL: ( [^&:\"]* ) .txt &gt; ,<A HREF="$1.html">$1.html</A>,xg;
    s, &lt; URL: ( [^&\"]*  )      &gt; ,<A HREF="$1">$1</A>,xg;

    s,    \*( \w[\w\s0-9-]* )\*    ,<B>$1</B>,xg;
    s, \b  _( \w[\w\s0-9-]* )_  \b ,<I>$1</I>,xg;

    s, \\([\[\]]) ,$1,xg or s, \[( .*? )\] ,<CODE>$1</CODE>,xg;

    if (/^\s*$/) {
	write_para style_para if $#p >= 0;
	undef @p;
    } else {
	push @p, $_;
	if (/^[=~-]+$/ && $#p == 1) {
	    write_para style_para;
	    undef @p;
	}
    }
}

#write_para style_para if defined @p;
write_para style_para;
print "\n</CONTENTS>\n</body>\n</html>\n";