File: mhtxtsetext.pl

package info (click to toggle)
mhonarc 2.1-1
  • links: PTS
  • area: main
  • in suites: hamm, slink
  • size: 1,932 kB
  • ctags: 2,849
  • sloc: perl: 7,329; makefile: 51
file content (148 lines) | stat: -rw-r--r-- 4,981 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
##---------------------------------------------------------------------------##
##	@(#)  mhtxtsetext.pl 1.1 96/09/17 @(#)
## Library to convert text/setext to HTML.  Adapted for use in MHonArc
## by ehood@medusa.acs.uci.edu, Sept 1994.
## Filter routine can be registered with the following:
##		<MIMEFILTERS>
##		text/setext:m2h_text_setext'filter:mhtxtsetext.pl
##		text/x-setext:m2h_text_setext'filter:mhtxtsetext.pl
##		</MIMEFILTERS>
##---------------------------------------------------------------------------##
# setext -> HTML converter
#
# $Id: setext.pl,v 2.8 1994/06/23 05:44:15 sanders Exp $
#
# Tony Sanders <sanders@earth.com>, June 1993
#
# Status of typotags:
#     header-tt         passed untouched (XXX: use Subject: in next release)
#     title-tt          <H1>...</H1> (and <TITLE> if needed)
#     subhead-tt        <H2>...</H2> (and <TITLE> if needed)
#     indent-tt         reflows paragraphs
#
#     bold-tt           <B>...</B>
#     italic-tt         <I>...</I>
#     underline-tt      <I>...</I>
#     hot-tt            <A HREF="...">...</A>           (see also href-tt)
#
#     quote-tt          <BLOCKQUOTE>...</BLOCKQUOTE>
# NIY bullet-tt         <UL>...</UL>
#
#     twobuck-tt        ignored
#     suppress-tt       suppressed in output
#     twodot-tt         ignored
#
# Additional typotags supported for HTML:
#     href-tt           .. _text HREF
#     isindex-tt        .. <isindex>
#
# setext'html -- converts setext (.etx files) to HTML
# setext'title -- utility routine to convert setext titles and subheads to HTML
#

# TODO:XXX
# I need to figure out how to allow HTML markup in the text while at the
# same time suppressing "unintentional" markup.  For now <, & and > are HTML'ized.

# Define the translations supported
# $trans{'text/setext'}            = "text/html:setext'html";

package m2h_text_setext;

# Parser states used by the second pass of filter and by the to_* helpers:
#   $FMT    free-flowing text (normal HTML mode)
#   $PRE    preformatted text, <PRE>...</PRE>
#   $QUOTE  quoted text, <BLOCKQUOTE>...</BLOCKQUOTE>
($FMT, $PRE, $QUOTE) = (0, 1, 2);

# filter -- convert a text/setext message body to HTML.
#
# Arguments:
#   $header  not used by this routine
#   *fields  not used by this routine
#   *body    glob whose scalar ($body) holds the setext text to convert
#
# Returns a one-element list holding the generated HTML.
#
# The conversion runs in two passes over the lines of $body:
#   1. collect href-tt definitions (".. _name URL"), neutralize <isindex>
#      lines, and let &title turn underlined lines into ".. <Hn>text</Hn>"
#      marker lines;
#   2. emit HTML line by line, switching between free-flow, <PRE> and
#      <BLOCKQUOTE><PRE> output and expanding the inline typotags.
#
# Markup produced by the typotag substitutions is written with the bytes
# \376 and \377 standing in for "<" and ">", so that &htmlize can escape the
# real "<", "&" and ">" of the text and then turn the stand-ins into tags.
sub filter {
    local($header, *fields, *body) = @_;
    # @data is deliberately dynamically scoped: &title edits its elements.
    local(@data) = split(/\n/,$body);
    # All working state is local to this call.  %href and $fold used to be
    # plain package globals that were never reset, so link targets and the
    # blank-line folding state leaked from one message into the next.
    local($ret, $curstate, $fold) = ('', $FMT, 0);
    local(%href);
    local($i, $line, $name, $url, $text, $open, $close);

    # The input is untrusted message text.  Any \376 or \377 it contains would
    # be turned into a live "<" or ">" by &htmlize, bypassing the escaping, so
    # those two bytes are removed before any markup stand-ins are inserted.
    foreach (@data) { tr/\376\377//d; }

    # first pass, process <HEAD> items and hypertext link information
    for ($i = 0; $i <= $#data; $i++) {
        $line = $data[$i];

        # <ISINDEX> must be inside <HEAD>...</HEAD>, so drop it from the body
        if ($line =~ /^\.\.\s+<isindex>/i) { $data[$i] = ".."; next; }

        # locate HREF's:  .. _href URL
        if ($line =~ /^\.\.\s+_([^\s]*)\s+(.*)/) {
            ($name, $url) = ($1, $2);
            $url =~ s/\s+$//;           # trailing blanks/CR are not part of the URL
            $url =~ s/"/%22/g;          # a quote would end the HREF="..." attribute
            $url =~ s/_/%5F/g;          # keep underline-tt from matching inside the URL
            $href{$name} = $url;
            next;
        }

        # title-tt and subhead-tt: &title rewrites the line above the underline
        if ($line =~ /^===/) { &title("H1", $i); next; }
        if ($line =~ /^---/) { &title("H2", $i); next; }
    }

    # second pass, handle remaining typotags
    foreach (@data) {
        # heading marker lines produced by &title
        if (/^\.\.\s+(<H.>)(.*)(<\/H.>)/i) {
            # copy the captures before calling anything else
            ($open, $text, $close) = ($1, $2, $3);
            &to_fmt();
            $ret .= $open . &htmlize($text) . $close . "\n";
            next;
        }
        # every other ".." line is suppressed
        next if /^\.\./;

        # in free-flow text a run of blank lines yields a single <P>
        if ($curstate == $FMT && /^\s*$/) {
            $ret .= "<P>\n" unless $fold++;
            next;
        }
        $fold = 0;

        # state transitions
        if (/^>\s/) { &to_quote(); }
        elsif (/^  [^ ]/) { &to_fmt(); }
        else { &to_pre(); }

        s/^>\s*//;                                              # fix quote-tt
        s/^  ([^ ])/$1/;                                        # fix indent-tt

        # Inline typotags.  Each substitution is global so that every typotag
        # on a line is converted, not only the first one.
        # bold-tt
        s#\*\*([^\*]*)\*\*#\376B\377$1\376/B\377#g;
        # italic-tt
        s#~([^~]*)~#\376I\377$1\376/I\377#g;
        # hot-tt: a link when an href-tt defined the name, otherwise italic
        s#\b([^\s]*)_\b#
            $url = $href{$1}; ($text = $1) =~ tr/_/ /;
            $url ? qq'\376A HREF="$url"\377$text\376/A\377'
                 : "\376I\377$text\376/I\377"; #ge;
        # underline-tt
        s#_([^\s]*)_#
            ($text = $1) =~ tr/_/ /; "\376I\377$text\376/I\377"; #ge;

        $ret .= &htmlize($_) . "\n";
    }
    # close any <PRE>/<BLOCKQUOTE> that is still open
    &to_fmt();
    ($ret);
}

# to_fmt -- switch the output to free-flow text, appending to $ret whatever
# closing tags the current state ($curstate) requires.
sub to_fmt {
    # already in free-flow text: nothing to close
    if ($curstate == $FMT) { return; }
    if ($curstate == $PRE) {
        $ret .= "</PRE>\n";
    }
    if ($curstate == $QUOTE) {                                    #XXX
        $ret .= "</PRE></BLOCKQUOTE>\n";
    }
    $curstate = $FMT;
}
# to_pre -- switch the output to preformatted text, appending to $ret the
# tags needed to leave the current state ($curstate) and open <PRE>.
sub to_pre {
    # already inside <PRE>: nothing to do
    if ($curstate == $PRE) { return; }
    if ($curstate == $FMT) {
        $ret .= "<PRE>\n";
    }
    if ($curstate == $QUOTE) {                                    #XXX
        $ret .= "</PRE></BLOCKQUOTE><PRE>\n";
    }
    $curstate = $PRE;
}
# to_quote -- switch the output to quoted text, appending to $ret the tags
# needed to leave the current state ($curstate) and open <BLOCKQUOTE><PRE>.
sub to_quote {
    # already inside a quote: nothing to do
    if ($curstate == $QUOTE) { return; }
    if ($curstate == $FMT) {                                      #XXX
        $ret .= "<BLOCKQUOTE><PRE>\n";
    }
    if ($curstate == $PRE) {                                      #XXX
        $ret .= "</PRE><BLOCKQUOTE><PRE>\n";
    }
    $curstate = $QUOTE;
}
# htmlize -- return a copy of the argument that is safe to emit as HTML.
#
# "&", "<" and ">" are replaced by numeric character references, and then
# the bytes \376 and \377 are turned into "<" and ">".  The second step
# applies to every \376/\377 in the string, wherever it came from, so text
# that already contains those bytes ends up with literal "<" and ">".
sub htmlize {
    local($str) = @_;
    # "&" must be escaped first so the references added next stay intact
    $str =~ s/&/&#38;/g;
    $str =~ s/</&#60;/g;
    $str =~ s/>/&#62;/g;
    $str =~ tr/\376\377/<>/;                                    # convert back
    $str;
}
# title -- turn an underlined line into a heading marker line.
#
# Arguments:
#   $head  heading element name to use, e.g. "H1" or "H2"
#   $i     index in @data of the underline ("===" or "---") line
#
# Works on the caller's @data: the underline line is replaced by ".." and the
# line above it, with leading whitespace removed, becomes
# ".. <$head>text</$head>".
#
# An underline on the very first line has no text above it.  Decrementing
# the index would give -1, which addresses the LAST element of @data and
# would turn an unrelated line into the heading, so in that case @data is
# left untouched.
sub title {
    local($head, $i) = @_;
    return if $i < 1;
    $data[$i--] = ".."; $data[$i] =~ s/^\s*//;
    # $ret .= "<TITLE>$data[$i]</TITLE>\n" unless $title++;
    $data[$i] = ".. <$head>" . $data[$i] . "</$head>";
}

1;