File: sanitize.pl

package info (click to toggle)
texlive-doc 2005.dfsg.2-1
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 125,100 kB
  • ctags: 12,558
  • sloc: xml: 24,267; perl: 14,394; makefile: 727; sh: 324; lisp: 276; java: 159; sed: 4
file content (195 lines) | stat: -rw-r--r-- 6,946 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# $Id: sanitize.pl,v 1.11 2003/06/20 10:51:44 rf Exp rf $

# provides the sanitize_line function used by all texfaq2* files
#
# will not compile standing alone

# Convert a LaTeX line to HTML.
#
# Takes no arguments: the line is read from, and rewritten in, $_.
#
# State kept in package globals between calls:
#   $converting  - the LaTeX-to-HTML rewrites are applied only while this is
#                  true (and $ignoring is false); it is cleared when
#                  \begin{verbatim} is seen and set to 1 on \end{verbatim}.
#                  Nothing in this sub gives it an initial value.
#   $ignoring    - counter raised by \htmlignore, \begin{comment} and
#                  \begin{footnoteenv} and lowered by their closing forms;
#                  while it is non-zero the line is replaced by "".
#   $itemset, $enditemset
#                - markup wrapped round \item[...] labels; set at
#                  \begin{ctanrefs} and emptied at \end{ctanrefs}.
# Also reads %qref, %SymbolChar, $arch and $root, and calls
# generate_CTAN_ref for every \CTANref{...}.
#
# The substitutions are order dependent (later rules consume the output of
# earlier ones), so keep the sequence when editing.  '"' is used as the
# s/// delimiter throughout so that '/' and braces need no special care.

sub sanitize_line {
    # HTML-escape the raw text first; every tag emitted below is added
    # after this point and so is not escaped.
    s"\&"\&amp\;"g;
    s"\<"\&lt\;"g;
    s"\>"\&gt\;"g;
    if ($converting && !$ignoring) {
        s"\\vspace\*\{[^\}]*\}""g;
        s"``"\""g;
        s"''"\""g;
        # a blank line is a paragraph break
        s"^\s*$"<p>";
        # strip TeX comments
        s"\%.*"";
        # Escaped braces are parked as &lbrace;/&rbrace; so that the
        # {...} argument patterns below do not trip over them; they are
        # turned back into real braces near the end.
        s"\\obracesymbol\{\}"\&lbrace\;"g;
        s"\\cbracesymbol\{\}"\&rbrace\;"g;
        s"\\\{"\&lbrace\;"g;
        s"\\\}"\&rbrace\;"g;

        # argument-less logo/name macros
        s"\\ae\{\}"\&aelig\;"g;
        s"\\AllTeX\{\}"(La)TeX"g;
        s"\\twee\{\}"2e"g;
        s"\\LaTeXe\{\}"LaTeX2e"g;
        s"\\LaTeXo\{\}"LaTeX 2.09"g;
        s"\\MF\{\}"Metafont"g;
        s"\\MP\{\}"MetaPost"g;
        s"\\BV\{\}"<i>Baskerville</i>"g;
        s"\\TUGboat\{\}"<i>TUGboat</i>"g;
        s"\\PDFTeX\{\}"PDFTeX"g;
        s"\\PDFLaTeX\{\}"PDFLaTeX"g;
        s"\\CONTeXT\{\}"ConTeXt"g;
        s"\\NTS\{\}"<i>NTS</i>"g;
        s"\\eTeX\{\}"e-TeX"g;
        s"\\Eplain\{\}"Eplain"g;
        s"\\TeXsis\{\}"TeXsis"g;
        s"\\YandY\{\}"Y&amp;Y"g;
        s"\\WYSIWYG\{\}"WYSIWYG"g;
        s"\\PS\{\}"PostScript"g;
        s"\\dots\{\}"..."g;
        s"\\ldots\{\}"..."g;
        s"\\large""g;
        s"\\pounds\{\}"&pound;"g;
        s"\\arrowhyph\{\}"-&gt; "g;
        s"\\protect""g;
        s"\-\-\-"\-"g;
        s"\-\-"\-"g;
        # any remaining \name{} becomes plain "name"
        s"\\(\w+)\{\}"$1"g;

        # accents
        s"\\\"a"\&auml\;"g;
        s"\\\"o"\&ouml\;"g;
        s"\\\'e"\&eacute\;"g;
        s"\\\^e"\&ecirc\;"g;
        s"\\\'o"\&oacute\;"g;
        s"\\ss"\&szlig\;"g;
        s"`"'"g;

        # one-argument font/markup macros
        s"\\label\{[^\}]*\}""g;
        s"\\acro\{([^\}]*)\}"$1"g;
        s"\\ensuremath\{([^\}]*)\}"$1"g;
        s"\\emph\{([^\}]*)\}"<em>$1</em>"g;
        s"\\textit\{([^\}]*)\}"<em>$1</em>"g;
        s"\\textsl\{([^\}]*)\}"<em>$1</em>"g;
        s"\\meta\{([^\}]*)\}"&lt\;<em>$1</em>&gt\;"g;
        s"\\texttt\{([^\}]*)\}"<code>$1</code>"g;
        s"\\textbf\{([^\}]*)\}"<b>$1</b>"g;
        s"\\csx\{([^\}]*)\}"<code>\\$1</code>"g;
        s"\\parens\{([^\}]*)\}"$1"g;
        s"\\oparen\{\}""g;
        s"\\cparen\{\}""g;
        # When a link was generated, shield every '~' on the line as
        # \textasciitilde{} so the "~ becomes a space" rule below leaves
        # it alone.  /g on the \href rule (as on \URL further down) so
        # that every \href on the line is converted, not just the first.
        s"\~"\\textasciitilde{}"g if s"\\href\{([^\}]*)\}\{([^\}]*)\}"<a href=\"$1\">$2</a>"g;
        s"\\Q\{([^\}]*)\}""g;
        s"\\checked\{([^\}]*)\}\{([^\}]*)\}""g;
        s"\\footnote\{([^\}]*)\}""g;
        s"\\thinspace\{\}" "g;
        s"\\section\{([^\}]*)\}""g;
        s"\\subsection\{([^\}]*)\}""g;
        s"\$\\pi\$"<i>pi</i>"g;
        s"\$([^\$]*)\$"<i>$1</i>"g;
        s"\\ISBN\{([^\}]*)\}"ISBN $1"g;

        # names of programs, fonts, packages, ... in |...| or {...} form
        s"\\ProgName\|([^\|]*)\|"<i>$1</i>"g;
        s"\\ProgName\{([^\}]*)\}"<i>$1</i>"g;
        s"\\FontName\|([^\|]*)\|"<i>$1</i>"g;
        s"\\FontName\{([^\}]*)\}"<i>$1</i>"g;
        s"\\Package\|([^\|]*)\|"<i>$1</i>"g;
        s"\\Package\{([^\}]*)\}"<i>$1</i>"g;
        s"\\Class\|([^\|]*)\|"<i>$1</i>"g;
        s"\\Class\{([^\}]*)\}"<i>$1</i>"g;
        s"\\Email\|([^\|]*)\|"<i>$1</i>"g;
        s"\\mailto\|([^\|]*)\|"<a href\=\"mailto:$1\"><i>$1</i></a>"g;
        s"\\File\|([^\|]*)\|"<i>$1</i>"g;
        s"\\Newsgroup\|([^\|]*)\|"<i>$1</i>"g;

        # links
        s"\~"\\textasciitilde{}"g if s"\\URL\{([^\}]*)\}"\<a href\=\"$1\"\>$1\<\/a\>"g;
        s"\\FTP\|([^\|]*)\|"\<a href\=\"ftp\:\/\/$1\/\"\>$1\<\/a\>"g;
        # The argument is brace delimited, so stop at the first '}'
        # (the class used to be [^\|], which ran on to the last '}' of
        # the line).
        s"\\CTAN\{([^\}]*)\}"\<a href\=\"ftp\://$arch/$root/$1\/\"\>$1\<\/a\>"g;
        s"\\Qref\[([^\]]*)\]\{([^\}]*)\}\{([^\}]*)\}"<a href\=\"$qref{$3}\">$2</a>"g;
        s"\\Qref\{([^\}]*)\}\{([^\}]*)\}"<a href\=\"$qref{$2}\">$1</a>"g;

        # \cmdinvoke: longest argument lists first, so that the shorter
        # patterns do not swallow the start of a longer invocation
        s"\\cmdinvoke\{([^\}]*)\}\{([^\}]*)\}\{([^\}]*)\}\{([^\}]*)\}\{([^\}]*)\}"<code>\\$1\{$2\}\{$3\}\{$4\}\{$5\}</code>"g;
        s"\\cmdinvoke\{([^\}]*)\}\[([^\]]*)\]\{([^\}]*)\}"<code>\\$1\[$2\]\{$3\}</code>"g;
        s"\\cmdinvoke\{([^\}]*)\}\{([^\}]*)\}\[([^\]]*)\]"<code>\\$1\{$2\}\[$3\]</code>"g;
        s"\\cmdinvoke\{([^\}]*)\}\{([^\}]*)\}\{([^\}]*)\}"<code>\\$1\{$2\}\{$3\}</code>"g;
        s"\\cmdinvoke\{([^\}]*)\}\{([^\}]*)\}"<code>\\$1\{$2\}</code>"g;
        s"\\cmdinvoke\{([^\}]*)\}\[([^\]]*)\]"<code>\\$1\[$2\]</code>"g;
        s"\\cmdinvoke\*\{([^\}]*)\}\{([^\}]*)\}\{([^\}]*)\}"<code>\\$1\{</code><em>$2</em><code>\}\{</code><em>$3</em><code>\}</code>"g;
        s"\\environment\{([^\}]*)\}"<code>$1</code>"g;
        s"\\pkgoption\{([^\}]*)\}"<code>$1</code>"g;
        s"\\path\|([^\|]*)\|"<i>$1</i>"g;

        # environments
        s"\\begin\{htmlversion\}.*\n""g;
        s"\\end\{htmlversion\}.*\n""g;
        s"\\begin\{quote\}"<blockquote>"g;
        s"\\end\{quote\}"</blockquote>"g;
        s"\\begin\{description\}"<dl>"g;
        s"\\end\{description\}"</dl>"g;
        s"\\begin\{booklist\}"<dl>"g;
        s"\\end\{booklist\}"</dl>"g;
        s"\\begin\{proglist\}"<dl>"g;
        s"\\end\{proglist\}"</dl>"g;
        s"\\begin\{itemize\}"<ul>"g;
        s"\\end\{itemize\}"</ul>"g;
        s"\\begin\{enumerate\}"<ol>"g;
        s"\\end\{enumerate\}"</ol>"g;
        s"\\item\s*\[\\normalfont\{\}([^\]]*)\]"<dt>$1<dd>"g;
        s"\\item\s*\[([^\]]*)\]"<dt>$itemset$1$enditemset<dd>"g;
        s"\\item"<li>"g;
        s"\\\\(\[[^\]]*\])?"<br>"g;
        s"\|([^\|]+)\|"<code>$1</code>"g;

        # escaped characters and spacing
        s"\\\_"\_"g;
        s"\\textpercent"\%"g; # can't have \% in source...
        s"\\\$"\$"g;
        s"\\\#"\#"g;
        s"\\ " "g;
        s"\\\&"\&"g;
        s"\\\@""g;
        s"\\\;" "g;
        s"\\\," "g;
        s"\~" "g;
        s"\\nobreakspace" "g;
        s"\\textasciitilde"\~"g;
        s"\\textbar"\|"g;
        s"\\cs\<code\>"<code>\\"g;
        # Put back every brace parked at the top of this block; without
        # /g only the first one on the line was restored.
        s"\&lbrace\;"\{"g;
        s"\&rbrace\;"\}"g;
        s"\\symbol\{([^\}]*)\}"$SymbolChar{$1}"g;
        s"\{\}""g;
        s"\\keywords\{([^\}]*)\}"<!-- $1 -->"g;
        s"\\relax""g;

        s"\\hphantom\{[^\}]*\}""g;
        s"\\nothtml\{[^\}]*\}""g;
        s"\\latexhtml\{[^\}]*\}\{([^\}]*)\}"$1"g;
        s"\\htmlonly\{([^\}]*)\}"$1"g;
    }

    # The rules from here on run whatever $converting/$ignoring say.
    if ( s"\\begin\{ctanrefs\}"<dl>"g ) {
        $itemset = "<tt><i>";
        $enditemset = "</i></tt>";
    }
    if ( s"\\end\{ctanrefs\}"</dl>"g ) {
        $itemset = "";
        $enditemset = "";
    }

    # Expand the \CTANref{key} occurrences one at a time.
    while ( /\\CTANref\{([^\}]*)\}/ ) {
        my $repl=generate_CTAN_ref("$1");
        s/\\CTANref\{([^\}]*)\}/$repl/;
    }

    # Mode switches take effect from the next line onwards.
    $converting = 0 if s"\\begin\{verbatim\}"<pre>"g;
    $converting = 1 if s"\\end\{verbatim\}"</pre>"g;
    $ignoring++ if s"\\htmlignore""g;
    $ignoring-- if s"\\endhtmlignore""g;
    $ignoring++ if s"\\begin\{comment\}""g;
    $ignoring-- if s"\\end\{comment\}""g;
    $ignoring++ if s"\\begin\{footnoteenv\}""g;
    $ignoring-- if s"\\end\{footnoteenv\}""g;
    $_ = "" if $ignoring;
}

# Build the HTML link(s) for one CTAN reference key.
#
# Argument: the reference key (the text inside \CTANref{...}).  When called
#   with no argument the key is taken from $1, i.e. the caller's most recent
#   regex capture, which is what this sub used to rely on exclusively.
# Reads the globals %ctanref (key -> archive path) and %ctanref_plus, plus
#   the link settings $proto_1/2/3, $proto_d, $proto_f, $fmt_1/2/3,
#   $fmt_2_name, $fmt_3_name, $arch_root, $host_d and $this_root; none of
#   them is defined in this file.
# Returns the HTML string.
#
# %ctanref_plus{key} selects the form of the link:
#   > 0       three links built from the $proto_N/$fmt_N settings
#   == 0      a single link via $proto_d://$host_d/$this_root/
#   otherwise a single link via $proto_f://$arch_root
# The middle test used to be written "= 0": an assignment that was always
# false and also overwrote the hash entry with 0, so that branch could never
# be taken.  A key with no %ctanref_plus entry still goes to the last branch,
# as it always did.
# NOTE(review): which values the loader stores in %ctanref_plus is not
# visible here - confirm that an explicit 0 is meant to pick the middle form.
sub generate_CTAN_ref {
    my $key  = @_ ? $_[0] : $1;
    my $path = $ctanref{$key};
    my $plus = $ctanref_plus{$key};
    my $ret;

    if ( defined $plus && $plus > 0 ) {
        $ret  = "<a href=\"${proto_1}://$arch_root$path";
        $ret .= "$fmt_1\">$path</a>";
        $ret .= " (<a href=\"${proto_2}://$arch_root$path$fmt_2\">$fmt_2_name</a>";
        $ret .= ", <a href=\"${proto_3}://$host_d/$this_root/$path$fmt_3\">$fmt_3_name</a>)";
    } elsif ( defined $plus && $plus == 0 ) {
        $ret = "<a href=\"${proto_d}://$host_d/$this_root/$path\">" .
               "$path</a>";
    } else {
        $ret = "<a href=\"${proto_f}://$arch_root$path\">" .
               "$path</a>";
    }

    return $ret;
}

1;