File: wikifyhtml.pl

package info (click to toggle)
emboss 6.6.0%2Bdfsg-12
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 571,584 kB
  • sloc: ansic: 460,579; java: 29,383; perl: 13,573; sh: 12,753; makefile: 3,294; csh: 706; asm: 351; xml: 239; pascal: 237; modula3: 8
file content (151 lines) | stat: -rwxr-xr-x 4,680 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/perl -w

# Login name (as it appears in an "inc/<login>.address" include path)
# mapped to the name used to build the "Appinc:Address<Name>" template.
# Logins not listed here fall back to ucfirst(<login>) in the main loop.
our %author = (
    ableasby  => 'Bleasby',
    price     => 'Rice',
    ilongden  => 'Longden',
    mschuster => 'Schuster',
);

# Include-file extension (after ucfirst) mapped to the prefix used in the
# generated "Appinc:<Prefix><Name>" template.  "Ihtml" maps to the empty
# string, so those includes are named by <Name> alone.  An extension not
# listed here is used unchanged (ucfirst form) by the main loop.
our %types = (
    Ione    => 'Short',
    Ihtml   => '',
    Itable  => 'Options',
    Usage   => 'Example',
    Input   => 'Input',
    Output  => 'Output',
    History => 'History',
    Comment => 'Comment',
    Isee    => 'Seealso',
);

# Include-file base name (after ucfirst) mapped to the CamelCase page name
# used in the generated "Appinc:<Type><Name>" template.  A base name not
# listed here is used unchanged (ucfirst form) by the main loop.
%names = (
    # Output / general format pages.
    "Alignformats"       => "AlignFormats",
    "Assemblyformats"    => "AssemblyFormats",
    "Codonformats"       => "CodonFormats",
    "Featformats"        => "FeatFormats",
    "Ontologyformats"    => "OntologyFormats",
    "Reportformats"      => "ReportFormats",
    "Resourceformats"    => "ResourceFormats",
    "Seqformats"         => "SeqFormats",
    "Seqfeatformats"     => "SeqfeatFormats",
    "Taxonformats"       => "TaxonFormats",
    "Textformats"        => "TextFormats",
    "Urlformats"         => "UrlFormats",
    "Variationformats"   => "VariationFormats",

    # Input format pages.
    "Assemblyinformats"  => "AssemblyInFormats",
    "Codoninformats"     => "CodonInFormats",
    "Featinformats"      => "FeatInFormats",
    "Ontologyinformats"  => "OntologyInFormats",
    "Resourceinformats"  => "ResourceInFormats",
    "Seqinformats"       => "SeqInFormats",
    "Seqfeatinformats"   => "SeqfeatInFormats",
    "Taxoninformats"     => "TaxonInFormats",
    "Textinformats"      => "TextInFormats",
    "Urlinformats"       => "UrlInFormats",
    # This entry previously repeated the "Variationformats" key, which left
    # the variation input-format include without a CamelCase mapping.
    "Variationinformats" => "VariationInFormats",

    # Data files and miscellaneous pages.
    "Gcfiles"            => "GeneticCodeFiles",
    "Localfiles"         => "LocalFiles",
    "Graphicsdevices"    => "GraphicsDevices",
    "Drcat"              => "Drcat",
    "Edam"               => "Edam",
    "Go"                 => "Go",
    "Taxon"              => "Taxon",
    "Jaspfiles"          => "JasparFiles",
    "Refiles"            => "RebaseFiles",
    "Jison-programs"     => "JonIsonPrograms",
);

use English;

# Gather all input (files named on the command line, otherwise STDIN)
# into one string, removing leading spaces and tabs from every line.
$text = "";
while (defined(my $line = <>)) {
    $line =~ s/^[ \t]+//;
    $text .= $line;
}

# First pass: whole-text HTML-to-wiki rewrites applied to $text in place.
# The statements run in a fixed order; later patterns see the output of
# the earlier ones.
for ($text) {
    # Delete from the start of the text through the first '>' that
    # follows the last "BEGIN MAIN CONTENT" marker.
    s/^.*BEGIN MAIN CONTENT[^>]+>//gosm;

    # Remove the <html> and <body ...> wrapper tags (opening and closing).
    s/(<\/?[Hh][Tt][Mm][Ll]>)//gosm;
    s/(<\/?[Bb][Oo][Dd][Yy][^>]*?>)//gosm;

    # Named anchors are removed together with the text they enclose.
    s/(<[Aa]\s+name\s*=\s*\"([^\"]+)[^>]+>)[^<]*<\/[Aa]>//gosm;

    # Links whose target is a dot-free "NAME.html" become external links
    # under a fixed base URL; every remaining href becomes "[target| text]".
    # NOTE(review): the base URL points at the PHYLIP documentation --
    # confirm that this is the intended destination for this converter.
    s/<[Aa]\s+href\s*=\s*\"([^\".]+\.html)[^>]+>([^<]+)<\/[Aa]>/[http:\/\/evolution.genetics.washington.edu\/phylip\/doc\/$1 $2]/gosm;
    s/<[Aa]\s+href\s*=\s*\"([^\"]+)[^>]+>([^<]+)<\/[Aa]>/[$1| $2]/gosm;

    # Pull heading text onto the same line as its opening and closing tags.
    s/(<[Hh]\d>)\s*\n\s*/$1/gosm;
    s/\n(<\/[Hh]\d>)/$1/gosm;

    # Structural tags with no wiki equivalent.
    s/<\/[Dd][Ii][Vv]>//gosm;
    s/<[Hh][Rr]>//gosm;

    # Headings: <h1> and <h2> both become level-2 wiki headings.
    s/<[Hh]1>(.*?)<\/[Hh]1>/==$1==/gosm;
    s/<[Hh]2>(.*?)<\/[Hh]2>/==$1==/gosm;
    s/<[Hh]3>(.*?)<\/[Hh]3>/===$1===/gosm;
    s/<[Hh]4>(.*?)<\/[Hh]4>/====$1====/gosm;

    # Paragraphs become a newline; attribute-free row/cell tags vanish.
    s/<[Pp]>/\n/gosm;
    s/<\/?[Tt][RrDd]>//gosm;
    s/<\/[Pp]>//gosm;

    # Inline emphasis: <b> and <em> map to ''' and <i> maps to ''.
    s/(<\/?[Bb]>)/'''/gosm;
    s/(<\/?[Ee][Mm]>)/'''/gosm;
    s/(<\/?[Ii]>)/''/gosm;

    # Join a list-item tag with the line that follows it.
    s/(<\/?[Ll][Ii]>)\n/$1/gosm;
}

# Second pass: walk the pre-processed text one line at a time, track the
# enclosing HTML context (head / table / list / pre), turn server-side
# include directives into wiki template calls, collapse runs of blank
# lines, and print the resulting wiki markup to STDOUT.

# A final line without a trailing newline would never match the per-line
# pattern below and would be silently dropped, so terminate it first.
if (length($text) && substr($text, -1) ne "\n") {
    $text .= "\n";
}

$i     = 0;     # lines that reached the blank-line filter (debug aid only)
$pre   = 0;     # true while inside <pre> ... </pre>
$head  = 0;     # true while inside <head> or between the header includes
$blank = 0;     # length of the current run of blank lines
$table = 0;     # true while inside <table>; recorded but not consulted
$list  = "";    # "u" inside <ul>, "o" inside <ol>, "" outside any list
$nlist = 0;     # <li> items seen since the current list was opened

while ($text =~ /.*?\n/gos) {
    my $t = $MATCH;

    # Everything inside <head> ... </head>, or between the header1.inc and
    # header2.inc includes, is discarded (including the closing line).
    if ($t =~ s/<[Hh][Ee][Aa][Dd]>//gosm) { $head = 1 }
    if ($t =~ s/<\/[Hh][Ee][Aa][Dd]>//gosm) { $head = 0; next }
    if ($t =~ s/<!--#include file="header1.inc" -->//gosm) { $head = 1 }
    if ($t =~ s/<!--#include file="header2.inc" -->//gosm) { $head = 0; next }
    next if $head;

    # Strip container tags and remember which containers are open.
    if ($t =~ s/<\/[Tt][Aa][Bb][Ll][Ee][^>]*>//gosm) { $table = 0 }
    if ($t =~ s/<[Tt][Aa][Bb][Ll][Ee][^>]*>//gosm)   { $table = 1 }
    if ($t =~ s/<\/[OoUu][Ll]>//gosm) { $list = "" }
    if ($t =~ s/<[Uu][Ll]>//gosm) { $list = "u"; $nlist = 0 }
    if ($t =~ s/<[Oo][Ll]>//gosm) { $list = "o"; $nlist = 0 }
    if ($t =~ s/<[Pp][Rr][Ee]>//gosm)   { $pre = 1 }
    if ($t =~ s/<\/[Pp][Rr][Ee]>//gosm) { $pre = 0 }

    # Any remaining include directive replaces the whole line with a
    # template call chosen by the include file's name and extension.
    if ($t =~ /<[!]--[\#]include file=\"([^\"]+)\" -->/gosm) {
        my $ifile = $1;
        if ($ifile =~ /inc\/([^.]+)[.]address/) {
            # Author address: known logins map through %author.
            my $aname = $1;
            if (defined($author{$aname})) { $aname = $author{$aname} }
            else                          { $aname = ucfirst($aname) }
            $t = "\{\{:Appinc:Address$aname\}\}\n";
        }
        elsif ($ifile =~ /inc\/target([^.]*)[.]itxt/) {
            my $tgtname = ucfirst($1);
            $t = "\{\{:Appinc:Target$tgtname\}\}\n";
        }
        elsif ($ifile =~ /inc\/([^.]+)[.]ihelp/) {
            # Help includes produce no output at all.
            $t = "";
        }
        elsif ($ifile =~ /inc\/([^.]+)[.](.*)/) {
            my $name = ucfirst($1);
            my $type = ucfirst($2);
            if (defined($names{$name})) { $name = $names{$name} }
            if (defined($types{$type})) { $type = $types{$type} }
            if ($type eq "Seealso") {
                # The heading is printed directly, ahead of the template.
                print "==See also==\n";
            }
            $t = "\{\{:Appinc:$type$name\}\}\n";
        }
    }

    # Count consecutive blank lines outside <pre>.  This is a logical
    # test, so use && (the bitwise & only worked because both operands
    # happened to be 0/1 values).
    if (!$pre && $t =~ /^\n$/) { $blank++ }
    else                       { $blank = 0 }
    $i++;
    next if $blank > 1;                     # keep at most one blank line
    next if $blank == 1 && $list ne "";     # and none inside a list
#    print "$i:";

    # Inside a list, join the line onto the current item.
    if ($list ne "") { $t =~ s/\n/ / }

    # A leading space makes the wiki render the line as preformatted.
    if ($pre) { print " " }

    # Each <li> starts a new bullet ("* ") or numbered ("# ") item; every
    # item after the first is preceded by a newline to end the previous one.
    if ($t =~ s/<[Ll][Ii]>//gosm) {
        if ($nlist++) { print "\n" }
        if    ($list eq "u") { print "* " }
        elsif ($list eq "o") { print "# " }
    }

    # The list has just closed: terminate its last item.
    if ($nlist && $list eq "") { print "\n"; $nlist = 0; }

    print "$t";
}