1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
#!/usr/bin/perl
# Convert an ej-formatted doc into an HTML page
# Input: First argument or standard input
# Output: Standard output
# Scratch directory: $HOME/tmp.  It has to exist already, otherwise we
# refuse to run.
# Make this just /tmp at your own risk. You have been warned.
$TMP = "$ENV{'HOME'}/tmp";
unless(-d $TMP) {
    die "Fatal: Please create a directory entitled $TMP\n";
}
# Pull the whole document into $doc, line by line, from the files named on
# the command line (or from standard input when there are none)
$doc .= $_ while <>;
# NOTE: the historical "$* = 1" (multi-line matching) switch was removed
# here.  Assigning to $* is a fatal error as of Perl 5.30, and it only ever
# affected how ^ and $ match; the patterns below use explicit /s and contain
# no ^ or $ anchors, so nothing depends on it.
# Get rid of <!-- ... --> comments
$doc =~ s|<\!\-\-.*?\-\->||sg;
# body flags: text that ends up inside the generated <BODY ...> tag
$bodyflags = "";
# Grab the header: everything between <head> and </head> (case-insensitive,
# may span lines)
if($doc =~ m|<head>(.*?)</head>|is) {
    $header = $1;
    # Get rid of the TH; HTML doesn't use it
    $header =~ s|<th>.*?</th>||igs;
    # Get rid of DTWIDTH too
    $header =~ s|<dtwidth>.*?</dtwidth>||igs;
    # Parse the body flags: strip every <bodyflags>...</bodyflags> element
    # from the header and remember the captured contents
    if($header =~ s|<bodyflags>(.*?)</bodyflags>||igs) {
        $bodyflags = $1;
    }
}
else {
    die "Fatal: Document must have a heading section\n";
}
# Make sure the header has
# <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=XXX">
# Where XXX is any character set
if($header !~
  m|meta\s+http\-equiv\=\"content\-type\"\s+content\=\"text\/html\;\s+charset=|i)
{
    # The hint goes to STDERR: STDOUT carries the generated HTML, so
    # printing the hint there would bury it in (and corrupt) the output.
    print STDERR "Please have something like this:\n";
    print STDERR
      '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; CHARSET=utf-8">';
    print STDERR "\n";
    die "Fatal: Header must declare charset\n";
}
# The header passed validation, so start writing the HTML page.
# Lead with a banner comment telling readers the page is generated.
print '<!-- Do *not* edit this file; it was automatically generated by',
      " ej2html\n",
      ' Look for a name.ej file with the same name as this filename -->',
      "\n";
# Stamp the output with the current local time
$ts = localtime;
print '<!-- Last updated ', $ts, " -->\n", "\n";
# Emit the cleaned-up header, then open the body with any body flags
print "<HTML><HEAD>\n", $header, "</HEAD><BODY $bodyflags>\n";
# Header work is finished; on to the body of the document.
# The body is whatever sits between <body> and </body> (case-insensitive,
# may span lines); a document without one is rejected.
$doc =~ m|<body>(.*?)</body>|is
    or die "Fatal: Document must have a body section\n";
$body = $1;
# Expand includes and strip ej-only tags
$body = process_body($body);
#$body = fmt($body);
# Write the body and close the page
print $body, "</BODY></HTML>\n";
exit 0;
# Process the body text of the document.
# Input: The body text
# Output: The body with every <include "file"> / <hinclude "file"> tag
#         replaced by the contents of the named file, and every
#         <hibit ...> / </hibit> tag removed
# Dies if an included file can not be opened.
# NOTE: inserted text is scanned again, so include tags inside an included
# file are expanded too; a file that includes itself will loop forever.
sub process_body {
    my($body) = @_;
    # The INCLUDE and HINCLUDE tags
    while($body =~ m|\<h?include\s+\"([^"]+)\"\s*\>|is) {
        my $filename = $1;
        # Remember where the tag sits so it can be spliced out directly
        # instead of running the same pattern a second time
        my $start  = $-[0];
        my $length = $+[0] - $-[0];
        # Three-argument open: the name is used exactly as written (the
        # two-argument form strips leading/trailing whitespace from it)
        open(my $fh, '<', $filename)
            or die "Can not find file $filename: $!\n";
        my $contents = do { local $/; <$fh> };
        $contents = "" unless defined $contents;
        close($fh);
        #$contents = process_body($contents);
        substr($body, $start, $length, $contents);
    }
    # HIBIT tags mean nothing in HTML output; drop the tags themselves but
    # keep whatever text they surround
    $body =~ s|\<\/?hibit[^>]*\>||ig;
    return $body;
}
# This takes a string, and breaks any line that runs past a column limit
# (70 by default) at the last space or tab seen on that line; otherwise it
# performs no other formatting, apart from stripping trailing white space
# before newlines.  A line containing no space or tab is left unbroken.
# Input: The string to format, and optionally the column limit
# Output: The formatted string
sub fmt {
    my($input,$maxcol) = @_;
    $maxcol = 70 unless defined $maxcol;
    my($place,$lastspace,$column,$linebegin);
    $place = $lastspace = $column = $linebegin = 0;
    # Get rid of trailing white space, which confuses this algorithm
    $input =~ s/[ \t]+\n/\n/sg;
    # The core algorithm: walk the string one character at a time.
    # $lastspace == $linebegin doubles as "no break point on this line yet"
    while($place < length($input)) {
        my $char = substr($input,$place,1);
        if($char eq "\n") {
            # Existing line break: start counting a fresh line
            $column = -1;
            $linebegin = $lastspace = $place + 1;
        }
        elsif($char eq " " || $char eq "\t") {
            $lastspace = $place;
        }
        if($column > $maxcol && $linebegin != $lastspace) {
            # Too long: turn the last white space into a newline and resume
            # scanning right after it
            substr($input,$lastspace,1,"\n");
            $place = $lastspace;
            $column = -1;
            $linebegin = $lastspace = $place + 1;
        }
        $column++;
        $place++;
    }
    return $input;
}
|