#!/usr/bin/perl
# Extracts the code from LaTeX documents and outputs the documentation
# into Javadoc-style comments.
#
# Author: Eric Buist
# Summer 2002 and 2003
#
# The script parses its command-line arguments, reads the input
# file and slices it into code blocks and text blocks. The contents
# are inserted into a list of blocks and the text blocks' indices
# are saved. All the text blocks are then merged together in order
# to initiate a single run of LaTeX2HTML. The converted contents
# are then parsed and distributed to the text blocks, which are then
# transformed into Javadoc-style comments. The list of blocks
# is finally merged and printed to the output file.

use Cwd;
use Getopt::Long;
use File::Path;   # To delete the temp directory
use File::Spec;   # Portability when using pathnames
use strict;

# success: used to indicate whether the output file has to be deleted
# preamble: contains the preamble of the master LaTeX file
# tempDir: contains the path of the temporary directory
# outDir: directory where the output file will be
# htmlOutDir: directory where the Javadoc HTML will go, used to put
#             generated images at the right place.
# blockList: list of all code and text blocks
# commentIndices: indices of all the text blocks into blockList
# convertedComments: result of the LaTeX2HTML conversion split into chunks,
#                    indexed like blockList
# classDocIndex: index, in blockList, of the class doc block
# images: indicates if LaTeX2HTML may generate images.
# html: indicates if we perform the LaTeX to HTML conversion.
# savelatex: indicates if hide markers and code-command markers are kept.
# htmlonly: indicates if we convert a text-only file to HTML.
# oldCurDir: stores the current directory when the script starts.
# texDir: directory where the processed TeX file is.
# masterFile: name of the master file, if any
# htmlonlyTitle: Title of the HTML document in htmlonly mode.
use vars qw($success $preamble $tempDir $outDir $htmlOutDir
            $classDocIndex @blockList @commentIndices @convertedComments
            $images $html $savelatex $htmlonly $oldCurDir
            $texDir $masterFile $htmlonlyTitle);

# Initialization
$oldCurDir = getcwd;
my $TEXTMODE = 0;
my $CODEMODE = 1;

# Commands initiating a code block.
# vcode is not there because it only indicates a code snippet
# included in the documentation.
my $CODECMDS = "code|smallcode|longcode";

# Matches a file extension: a dot followed by characters that are neither
# a dot nor a path separator, at the end of the name. Anchoring on the
# last path component keeps "./dir/file" from being seen as having an
# extension because of the leading "./".
my $EXTRX = qr/\.[^.\/\\]*$/;

# Markers delimiting the converted text blocks inside the single HTML
# document produced by LaTeX2HTML. They are only written and read back
# by latexToHtml.
my $MARKERPREFIX = "texjava:block ";

$tempDir = "";
$success = 1;
$images = '';            # default: no image generation
$html = '';              # default: produces no html
$savelatex = '';         # default: no saving for latex
$htmlonly = '';          # default: convert a LaTeX file to Java
$htmlonlyTitle = '';
$htmlOutDir = '';        # default: empty string, replaced by $outDir/html
$masterFile = undef;

# Check the arguments
if (!GetOptions ('images!' => \$images,
                 'html!' => \$html,
                 'savelatex!' => \$savelatex,
                 'htmlonly!' => \$htmlonly,
                 'htmltitle=s' => \$htmlonlyTitle,
                 'htmloutdir=s' => \$htmlOutDir,
                 'master=s' => \$masterFile)
    || (@ARGV < 1 || @ARGV > 2)) {
    print "usage: perl texjava.pl [-(no)images] [-(no)html] " .
        "[-(no)savelatex] [-(no)htmlonly] [-htmltitle title] [-htmloutdir dir] " .
        "[-master master] <infile> [<outfile>]\n";
    exit 1;
}

# If no extension specified and the file does not exist, append .tex
$ARGV[0] .= ".tex" if $ARGV[0] !~ $EXTRX && !-e $ARGV[0];

# If the out file is not given, strip the extension and append .java
if (!defined ($ARGV[1])) {
    $ARGV[1] = $ARGV[0];
    $ARGV[1] =~ s/$EXTRX//;
    $ARGV[1] .= $htmlonly ? ".html" : ".java";
}
else {
    # append .java or .html if necessary
    $ARGV[1] .= ($htmlonly ? ".html" : ".java") if $ARGV[1] !~ $EXTRX;
}
$masterFile .= ".tex"
    if defined ($masterFile) && $masterFile !~ $EXTRX && !-e $masterFile;

die ("filenames must be different") if $ARGV[0] eq $ARGV[1];

# Determine the directory where the output file will be created or
# overwritten. Convert to absolute path first, then strip the file name.
# We use library functions for portability purposes, some
# tasks could be done with regexps if UNIX was assumed.
# rel2abs leaves an already absolute path absolute, so it is applied
# unconditionally; otherwise outDir would stay undefined for absolute names.
$outDir = File::Spec->rel2abs ($ARGV[1]);
my ($vol, $dir, $file) = File::Spec->splitpath ($outDir);
$outDir = File::Spec->catpath ($vol, $dir, "");

($vol, $dir, $file) = File::Spec->splitpath ($ARGV[0]);
$texDir = File::Spec->catpath ($vol, $dir, "");

# If htmlOutDir is empty, set it with a default value.
$htmlOutDir = File::Spec->catdir ($outDir, "html") if $htmlOutDir eq '';

# Open the master file given by the -master option, if
# it is specified, and put its LaTeX preamble into the preamble
# variable.
$preamble = "";
if (defined $masterFile) {
    open (MASTERFILE, "<", $masterFile) or die "cannot open $masterFile";
    $preamble = join ("", <MASTERFILE>);
    close MASTERFILE;
    # We do not want all the document, only the preamble.
    $preamble =~ s/\\begin\s*\{document\}.*$//s;
}

# Open the input file.
open (FIN, "<", $ARGV[0]) or die ("cannot open input file $ARGV[0]");
$success = 0;

@blockList = ();
@commentIndices = ();

# Initialize the block parser to text mode, unhidden
# and with an empty current block.
my $currentBlockMode = $TEXTMODE; # the mode of the currently parsed block
my $hideCount = 0;                # number of open hide environments
my $currentBlock = "";            # currently parsed block
my $lastCodeBlock = "";           # the last parsed code block
my $classDocPrinted = 0;          # 1 if first block of text printed
my $classDocBlock = "";           # first block of text, class documentation
my $codeCmd = "CODE";             # command used to initiate the code block

if ($htmlonly) {
    # We convert the entire LaTeX file to HTML.
    my $contents = join ('', <FIN>);
    push @blockList, $contents;
    push @commentIndices, 0;
}
else {
    while (<FIN>) {
        # Reads the file line by line and splits it into blocks
        # delimited by code environments.
        my $li = $_;   # use a named variable instead of the default $_
        while (length ($li) > 0) {
            # The loop allows processing more than one command
            # per line. Commands are order-sensitive.
            if ($currentBlockMode == $CODEMODE && $li =~ /^\/\*(\*+)/) {
                # A Javadoc-style comment is either in the code, or in
                # a literal string. Since this kind of thing does not
                # occur in strings very often, we can transform it
                # a little bit. It will have the form /* * instead of /**.
                $currentBlock .= "/* $1";
                $li = $';
            }
            elsif ($currentBlockMode == $CODEMODE &&
                   $li =~ /^\/\*($CODECMDS|hide|endhide)\*\//io) {
                # The same thing as the previous condition, for comments
                # that would be mistaken for our own markers.
                $currentBlock .= "/* $1 */";
                $li = $';
            }
            elsif ($li =~ /^\\($CODECMDS)(?![a-zA-Z])/o ||
                   $li =~ /^\\begin\s*\{\s*($CODECMDS)\s*\}/o) {
                # We have to look ahead for a non-alphabetic character
                # to end the code command. Without this check,
                # some custom user commands (e.g., codebox) may
                # be processed as a code command.

                # Print an error message ($. being the line number),
                # and exit the script if we already are in code mode.
                die ("l.$.: $li: \\begin{code} inside " .
                     "\\begin{code} not permitted")
                    if $currentBlockMode == $CODEMODE;

                # hide/endhide balancing
                # This is necessary to prevent
                # LaTeX2HTML from skipping HTML comments
                # marking the parts.
                # We use a special environment that does not exist instead
                # of the hide environment.
                # Before calling LaTeX2HTML, we will convert that
                # to hide and we will be able to remove these @hide's
                # to prevent them from appearing into the comment LaTeX
                # copy of the documentation.
                # for (my $i = 0; $i < $hideCount; $i++) {
                #    $currentBlock .= "\\end{\@hide}";
                # }

                my $cmd = $1;   # save the used code command
                $li = $';       # save the part after \code for the next block.

                if ($classDocBlock eq "") {
                    # When the first \code command is encountered,
                    # currentBlock is the first block of text and
                    # is considered the class documentation block.
                    # This block will be inserted in the first code block
                    # just on top of the class declaration.
                    $classDocBlock = $currentBlock;
                    # classDocBlock must not be empty or we will
                    # never output anything.
                    $classDocBlock = " " if $classDocBlock eq "";
                }
                else {
                    # The current block is a text block because code
                    # mode and text mode alternate. So we have
                    # to process this text block and the last code block
                    # in order to save the result to the block list.
                    &processBlocks ($currentBlock, $lastCodeBlock,
                                    $classDocPrinted ? "" : $classDocBlock);
                    $classDocPrinted = 1;
                }

                # If the command initiating the code block changes,
                # we add a marker in the code file.
                $cmd =~ tr/a-z/A-Z/;
                $currentBlock = $cmd ne $codeCmd && $savelatex ? "/*$cmd*/" : "";
                $codeCmd = $cmd;
                $currentBlockMode = $CODEMODE;
            }
            elsif ($li =~ /^\\end($CODECMDS)(?![a-zA-Z])/o ||
                   $li =~ /^\\end\s*\{\s*($CODECMDS)\s*\}/o) {
                die ("l.$.: $li: \\end{code} without corresponding " .
                     "\\begin{code} found")
                    if $currentBlockMode == $TEXTMODE;

                # Save the code block for future processing
                $lastCodeBlock = $currentBlock;
                $li = $';   # save the part after \endcode for the next block

                # a new text block starts
                $currentBlock = "";
                # for (my $i = 0; $i < $hideCount; $i++) {
                #    $currentBlock .= "\\begin{\@hide}";
                # }
                $currentBlockMode = $TEXTMODE;
            }
            elsif ($li =~ /^\\hide(?![a-zA-Z])/ ||
                   $li =~ /^\\begin\s*\{\s*hide\s*\}/) {
                $hideCount++;
                # Record the marker in the form suited to the current mode.
                $currentBlock .= "\\begin{hide}"
                    if $currentBlockMode == $TEXTMODE;
                $currentBlock .= "/*HIDE*/" if $currentBlockMode == $CODEMODE;
                $li = $';   # allow further processing of the line
            }
            elsif ($li =~ /^\\endhide(?![a-zA-Z])/ ||
                   $li =~ /^\\end\s*\{\s*hide\s*\}/) {
                die ("l.$.: $li: \\end{hide} found without a \\begin{hide}")
                    if !$hideCount;
                $currentBlock .= "\\end{hide}"
                    if $currentBlockMode == $TEXTMODE;
                $currentBlock .= "/*ENDHIDE*/"
                    if $currentBlockMode == $CODEMODE;
                $hideCount--;
                $li = $';
            }
            elsif ($currentBlockMode == $TEXTMODE && $li =~ /^%/) {
                # LaTeX comment found, stop processing this line.
                $currentBlock .= $li;
                $li = "";
            }
            else {
                # Reads only one character
                $currentBlock .= substr ($li, 0, 1);
                $li = substr ($li, 1);
            }
        }   # End while for reading the line
    }   # End while for reading FIN

    # Final balancing checkup
    die ("missing \\end{code}") if $currentBlockMode == $CODEMODE;
    die ("missing \\end{hide}") if $hideCount;

    # We are now in unhidden text mode and there is
    # a last unprocessed text block and code block. At worst,
    # both of these blocks would be empty.
    &processBlocks ($currentBlock, $lastCodeBlock,
                    $classDocPrinted ? "" : $classDocBlock);
}

if ($html) {
    # Convert all the text blocks into HTML
    if ($images) {
        if (!-d $htmlOutDir) {
            mkpath ($htmlOutDir, 0, 0755);
        }
        if (!-x $htmlOutDir) {
            die ("Cannot access $htmlOutDir");
        }
    }
    &createTempDir;
    &latexToHtml;
}

if (!$htmlonly) {
    # Transform the text blocks into Java comments incorporating
    # the HTML conversion and the original commented LaTeX contents.
    for (my $i = 0; $i < @commentIndices; $i++) {
        my $ind = $commentIndices[$i];
        $blockList[$ind] = &processTextBlock ($blockList[$ind],
                                              $convertedComments[$ind],
                                              $ind == $classDocIndex);
    }

    # Post-processing on the code blocks, i.e. every block whose index
    # lies strictly between two consecutive text block indices.
    my $oldInd = -1;
    for (my $i = 0; $i <= @commentIndices; $i++) {
        my $ind = $i == @commentIndices ? @blockList : $commentIndices[$i];
        for (my $j = $oldInd + 1; $j < $ind; $j++) {
            my $hiderx = '\/\*HIDE\*\/';
            # my $hidetxt = '/*HIDE*/';
            my $endHiderx = '\/\*ENDHIDE\*\/';
            # my $endHidetxt = '/*ENDHIDE*/';

            # Remove implicit hiding markers
            # $blockList[$j] =~
            #    s/$hiderx(\s*\{[\s\S]*?\})\s*$endHiderx/
            #    &countBraces ($1) == 0 ? $1 : "$hidetxt$1$endHidetxt"/geo
            #    if $ind != $classDocIndex + 1;

            # Remove every hide marker if the LaTeX contents is not saved.
            $blockList[$j] =~ s/$hiderx|$endHiderx//go if !$savelatex;
        }
        $oldInd = $ind;
    }
}
else {
    # Strip leading and trailing paragraph/rule/break tags, then wrap
    # the converted contents into a minimal HTML page.
    $convertedComments[0] =~ s/^(\s*<P>|\s*<[BH]R>)*//gio;
    $convertedComments[0] =~ s/(<P>\s*|<[BH]R>\s*)*$//gio;
    $blockList[0] = "<html>\n" .
        "<head>\n<title>\n$htmlonlyTitle\n</title>" .
        "</head>\n<body>\n" .
        $convertedComments[0] .
        "\n</body>\n</html>";
}

# Write the output file by merging all the blocks
open (FOUT, ">", $ARGV[1]) or die ("cannot open output file $ARGV[1]");
print FOUT join ("", @blockList);
close FOUT;
$success = 1;

# We are done, the END block will be called automatically.

END {
    # Close the input file and remove the temporary directory, whether
    # the script succeeded or died.
    close FIN;
    # latexToHtml may have died after changing directory; go back so that
    # a relative temporary directory can still be found.
    chdir ($oldCurDir) if defined ($oldCurDir) && $oldCurDir ne "";
    &deleteTempDir;
}

###################################################################

# sub countBraces {
#    # Count the number of braces into txt.
#    # Returns the difference between the number of opening and closing
#    # braces.
#    my $txt = shift;
#    my $nob = $txt =~ tr/\{/\{/;
#    my $ncb = $txt =~ tr/\}/\}/;
#    return $nob - $ncb;
# }

sub createTempDir {
    # Looks for a temporary directory and creates a subdir in it.
    # Candidates are tried in order: the first set variable among
    # TEMP, TMP and TMPDIR, then /tmp, the output directory and ".".
    # On success, tempDir holds the created directory; otherwise
    # tempDir is reset to the empty string and the script dies.
    my @tempTry = ();
    if (exists $ENV{"TEMP"}) {
        push @tempTry, $ENV{"TEMP"};
    }
    elsif (exists $ENV{"TMP"}) {
        push @tempTry, $ENV{"TMP"};
    }
    elsif (exists $ENV{"TMPDIR"}) {
        push @tempTry, $ENV{"TMPDIR"};
    }
    push @tempTry, "/tmp";
    push @tempTry, $outDir;
    push @tempTry, ".";

    foreach my $tmp (@tempTry) {
        # We create a temporary subdirectory by appending
        # the current process ID to texjava.
        $tempDir = File::Spec->catfile ($tmp, "texjava$$");
        return if mkdir ($tempDir, 0755);
    }
    $tempDir = "";
    die "cannot create temporary subdirectory";
}

sub deleteTempDir {
    # Deletes the created temporary subdirectory, if any.
    if (length ($tempDir) > 0) {
        # We cannot die because die may already have been called.
        print "cannot delete $tempDir\n" if !rmtree ([$tempDir], 0, 1);
    }
}

sub findLastDeclaration {
    # Looks for the place, in a code block, where the documentation of
    # the last visible declaration can be inserted.
    # The code block is split on the /*HIDE*/ and /*ENDHIDE*/ markers;
    # for each unhidden part containing a non-blank line, the position
    # of the beginning of its first non-blank line is recorded. The
    # position recorded for the last such part is returned.
    # -1 is returned if no unhidden non-blank line is found.
    my $codeBlock = shift;

    # Split using /*HIDE*/, /*ENDHIDE*/; the markers are kept as elements.
    my @subBlocks = split /(\/\*(?:END)?HIDE\*\/)/, $codeBlock;
    my $hideCount = 0;   # Number of open hide markers
    my $fnPos = -1;      # Pos of last seen declaration
    my $spos = 0;        # Position of beginning of sbl into codeBlock

    foreach my $sbl (@subBlocks) {
        if ($sbl eq "/*HIDE*/") {
            $hideCount++;
        }
        elsif ($sbl eq "/*ENDHIDE*/") {
            $hideCount--;
        }
        elsif (!$hideCount) {
            # We are supposing that comments are always hidden
            # or at least there are no comments at the end of
            # the unhidden parts of code blocks.
            # Split into lines, keeping the newline runs as elements so
            # that the lengths add up to positions into sbl.
            my @lines = split /(\n+)/, $sbl;
            my $lpos = 0;
            foreach my $li (@lines) {
                if ($li =~ /\S/) {
                    $fnPos = $lpos + $spos;
                    last;
                }
                $lpos += length ($li);
            }

            # Former algorithm, kept for reference:
            # my $lpos = length ($sbl);  # position, into sbl, of the line
            # # n represents the number of open parentheses.
            # # If there are more closed parentheses than open
            # # parentheses, n is negative.
            # my $n = 0;
            # my $oldFnPos = $fnPos;
            # for (my $i = @lines - 1; $i >= 0 && $oldFnPos == $fnPos; $i--) {
            #    my $li = $lines[$i];
            #    $lpos -= length ($li);
            #    if ($li =~ /\S/) {
            #       # We have a non-blank line.
            #       # Calculate the number of opening and closing parentheses
            #       my $nop = $li =~ tr/$/\(/;
            #       my $ncp = $li =~ tr/$/\)/;
            #       $n = $n + $nop - $ncp;
            #       $fnPos = $lpos + $spos if $n == 0;
            #    }
            # }
        }
        $spos += length ($sbl);
    }
    return $fnPos;
}

sub processBlocks {
    # Processes a block of text and a block of code.
    # The block of text is inserted into the code block at the position
    # returned by findLastDeclaration.
    # If a non-empty class documentation block is passed, it will also
    # be inserted just on top of a class declaration,
    # or at the top of the code block if no class declaration is found.
    # The resulting pieces are appended to blockList and the indices of
    # the text pieces are appended to commentIndices.
    my ($textBlock, $codeBlock, $classDocBlock) = @_;
    my $atBottom = "";

    # If no declaration is found, the best place to insert
    # the block is at its original location in the LaTeX file,
    # so we put it at the end of the code block.
    my $insPos = &findLastDeclaration ($codeBlock);
    if ($insPos == -1) {
        # No declaration found, everything goes at bottom.
        $atBottom = $textBlock;
        $textBlock = "";
    }

    if ($textBlock =~
        /(\\(sub)*section\*?|\\guisec|\\unmoved)(?![a-zA-Z])/) {
        # If a sectioning command is found, the text block must be split.
        # The part starting at the section (maybe a summary description)
        # will not be moved whereas the part preceding it
        # (method description) will be moved.
        my $p = $-[0];   # offset of the beginning of the match
        $atBottom = substr ($textBlock, $p);
        $textBlock = substr ($textBlock, 0, $p);
    }
    if ($atBottom ne "") {
        $atBottom = "% UNMOVED\n$atBottom";
    }

    if ($classDocBlock ne "") {
        # If the class documentation is already printed,
        # the empty string will be received and we won't enter
        # here.

        # Find the insertion point of the classDoc block.
        # We find the beginning of the first line containing
        # an uncommented class keyword. If that cannot be found,
        # for instance in a C program, 0 will be kept.
        # The class declaration line is recognized by its keyword
        # class or interface not preceded by any end-of-line
        # or comment characters.
        my $docPos = 0;
        $docPos = $-[0]
            if $codeBlock =~ /^[^\*\/\n]*?(?:class|interface)/m;

        # The first code block can also specify a package name
        # which must be taken into account for proper placement
        # of generated images, if any.
        if ($codeBlock =~ /^[^\*\/\n]*?package\s*(.*?);/m) {
            my $packageName = $1;
            # Convert the package name into a path.
            # We must replace the periods with the platform-specific
            # path delimiter.
            my @packageParts = split /\./, $packageName;
            $htmlOutDir = File::Spec->catdir ($htmlOutDir, @packageParts);
        }

        # Insert the class documentation into the code block
        if ($docPos <= $insPos) {
            push @blockList, substr ($codeBlock, 0, $docPos);
            push @blockList, $classDocBlock;
            push @commentIndices, $#blockList;
            $classDocIndex = $#blockList;
            if ($textBlock ne "") {
                push @blockList, substr ($codeBlock, $docPos,
                                         $insPos - $docPos);
                push @blockList, $textBlock;
                push @commentIndices, $#blockList;
                push @blockList, substr ($codeBlock, $insPos);
            }
            else {
                push @blockList, substr ($codeBlock, $docPos);
            }
        }
        else {
            if ($textBlock ne "") {
                push @blockList, substr ($codeBlock, 0, $insPos);
                push @blockList, $textBlock;
                push @commentIndices, $#blockList;
                push @blockList, substr ($codeBlock, $insPos,
                                         $docPos - $insPos);
            }
            else {
                push @blockList, substr ($codeBlock, 0, $docPos);
            }
            push @blockList, $classDocBlock;
            push @commentIndices, $#blockList;
            $classDocIndex = $#blockList;
            push @blockList, substr ($codeBlock, $docPos);
        }
    }
    else {
        if ($textBlock ne "") {
            push @blockList, substr ($codeBlock, 0, $insPos);
            push @blockList, $textBlock;
            push @commentIndices, $#blockList;
            push @blockList, substr ($codeBlock, $insPos);
        }
        else {
            push @blockList, $codeBlock;
        }
    }

    if ($atBottom ne "") {
        push @blockList, $atBottom;
        push @commentIndices, $#blockList;
    }
}

sub processTextBlock {
    # Formats the text block to get Java comments.
    # latexBlock is the original LaTeX text, htmlBlock its converted
    # HTML (possibly undefined when no conversion was made).
    # If isClassDoc is nonzero, less indentation is made
    # for the comments.
    # Returns the formatted comment, or the empty string if there is
    # nothing to insert.
    my ($latexBlock, $htmlBlock, $isClassDoc) = @_;
    my $latexonly = 0;

    # Remove the useless <P> tags at beginning and end of the block
    $htmlBlock =~ s/^(\s*<P>|\s*<[BH]R>)*//gio;
    $htmlBlock =~ s/(<P>\s*|<[BH]R>\s*)*$//gio;

    # If the LaTeX block was empty, we insert no comments
    # at all.
    return "" if $latexBlock =~ /^\s*(% UNMOVED)?\s*$/;
    return "" if !$html && !$savelatex;

    # Normally, a sectioning block of text documents no
    # functions, so only its LaTeX contents is kept.
    $latexonly = 1
        if $latexBlock =~ /(\\(sub)*section\*?|\\guisec)(?![a-zA-Z])/;

    # Since comments disturb Javadoc if they appear inside the
    # brief, the best way to avoid problems is by removing
    # them completely. However, we must keep the LaTeX contents
    # inside HTML comments.
    $htmlBlock =~ s/<!--.*?-->//g;

    # We isolate the brief and check that there are no
    # tables or div in it. If there are, a dot is used to end the
    # brief before them.
    my $brief = $htmlBlock =~ /^([^.]*)\./ ? $1 : $htmlBlock;
    $htmlBlock = ".\n$htmlBlock" if $brief =~ /<(TABLE|DIV)/i;

    # The LaTeX contents is stored inside an HTML comment, so it must
    # not contain comment delimiters itself.
    # --{}> will output the same as -->
    # in LaTeX, without the confusion to HTML.
    # */ must also be transformed to prevent
    # an incorrect comment ending, even in the HTML file.
    $latexBlock =~ s/<!--/<!{}--/g;
    $latexBlock =~ s/-->/--{}>/g;
    $latexBlock =~ s/\*\//\*{}\//g;
    $htmlBlock =~ s/\*\//\*&#47;/g;

    # When Java reads \u, it expects to get a unicode
    # character. Unfortunately, some LaTeX commands
    # start by \u. They will get replaced by \@u.
    # \@u already in the document will become \@ u, which
    # is normally equivalent since @ is a non-letter.
    $latexBlock =~ s/\\@([uU])/\\@ $1/g;
    $latexBlock =~ s/\\([uU])/\\\@$1/g;

    # Now we are ready to return a formatted block
    # We must avoid HTML comments into the brief or
    # Javadoc will not format the documentation properly.
    $latexonly = 1 if $htmlBlock =~ /^(\s|<P>|<BR>)*$/i;
    my $blk = $latexonly ? "" : $htmlBlock;
    # NOTE(review): the LaTeX copy is appended whenever it is non-blank;
    # confirm whether this should depend on the -savelatex option.
    if ($latexBlock =~ /\S/) {
        $blk .= "<!--LATEX\n" if !$latexonly;
        $blk .= $latexBlock;
        $blk .= "-->" if !$latexonly;
        $blk .= "\n";
    }

    # Comment the Javadoc HTML code inside the output code.
    # Something is inserted only if the converted
    # text block contains at least one non-blank line.
    if ($blk =~ /\S/) {
        $blk =~ s/^\s+//;
        if ($isClassDoc) {
            $blk = $latexonly ? "/*LATEX\n$blk" : "/**\n$blk";
            $blk =~ s/\n(?=.)/\n * /gs;
            $blk .= " */\n";
        }
        else {
            $blk = $latexonly ? "   /*LATEX\n$blk" : "   /**\n$blk";
            $blk =~ s/\n(?=.)/\n    * /gs;
            $blk .= "    */\n";
        }
    }
    return $blk;
}

sub balanceEnvironment {
    # Balances the \begin{env} and \end{env} commands of a block of
    # LaTeX text: missing \end{env} are appended and missing
    # \begin{env} are prepended. Returns the balanced block.
    my ($blk, $env) = @_;
    my $count = 0;
    while ($blk =~ /\\(begin|end)\{\Q$env\E\}/g) {
        $count += $1 eq "begin" ? 1 : -1;
    }
    if ($count > 0) {
        $blk .= "\n\\end{$env}" x $count;
    }
    elsif ($count < 0) {
        $blk = ("\\begin{$env}\n" x -$count) . $blk;
    }
    return $blk;
}

sub latexToHtml {
    # Gathers all the text blocks into a single LaTeX document
    # and passes them to LaTeX2HTML. The converted HTML is then split
    # and stored into convertedComments, at the same indices as the
    # corresponding text blocks in blockList.
    # Using a temporary directory avoids garbage created by
    # LaTeX2HTML to appear into the html subdirectory.
    @convertedComments = ();
    my $texFile = "";

    # Gather and merge the comments to form the core of the TeX file.
    for (my $i = 0; $i < @commentIndices; $i++) {
        my $ind = $commentIndices[$i];

        # Some commands need to be converted to environments in order
        # for LaTeX2HTML to treat them properly.
        # These changes have an impact on the LaTeX contents
        # printed into the Javadoc comments, so it will remain
        # when reverting to LaTeX.
        $blockList[$ind] =~
            s/\s*\\(tab+)\s*(?![a-zA-Z])/\n\\begin\{$1\}\n/g;
        $blockList[$ind] =~
            s/\s*\\end(tab+)\s*(?![a-zA-Z])/\n\\end\{$1\}\n/g;
        $blockList[$ind] =~
            s/\s*\\vcode\s*(?![a-zA-Z])/\n\\begin\{vcode\}\n/g;
        $blockList[$ind] =~
            s/\s*\\endvcode\s*(?![a-zA-Z])/\n\\end\{vcode\}\n/g;
        $blockList[$ind] =~
            s/\s*\\ifdetailed([^a-zA-Z][\s\S]*?)\s*\\fi\s*/\n\\begin{detailed}$1\n\\end{detailed}\n/g;

        # Some modifications must not have impact on the LaTeX contents
        # printed into comments and stay local to documentation
        # blocks to prevent an error from altering all the contents
        # and markers delimiting the blocks.
        my $blk = $blockList[$ind];
        $blk =~ s/\\iffalse[^a-zA-Z][\s\S]*?\\fi//g;

        # The extra begin and end hides that we added during block
        # parsing are converted to real hide environments
        # instructions whereas they are removed in the LaTeX
        # contents intended for appearing in the Javadoc.
        # This does not work anymore since blocks are sometimes split
        # a second time
        # $blk =~ s/\\(begin|end)\{\@hide\}/\\$1\{hide\}/g;
        # $blockList[$ind] =~ s/\\(begin|end)\{\@hide\}//g;

        # To ensure proper processing by LaTeX2HTML, the hide
        # and detailed environments must be balanced in each individual
        # block of text.
        $blk = &balanceEnvironment ($blk, "hide");
        $blk = &balanceEnvironment ($blk, "detailed");

        # Append the block with a special marker.
        $texFile .= "\\begin{rawhtml}\n<!--$MARKERPREFIX${ind}-->\n" .
            "\\end{rawhtml}\n$blk\n";
    }

    # LaTeX2HTML has difficulty when handling user-defined commands.
    # Some cannot be easily implemented into the Perl language during
    # the processing. This is mainly due to the complexity
    # of LaTeX2HTML and lack of documentation about
    # its internal processing.
    $texFile =~ s/\\eq(?![a-zA-Z])/\\[/g;
    $texFile =~ s/\\endeq(?![a-zA-Z])/\\]/g;
    $texFile =~ s/\\eqs(?![a-zA-Z])/\\begin{eqnarray*}/g;
    $texFile =~ s/\\endeqs(?![a-zA-Z])/\\end{eqnarray*}/g;
    $texFile =~ s/\\eqsn(?![a-zA-Z])/\\begin{eqnarray*}/g;
    $texFile =~ s/\\endeqsn(?![a-zA-Z])/\\end{eqnarray*}/g;
    $texFile =~ s/\\(begin|end)\s*\{\s*equation\s*\}/\\$1\{displaymath\}/g;
    $texFile =~ s/\\(begin|end)\s*\{\s*eqnarray\s*\}/\\$1\{eqnarray*\}/g;
    $texFile =~ s/\\left\.?(?![a-zA-Z])//g;
    $texFile =~ s/\\right\.?(?![a-zA-Z])//g;
    $texFile =~ s/\\mathcal(?![a-zA-Z])//g;
    $texFile =~ s/\\min(?![a-zA-Z])/\\htmin/g;
    $texFile =~ s/\\max(?![a-zA-Z])/\\htmax/g;
    $texFile =~ s/\\inf(?![a-zA-Z])/\\htinf/g;
    $texFile =~ s/\\sup(?![a-zA-Z])/\\htsup/g;
    $texFile =~ s/\\int(?![a-zA-Z])/\\htint/g;
    $texFile =~ s/\\sum(?![a-zA-Z])/\\htsum/g;
    $texFile =~ s/\\prod(?![a-zA-Z])/\\htprod/g;
    $texFile =~ s/\\lim(?![a-zA-Z])/\\htlim/g;

    # References in parentheses are common (e.g. equation numbers)
    # and L2H will only ignore the \ref command, leaving () in HTML!
    $texFile =~ s/\s*\(\\ref\{.*?\}\)//g;

    # A similar problem happens with \cite. Discarding \cite
    # can leave spaces before a word and the sentence-ending period.
    $texFile =~ s/\s*\\cite(?![a-zA-Z])/\\cite/g;

    # The \unmoved command is useless and can even disturb LaTeX2HTML
    # in some cases. So we must remove it from the TeX file passed to L2H.
    $texFile =~ s/\\unmoved(?![a-zA-Z])//g;

    # The created TeX file will have the same name as the input file
    # less the .tex extension.
    my $prefix = $ARGV[0];
    $prefix =~ s/\.tex$//i;
    my ($pvol, $pdir, $pname) = File::Spec->splitpath ($prefix);
    $prefix = $pname;
    my $texFileName = File::Spec->catfile ($tempDir, "$prefix.tex");

    # Since LaTeX2HTML needs a file, we must create a
    # standalone LaTeX document.
    open (TEXFILE, ">", $texFileName) or die "cannot create $texFileName";
    my $selfMaster = $preamble eq "";
    if (!$selfMaster) {
        print TEXFILE $preamble;
        print TEXFILE "\n\\begin{document}\n";
    }
    print TEXFILE $texFile;
    if (!$selfMaster) {
        print TEXFILE "\n\\end{document}\n";
    }
    close TEXFILE;

    # Construct a command line and call LaTeX2HTML
    my $cmdLine = "latex2html -split 0 -link 0 -nonavigation -nofootnode " .
        "-prefix \"$prefix\" -nosubdir -nomath -tmp \"$tempDir\" " .
        "-noinfo -noauto_link -noaddress " .
        ($images ? "-white " : "-nolatex -noimages ") .
        "-html_version 4.1,math,unicode \"$texFileName\"";

    # We cd to the directory where the TeX file is to allow
    # L2H to find .aux files. An empty texDir means the current
    # directory, for which no chdir is needed.
    chdir ($texDir) if $texDir ne "";
    system ($cmdLine) == 0
        or die "could not execute latex2html successfully";
    chdir ($oldCurDir);

    # Now, we have a bunch of files created by LaTeX2HTML.
    # Since splitting is disabled, we should have only one
    # HTML file but some images may have been created.
    # We need to get back the HTML contents but also
    # to copy the images.
    opendir (TMPDIR, $tempDir) or die "cannot open directory $tempDir";
    my $htmlFile = undef;
    while (defined (my $fn = readdir (TMPDIR))) {
        next if $fn eq "." || $fn eq "..";
        my $ffn = File::Spec->catfile ($tempDir, $fn);
        next if -d ($ffn) || !-r ($ffn);
        if (!defined ($htmlFile) && $ffn =~ /\.html?$/) {
            # The found file is readable, has an HTML extension
            # and no HTML file was previously found.
            # There should be only one HTML file
            open (HTML, "<", $ffn) or die "cannot open $ffn";
            $htmlFile = join ("", <HTML>);   # Reads the file in one step
            close HTML;

            # Only keep the body; the whole file is kept if no BODY
            # element is found.
            if ($htmlFile =~ /<BODY.*?>(.*)<\/BODY>/is) {
                $htmlFile = $1;
            }

            # Current browsers don't display &#123; and &#125;
            # properly and LaTeX2HTML outputs these sequences
            # when the Unicode extension is activated.
            $htmlFile =~ s/&#123;/\{/g;
            $htmlFile =~ s/&#125;/\}/g;

            # Sometimes, latex2html with the math extension
            # creates bad constructs such as &alpha#alpha;.
            # Only keep &alpha;.
            $htmlFile =~ s/&([a-zA-Z]+)\#.*?;/&$1;/g;

            # Some versions of LaTeX2HTML write mathend at each math
            # expression. The marker appears when the LaTeX file has some
            # complications like multiline section titles, and maybe
            # other multiline commands!
            $htmlFile =~ s/\n?mathend000\#//g;
        }
        elsif ($images && $ffn =~ /\.(png|gif)$/) {
            # The found file is readable and is a GIF or PNG image.
            # All the images will simply be copied into the
            # html output directory.
            open (IMGIN, "<", $ffn) or die "cannot open $fn";
            open (IMGOUT, ">", File::Spec->catfile ($htmlOutDir, $fn))
                or die "cannot create $fn";
            # Images are binary data; prevent end-of-line translation.
            binmode IMGIN;
            binmode IMGOUT;
            print IMGOUT join ("", <IMGIN>);
            close IMGOUT;
            close IMGIN;
        }
    }
    closedir TMPDIR;

    # This should never happen
    die "could not find the HTML output file" if !defined ($htmlFile);

    # Now, we have the converted LaTeX into the htmlFile variable.
    # We must split its contents and put the parts into corresponding
    # comment blocks.
    my @htmlFileParts = split /(<!--$MARKERPREFIX\d+-->)/, $htmlFile;
    my $currentInd = -1;
    foreach my $sbl (@htmlFileParts) {
        if ($sbl =~ /^<!--$MARKERPREFIX(\d+)-->$/) {
            $currentInd = $1;
        }
        elsif ($currentInd != -1 || ($selfMaster && @htmlFileParts == 1)) {
            $currentInd = 0 if $currentInd == -1;
            # Sometimes, converted LaTeX blocks contain images
            # with alternative text containing LaTeX code.
            # We will assume that \u is never used in the converted
            # HTML, see processTextBlock for more information
            # about this trick.
            $sbl =~ s/\\([uU])/\\\@ $1/g;
            $convertedComments[$currentInd] .= $sbl;
        }
    }
}