#!/usr/bin/perl

# Extracts the code from LaTeX documents and outputs the documentation
# into Javadoc-style comments.
#
# Author: Eric Buist
# Summer 2002 and 2003

# The script parses its command-line arguments, reads the input
# file and slices it into code blocks and text blocks. The contents
# are inserted into a list of blocks and the text blocks' indices
# are saved. All the text blocks are then merged together in order
# to initiate a single run of LaTeX2HTML. The converted contents
# are then parsed and distributed to the text blocks, which are
# then transformed into Javadoc-style comments. The list of blocks
# is finally merged and printed to the output file.

use Cwd;
use Getopt::Long;
use File::Path;             # To delete the temp directory
use File::Spec;             # Portability when using pathnames
use strict;

# success: used to indicate whether the output file has to be deleted
# preamble: contains the preamble of the master LaTeX file
# tempDir: contains the path of the temporary directory
# outDir: directory where the output file will be
# htmlOutDir: directory where the Javadoc HTML will go, used to put generated
#  images at the right place.
# blockList: list of all code and text blocks
# commentIndices: indices of all the text blocks in blockList
# convertedComments: result of the LaTeX2HTML conversion split into chunks
# classDocIndex: index, in blockList, of the class doc block
# images: indicates if images are generated during the conversion.
# html: indicates if we perform the LaTeX to HTML conversion.
# savelatex: indicates if the LaTeX contents is saved into the comments.
# htmlonly: indicates if we convert a text-only file to HTML.
# oldCurDir: stores the current directory when the script starts.
# texDir: directory where the processed TeX file is.
# masterFile: name of the master file, if any
# htmlonlyTitle: title of the HTML document in htmlonly mode.
use vars qw($success $preamble $tempDir $outDir $htmlOutDir  $classDocIndex
            @blockList @commentIndices @convertedComments
            $images $html $savelatex $htmlonly $oldCurDir
            $texDir $masterFile $htmlonlyTitle);

# Initialization

$oldCurDir = getcwd;
my $TEXTMODE = 0;
my $CODEMODE = 1;
# Commands initiating a code block.
# vcode is not there because it only indicates a code snippet
# included in the documentation.
my $CODECMDS = "code|smallcode|longcode";
$tempDir = "";

$success = 1;

$images = '';        # default: no image generation
$html = '';          # default: produces no html
$savelatex = '';     # default: no saving for latex
$htmlonly = '';      # default: convert a LaTeX file to Java
$htmlonlyTitle = '';
$htmlOutDir = '';    # default: empty string, will be replaced by $outDir/html
$masterFile = undef;

# A file extension is a final ".xxx" located in the LAST path component.
# Matching from the first dot of the whole path (as /\..*$/ does) would
# treat "./Foo.tex" or "../src/Foo" as a name whose extension starts at
# the leading dot, and stripping it would leave an empty base name.
my $EXTRX = qr/\.[^.\/\\]*$/;

# Check the arguments.  htmlonly is negatable so that the parser agrees
# with the usage message below.
if (!GetOptions ('images!' => \$images,
                 'html!' => \$html,
                 'savelatex!' => \$savelatex,
                 'htmlonly!' => \$htmlonly,
                 'htmltitle=s' => \$htmlonlyTitle,
                 'htmloutdir=s' => \$htmlOutDir,
                 'master=s' => \$masterFile) ||
    (@ARGV < 1 || @ARGV > 2)) {
    print "usage: perl texjava.pl [-(no)images] [-(no)html] "
        . "[-(no)savelatex] [-(no)htmlonly] [-htmltitle title] [-htmloutdir dir] "
        . "[-master master] <fin> [<fout>]\n";
    exit 1;
}

# Extension given to the output file when none is specified.
my $outExt = $htmlonly ? ".html" : ".java";

# If no extension specified and the file does not exist, append .tex
$ARGV[0] .= ".tex" if $ARGV[0] !~ $EXTRX && !-e $ARGV[0];

if (!defined ($ARGV[1])) {
    # The out file is not given: strip the extension of the input
    # file name and append the output extension.
    ($ARGV[1] = $ARGV[0]) =~ s/$EXTRX//;
    $ARGV[1] .= $outExt;
}
elsif ($ARGV[1] !~ $EXTRX) {
    # Append .java or .html if necessary
    $ARGV[1] .= $outExt;
}

$masterFile .= ".tex"
    if defined ($masterFile) && $masterFile !~ $EXTRX && !-e $masterFile;

die ("filenames must be different") if $ARGV[0] eq $ARGV[1];

# Determine the directory where fout will be created or overwritten.
# Convert to absolute path first, then strip the file name.
# rel2abs is applied unconditionally: it leaves an absolute path
# absolute, whereas skipping it for absolute names left $outDir
# undefined.  Library functions are used for portability purposes.
$outDir = File::Spec->rel2abs ($ARGV[1]);
my ($vol, $dir, $file) = File::Spec->splitpath ($outDir);
$outDir = File::Spec->catpath ($vol, $dir, "");

# Directory of the input file; empty when the file is given without
# any directory part.
($vol, $dir, $file) = File::Spec->splitpath ($ARGV[0]);
$texDir = File::Spec->catpath ($vol, $dir, "");

# If htmlOutDir is empty, set it with a default value.
$htmlOutDir = File::Spec->catdir ($outDir, "html") if $htmlOutDir eq '';

# Read the master file named by the -master option, if any, and keep
# its LaTeX preamble (everything before \begin{document}) into the
# preamble variable.  Without a master file the preamble stays empty.
$preamble = "";
if (defined $masterFile) {
    # Three-argument open: the name comes from the command line and
    # must never be interpreted as an open mode or a pipe.
    open (my $masterFh, '<', $masterFile) or die "cannot open $masterFile";
    $preamble = join ("", <$masterFh>);
    close ($masterFh);

    # We do not want all the document, only the preamble.
    $preamble =~ s/\\begin\s*\{document\}.*$//s;
}

# Open the input file.  The three-argument form prevents the file name
# given on the command line from being interpreted as an open mode.
# FIN is a package file handle because the END sub closes it.
open (FIN, '<', $ARGV[0]) or die ("cannot open input file $ARGV[0]");

# From now on the run is considered unsuccessful until the output file
# has been written.
$success = 0;
@blockList = ();
@commentIndices = ();

# Initialize the block parser to text mode, unhidden
# and with an empty current block.
my $currentBlockMode = $TEXTMODE;  # the mode of the currently parsed block
my $hideCount = 0;                 # number of open hide environments
my $currentBlock = "";             # currently parsed block
my $lastCodeBlock = "";            # the last parsed code block
my $classDocPrinted = 0;           # 1 if first block of text printed
my $classDocBlock = "";            # first block of text, class documentation
my $codeCmd = "CODE";              # command used to initiate the code block

if ($htmlonly) {
    # We convert the entire LaTeX file to HTML: the whole file is a
    # single text block.
    push @blockList, join ('', <FIN>);
    push @commentIndices, 0;
}
else {
    # Read the file line by line and split it into blocks
    # delimited by code environments.
    while (<FIN>) {
        my $li = $_;      # use a named variable instead of the default $_

        # Each iteration consumes the beginning of $li: either a whole
        # recognized command or a single character.  This allows
        # processing more than one command per line.  The tests are
        # order-sensitive.
        while (length ($li) > 0) {
            if ($currentBlockMode == $CODEMODE &&
                $li =~ /^\/\*(\*+)/) {
                # A Javadoc-style comment is either in the code, or in
                # a literal string. Since this kind of things does not
                # occur in strings very often, we can transform it
                # a little bit. It will have the form /* * instead of /**.
                $currentBlock .= "/* $1";
                $li = $';
            }
            elsif ($currentBlockMode == $CODEMODE &&
                   $li =~ /^\/\*($CODECMDS|hide|endhide)\*\//io) {
                # Same idea: a comment looking like one of our own
                # markers is altered so it cannot be mistaken for one.
                $currentBlock .= "/* $1 */";
                $li = $';
            }
            elsif ($li =~ /^\\($CODECMDS)(?![a-zA-Z])/o ||
                   $li =~ /^\\begin\s*\{\s*($CODECMDS)\s*\}/o) {
                # We have to look ahead for a non-letter character
                # to end the code command. Without this check,
                # some custom user commands (e.g., codebox) may
                # be processed as a code command.

                # Print an error message, ($. being the line number),
                # and exit the script if we already are in code mode.
                die ("l.$.: $li: \\begin{code} inside " .
                     "\\begin{code} not permitted")
                    if $currentBlockMode == $CODEMODE;

                my $cmd = $1;   # save the used code command
                $li = $';       # save the part after \code for the next block

                if ($classDocBlock eq "") {
                    # When the first \code command is encountered,
                    # currentBlock is the first block of text and
                    # is considered the class documentation block.
                    # This block will be inserted in the first code
                    # block just on top of the class declaration.
                    $classDocBlock = $currentBlock;

                    # classDocBlock must not be empty or we will
                    # never output anything.
                    $classDocBlock = " " if $classDocBlock eq "";
                }
                else {
                    # The current block is a text block because code
                    # mode and text mode alternate. So we have
                    # to process this text block and the last code block
                    # in order to save the result to the block list.
                    &processBlocks ($currentBlock, $lastCodeBlock,
                                    $classDocPrinted ? "" : $classDocBlock);
                    $classDocPrinted = 1;
                }

                # If the command initiating the code block changes,
                # we add a marker in the code file.
                $cmd =~ tr/a-z/A-Z/;
                $currentBlock = $cmd ne $codeCmd && $savelatex
                    ? "/*$cmd*/" : "";
                $codeCmd = $cmd;
                $currentBlockMode = $CODEMODE;
            }
            elsif ($li =~ /^\\end($CODECMDS)(?![a-zA-Z])/o ||
                   $li =~ /^\\end\s*\{\s*($CODECMDS)\s*\}/o) {
                # The environment form tolerates the same optional
                # white space as the \begin{...} form above.
                die ("l.$.: $li: \\end{code} without corresponding "
                     . "\\begin{code} found")
                    if $currentBlockMode == $TEXTMODE;

                # Save the code block for future processing
                $lastCodeBlock = $currentBlock;

                $li = $';      # save the part after \endcode for the next block

                # a new text block starts
                $currentBlock = "";
                $currentBlockMode = $TEXTMODE;
            }
            elsif ($li =~ /^\\hide(?![a-zA-Z])/ ||
                   $li =~ /^\\begin\s*\{\s*hide\s*\}/) {
                $hideCount++;

                # The hiding is recorded with an environment in text
                # blocks and with a comment marker in code blocks.
                $currentBlock .= "\\begin{hide}"
                    if $currentBlockMode == $TEXTMODE;
                $currentBlock .= "/*HIDE*/" if $currentBlockMode == $CODEMODE;
                $li = $';      # allow further processing of the line
            }
            elsif ($li =~ /^\\endhide(?![a-zA-Z])/ ||
                   $li =~ /^\\end\s*\{\s*hide\s*\}/) {
                die ("l.$.: $li: \\end{hide} found without a \\begin{hide}")
                    if !$hideCount;

                $currentBlock .= "\\end{hide}"
                    if $currentBlockMode == $TEXTMODE;
                $currentBlock .= "/*ENDHIDE*/"
                    if $currentBlockMode == $CODEMODE;
                $hideCount--;
                $li = $';
            }
            elsif ($currentBlockMode == $TEXTMODE && $li =~ /^%/) {
                # LaTeX comment found, stop processing this line.
                $currentBlock .= $li;
                $li = "";
            }
            else {
                # Nothing special here: move a single character to
                # the current block.
                $currentBlock .= substr ($li, 0, 1);
                $li = substr ($li, 1);
            }
        }            # End while for reading the line
    }            # End while for reading FIN

    # Final balancing checkup
    die ("missing \\end{code}") if $currentBlockMode == $CODEMODE;
    die ("missing \\end{hide}") if $hideCount;

    # We are now in unhidden text mode and there is
    # a last unprocessed text block and code block. At worst,
    # both of these blocks would be empty.
    &processBlocks ($currentBlock, $lastCodeBlock,
                    $classDocPrinted ? "" : $classDocBlock);
}

# Convert all the text blocks into HTML, when requested.
if ($html) {
    if ($images) {
        # The generated images are copied into htmlOutDir, which
        # must exist and be accessible before the conversion.
        mkpath ($htmlOutDir, 0, 0755) unless -d $htmlOutDir;
        die ("Cannot access $htmlOutDir") unless -x $htmlOutDir;
    }

    # LaTeX2HTML works inside a temporary directory.
    createTempDir ();
    latexToHtml ();
}

if ($htmlonly) {
    # HTML-only mode: the single converted block, less its leading
    # and trailing paragraph/rule tags, becomes the body of a
    # standalone HTML document.
    $convertedComments[0] =~ s/^(\s*<P>|\s*<[BH]R>)*//gio;
    $convertedComments[0] =~ s/(<P>\s*|<[BH]R>\s*)*$//gio;
    $blockList[0] = join ("",
        "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\n",
        "<HTML>\n<HEAD>\n<TITLE>$htmlonlyTitle</TITLE>\n",
        "</HEAD>\n<BODY>\n",
        $convertedComments[0],
        "</BODY>\n</HTML>\n");
}
else {
    # Transform the text blocks into Java comments incorporating
    # the HTML conversion and the original commented LaTeX contents.
    foreach my $ind (@commentIndices) {
        $blockList[$ind] = &processTextBlock ($blockList[$ind],
                                              $convertedComments[$ind],
                                              $ind == $classDocIndex);
    }

    # Post-processing on the code blocks, i.e. every block located
    # between two consecutive text blocks, and after the last one.
    my $hiderx = '\/\*HIDE\*\/';
    my $endHiderx = '\/\*ENDHIDE\*\/';
    my $previous = -1;
    foreach my $stop (@commentIndices, scalar (@blockList)) {
        foreach my $j ($previous + 1 .. $stop - 1) {
            # Remove every hide marker if the LaTeX contents is not saved.
            $blockList[$j] =~ s/$hiderx|$endHiderx//go if !$savelatex;
        }
        $previous = $stop;
    }
}

# Write the output file by merging all the blocks.  Buffered write
# errors (e.g., a full disk) may only show up when printing or closing,
# so both are checked to avoid silently leaving a truncated file.
open (FOUT, '>', $ARGV[1]) or die ("cannot open output file $ARGV[1]");
print FOUT join ("", @blockList)
    or die ("cannot write output file $ARGV[1]");
close (FOUT) or die ("cannot write output file $ARGV[1]");

$success = 1;

# We are done, the END sub will be called automatically.

sub END {
    # Cleanup run by Perl when the script terminates, whether it
    # finished normally or died: closes the input file and removes
    # the temporary directory, if one was created.
    # The output file itself is left untouched here.

    close (FIN);
    deleteTempDir ();
}


###################################################################


# sub countBraces {
#    # Count the number of braces into txt.
#    # Returns the difference between the number of opening and closing
#    # braces.

#    my $txt = shift;

#    my $nob = $txt =~ tr/\{/\{/;
#    my $ncb = $txt =~ tr/\}/\}/;

#    return $nob - $ncb;
# }

sub createTempDir {
    # Looks for a temporary directory and creates a subdirectory in it.
    # The candidates are, in order: the directories named by the TEMP,
    # TMP and TMPDIR environment variables (every one that is set, so
    # an unusable TEMP does not hide a usable TMPDIR), /tmp, the
    # output directory and the current directory.
    # On success, the absolute path of the created subdirectory is
    # stored into the global tempDir; an absolute path stays valid
    # even if the current directory changes afterwards.
    # Dies, with tempDir reset to the empty string, if no
    # subdirectory can be created.

    my @tempTry = ();

    foreach my $var ("TEMP", "TMP", "TMPDIR") {
        push @tempTry, $ENV{$var} if defined $ENV{$var};
    }
    push @tempTry, "/tmp";
    push @tempTry, $outDir;
    push @tempTry, ".";

    foreach my $tmp (@tempTry) {
        # An empty candidate would designate the root directory
        # once concatenated, so it is ignored.
        next if !defined ($tmp) || $tmp eq "";

        # We create a temporary subdirectory by appending
        # the current process ID to texjava.
        $tempDir = File::Spec->rel2abs (
            File::Spec->catdir ($tmp, "texjava$$"));
        return if mkdir ($tempDir, 0755);
    }
    $tempDir = "";
    die "cannot create temporary subdirectory";
}

sub deleteTempDir {
    # Removes the temporary subdirectory named by the global tempDir,
    # with everything it contains.  Nothing is done when tempDir
    # is empty, i.e. when no subdirectory was created.

    if (length ($tempDir) > 0) {
        # A failure is only reported: this sub is called while the
        # script terminates, so dying here is not an option.
        my $removed = rmtree ([$tempDir], 0, 1);
        print "cannot delete $tempDir\n" unless $removed;
    }
}

sub findLastDeclaration {
    # Finds, in a code block, the place where the documentation of
    # the last visible declaration can be inserted.
    # The block is cut at the /*HIDE*/ and /*ENDHIDE*/ markers; the
    # returned value is the offset, into the block, of the first
    # non-blank line of the last visible (unhidden) part containing
    # a non-blank line.
    # -1 is returned if the block has no visible non-blank line.
    # A former heuristic based on the balancing of parentheses was
    # disabled and is not reproduced here.

    my $codeBlock = shift;

    my $found = -1;      # offset of the last seen insertion point
    my $depth = 0;       # number of currently open hidden parts
    my $offset = 0;      # offset of the current piece into the block

    # The capturing group makes split return the markers too.
    foreach my $piece (split /(\/\*(?:END)?HIDE\*\/)/, $codeBlock) {
        if ($piece eq "/*HIDE*/") {
            $depth++;
        }
        elsif ($piece eq "/*ENDHIDE*/") {
            $depth--;
        }
        elsif ($depth == 0) {
            # Visible piece: walk through its lines (and the runs of
            # newlines separating them) up to the first non-blank line.
            my $lineStart = $offset;
            foreach my $chunk (split /(\n+)/, $piece) {
                if ($chunk =~ /\S/) {
                    $found = $lineStart;
                    last;
                }
                $lineStart += length ($chunk);
            }
        }
        $offset += length ($piece);
    }
    return $found;
}

sub processBlocks {
    # Processes a block of text and a block of code, and appends the
    # result to the global blockList.
    #
    # Arguments:
    #   textBlock      LaTeX text following the code block
    #   codeBlock      code block the text documents
    #   classDocBlock  class documentation, or the empty string if
    #                  it was already output
    #
    # The text block is inserted into the code block at the position
    # given by findLastDeclaration.  If a non-empty class documentation
    # block is passed, it is inserted just on top of the first line
    # holding a class or interface keyword, or at the top of the code
    # block if there is no such line.
    # The index of every inserted text block is appended to
    # commentIndices and classDocIndex receives the index of the
    # class documentation.  htmlOutDir is extended with the package
    # path when the code block declares a package.

    my ($textBlock, $codeBlock, $classDocBlock) = @_;
    my $atBottom = "";

    # If no declaration is found, the best place to insert
    # the block is at its original location in the LaTeX file,
    # so we put it at the end of the code block.
    my $insPos = &findLastDeclaration ($codeBlock);
    if ($insPos == -1) {
        $atBottom = $textBlock;
        $textBlock = "";
    }
    if ($textBlock =~ /(\\(sub)*section\*?|\\guisec|\\unmoved)(?![a-zA-Z])/) {
        # If a sectioning command is found, the text block must be split.
        # The part following the section (maybe a summary description)
        # will not be moved whereas the part preceding it (method
        # description) will be moved.
        my $moved = $`;
        $atBottom = substr ($textBlock, length ($moved));
        $textBlock = $moved;
    }
    $atBottom = "% UNMOVED\n$atBottom" if $atBottom ne "";

    # Comments to insert into the code block, as [position, text,
    # isClassDoc] triplets sorted by increasing position.
    my @insertions = ();

    if ($classDocBlock ne "") {
        # If the class documentation is already printed,
        # the empty string is received and we do not enter here.

        # The insertion point is the beginning of the first line
        # containing the class or interface keyword not preceded,
        # on its line, by any comment character.  The keyword must
        # be a whole word, so identifiers merely containing it
        # (e.g., in "import foo.classes.Bar;") are not mistaken for
        # a declaration.  If no such line can be found, for
        # instance in a C program, 0 is kept.
        my $docPos = 0;
        $docPos = $-[0]
            if $codeBlock =~ /^[^\*\/\n]*?\b(?:class|interface)\b/m;

        # The first code block can also specify a package name
        # which must be taken into account for proper placement
        # of generated images, if any.
        if ($codeBlock =~ /^[^\*\/\n]*?package\s*(.*?);/m) {
            # Convert the package name into a path by replacing the
            # periods with the platform-specific path delimiter.
            my @packageParts = split /\./, $1;
            $htmlOutDir = File::Spec->catdir ($htmlOutDir, @packageParts);
        }

        push @insertions, [$docPos, $classDocBlock, 1];
    }
    if ($textBlock ne "") {
        # The class documentation comes first when both comments
        # share the same position.
        if (@insertions && $insertions[0][0] > $insPos) {
            unshift @insertions, [$insPos, $textBlock, 0];
        }
        else {
            push @insertions, [$insPos, $textBlock, 0];
        }
    }

    # Output the code block sliced at the insertion points, each
    # slice being followed by the comment inserted at its end.
    my $sliceStart = 0;
    foreach my $insertion (@insertions) {
        my ($pos, $comment, $isClassDoc) = @$insertion;
        push @blockList, substr ($codeBlock, $sliceStart, $pos - $sliceStart);
        push @blockList, $comment;
        push @commentIndices, $#blockList;
        $classDocIndex = $#blockList if $isClassDoc;
        $sliceStart = $pos;
    }
    push @blockList, substr ($codeBlock, $sliceStart);

    if ($atBottom ne "") {
        push @blockList, $atBottom;
        push @commentIndices, $#blockList;
    }
}

sub processTextBlock {
    # Formats a text block as a Java comment.
    #
    # Arguments:
    #   latexBlock  original LaTeX contents of the block
    #   htmlBlock   HTML conversion of the block
    #   isClassDoc  if true, the comment is not indented
    #
    # Returns the comment, or the empty string when there is nothing
    # to output: blank LaTeX block, or neither the HTML conversion nor
    # the saving of the LaTeX contents requested (html and savelatex
    # globals).

    my ($latexBlock, $htmlBlock, $isClassDoc) = @_;

    # Drop the useless paragraph and rule tags surrounding the HTML.
    $htmlBlock =~ s/^(\s*<P>|\s*<[BH]R>)*//gio;
    $htmlBlock =~ s/(<P>\s*|<[BH]R>\s*)*$//gio;

    # An empty LaTeX block yields no comment at all.
    if ($latexBlock =~ /^\s*(% UNMOVED)?\s*$/) {
        return "";
    }
    if (!$html && !$savelatex) {
        return "";
    }

    # Normally, a sectioning block of text documents no function:
    # only its LaTeX contents, if any, is output.
    my $latexonly = 0;
    if ($latexBlock =~ /(\\(sub)*section\*?|\\guisec)(?![a-zA-Z])/) {
        $latexonly = 1;
    }

    # HTML comments disturb Javadoc when they appear inside the
    # brief, so they are removed from the converted contents.
    $htmlBlock =~ s/<!--[\s\S]*?-->//g;

    # The brief is what precedes the first period.  When it holds
    # a table, a div or an image, a lone period is put in front of
    # the block to get an empty brief instead.
    my $brief = $htmlBlock;
    if ($htmlBlock =~ /^([^.]*)\./) {
        $brief = $1;
    }
    if ($brief =~ /<DIV|<TABLE|IMG/i) {
        $htmlBlock = ".\n$htmlBlock";
    }

    if (!$savelatex) {
        $latexBlock = "";
    }

    # The LaTeX contents must neither open nor close an HTML comment:
    # the inserted {} changes nothing to the LaTeX output.
    # */ is altered as well, in both contents, because it would end
    # the Java comment.
    $latexBlock =~ s/<!--/<!{}--/g;
    $latexBlock =~ s/-->/--{}>/g;
    $latexBlock =~ s/\*\//\*{}\//g;
    $htmlBlock =~ s/\*\//\*<!-- -->\//g;

    # Java takes \u as the beginning of a unicode escape, so LaTeX
    # commands starting with u are written \@u.  An \@u already in
    # the document is first changed to \@ u to remain distinct.
    $latexBlock =~ s/\\@([uU])/\\@ \1/g;
    $latexBlock =~ s/\\([uU])/\\@\1/g;

    # Assemble the contents of the comment: the HTML followed by the
    # LaTeX wrapped into an HTML comment, or the LaTeX alone when
    # there is no usable HTML.
    if ($htmlBlock =~ /^(\s|<P>|<BR>)*$/i) {
        $latexonly = 1;
    }
    my $blk = "";
    if (!$latexonly) {
        $blk = $htmlBlock;
    }
    if ($latexBlock =~ /\S/) {
        if ($latexonly) {
            $blk .= $latexBlock . "\n";
        }
        else {
            $blk .= "<!--LATEX\n" . $latexBlock . "-->" . "\n";
        }
    }

    # Nothing is commented out if the contents is blank.
    return $blk if $blk !~ /\S/;

    # Turn the contents into a Java comment, every line starting
    # with a star.
    $blk =~ s/^\s+//;
    if ($isClassDoc) {
        $blk = ($latexonly ? "/*LATEX\n" : "/**\n") . $blk;
        $blk =~ s/\n(?=.)/\n * /gs;
        $blk .= " */\n";
    }
    else {
        $blk = ($latexonly ? "   /*LATEX\n" : "   /**\n") . $blk;
        $blk =~ s/\n(?=.)/\n    * /gs;
        $blk .= "    */\n";
    }
    return $blk;
}

sub latexToHtml {
    # Gathers all the text blocks into a single LaTeX document,
    # passes it to LaTeX2HTML and distributes the resulting HTML
    # into convertedComments, indexed like blockList.
    # Using a temporary directory avoids garbage created by
    # LaTeX2HTML to appear into the html subdirectory.
    # When image generation is enabled, the PNG and GIF files
    # produced by LaTeX2HTML are copied into htmlOutDir.
    # Dies if a file cannot be read or written, if latex2html
    # fails, or if no HTML output can be found.

    @convertedComments = ();

    my $texFile = "";

    # Text put in front of each detailed environment replacing an
    # \ifdetailed: a newline, a line of twelve blanks and a newline.
    my $detailedLead = "\n" . (" " x 12) . "\n";

    # Gather and merge the comments to form the core of the TeX file.
    foreach my $ind (@commentIndices) {
        # Some commands need to be converted to environments in order
        # for LaTeX2HTML to treat them properly.
        # These changes have an impact on the LaTeX contents
        # printed into the Javadoc comments, so it will remain
        # when reverting to LaTeX.
        $blockList[$ind] =~ s/\s*\\(tab+)\s*(?![a-zA-Z])/\n\\begin\{$1\}\n/g;
        $blockList[$ind] =~ s/\s*\\end(tab+)\s*(?![a-zA-Z])/\n\\end\{$1\}\n/g;
        $blockList[$ind] =~ s/\s*\\vcode\s*(?![a-zA-Z])/\n\\begin\{vcode\}\n/g;
        $blockList[$ind] =~ s/\s*\\endvcode\s*(?![a-zA-Z])/\n\\end\{vcode\}\n/g;
        $blockList[$ind] =~ s/\s*\\ifdetailed([^a-zA-Z][\s\S]*?)\s*\\fi\s*/${detailedLead}\\begin{detailed}$1\n\\end{detailed}\n/g;

        # Some modifications must not have impact on the LaTeX contents
        # printed into comments and stay local to documentation
        # blocks to prevent an error from altering all the contents
        # and markers delimiting the blocks.
        my $blk = $blockList[$ind];
        $blk =~ s/\\iffalse[^a-zA-Z][\s\S]*?\\fi//g;

        # To ensure proper processing by LaTeX2HTML, the hide and
        # detailed environments must be balanced in each individual
        # block of text.
        $blk = balanceEnvironment ($blk, "hide");
        $blk = balanceEnvironment ($blk, "detailed");

        # Append the block with a special marker.
        $texFile .= "\\begin{rawhtml}\n<!--TEXJAVA:$ind -->\n" .
            "\\end{rawhtml}\n$blk\n";
    }

    # LaTeX2HTML has difficulty when handling user-defined commands.
    # Some cannot be easily implemented into the Perl language during
    # the processing, so they are replaced with standard constructs.
    $texFile =~ s/\\eq(?![a-zA-Z])/\\[/g;
    $texFile =~ s/\\endeq(?![a-zA-Z])/\\]/g;
    $texFile =~ s/\\eqs(?![a-zA-Z])/\\begin{eqnarray*}/g;
    $texFile =~ s/\\endeqs(?![a-zA-Z])/\\end{eqnarray*}/g;
    $texFile =~ s/\\eqsn(?![a-zA-Z])/\\begin{eqnarray*}/g;
    $texFile =~ s/\\endeqsn(?![a-zA-Z])/\\end{eqnarray*}/g;
    $texFile =~ s/\\(begin|end)\s*\{\s*equation\s*\}/\\$1\{displaymath\}/g;
    $texFile =~ s/\\(begin|end)\s*\{\s*eqnarray\s*\}/\\$1\{eqnarray*\}/g;

    $texFile =~ s/\\left\.?(?![a-zA-Z])//g;
    $texFile =~ s/\\right\.?(?![a-zA-Z])//g;
    $texFile =~ s/\\mathcal(?![a-zA-Z])//g;

    # Math operators are renamed with an ht prefix (\min becomes
    # \htmin, and so on).
    foreach my $op (qw(min max inf sup int sum prod lim)) {
        $texFile =~ s/\\${op}(?![a-zA-Z])/\\ht${op}/g;
    }

    # References in parentheses are common (e.g. equation numbers)
    # and L2H will only ignore the \ref command, leaving () in HTML!
    $texFile =~ s/\s*\(\\ref\{.*?\}\)//g;
    # A similar problem happens with \cite. Discarding \cite
    # can leave spaces before a word and the sentence-ending period.
    $texFile =~ s/\s*\\cite(?![a-zA-Z])/\\cite/g;

    # The \unmoved command is useless and can even disturb LaTeX2HTML
    # in some cases. So we must remove it from the TeX file passed to L2H.
    $texFile =~ s/\\unmoved(?![a-zA-Z])//g;

    # The created TeX file will have the same name as the input file
    # less the directory and the .tex extension.
    my $prefix = $ARGV[0];
    $prefix =~ s/\.tex$//i;
    $prefix = (File::Spec->splitpath ($prefix))[2];

    # The paths given to LaTeX2HTML must remain valid after the
    # change of directory made below, so they are made absolute.
    my $absTempDir = File::Spec->rel2abs ($tempDir);
    my $texFileName = File::Spec->catfile ($absTempDir, "$prefix.tex");

    # Since LaTeX2HTML needs a file, we must create a
    # standalone LaTeX document.  Without a preamble, the text is
    # assumed to be a complete document by itself.
    my $selfMaster = $preamble eq "";
    open (my $texOut, '>', $texFileName) or die "cannot create $texFileName";
    print {$texOut} $preamble, "\n\\begin{document}\n" if !$selfMaster;
    print {$texOut} $texFile;
    print {$texOut} "\n\\end{document}\n" if !$selfMaster;
    close ($texOut) or die "cannot create $texFileName";

    # Construct a command line and call LaTeX2HTML
    my $cmdLine = "latex2html -split 0 -link 0 -nonavigation -nofootnode " .
        "-prefix \"$prefix\" -nosubdir -nomath -tmp \"$absTempDir\" " .
        "-noinfo -noauto_link -noaddress " .
        ($images ? "-white " : "-nolatex -noimages ") .
        "-html_version 4.1,math,unicode \"$texFileName\"";

    # We cd to the directory where the TeX file is to allow
    # L2H to find .aux files.  An empty texDir means the current
    # directory; chdir must not be called with an empty name.
    my $moved = 0;
    $moved = chdir ($texDir) if defined ($texDir) && $texDir ne "";
    my $status = system ($cmdLine);
    # Come back before reporting any error, so the cleanup made
    # when the script terminates runs from the original directory.
    chdir ($oldCurDir) if $moved;
    die "could not execute latex2html successfully" if $status != 0;

    # Now, we have a bunch of files created by LaTeX2HTML.
    # Since splitting is disabled, we should have only one
    # HTML file but some images may have been created.
    # We need to get back the HTML contents but also
    # to copy the images.
    opendir (my $tmpDh, $absTempDir) or die "cannot open directory $tempDir";
    my $htmlFile = undef;
    while (defined (my $fn = readdir ($tmpDh))) {
        next if $fn eq "." || $fn eq "..";
        my $ffn = File::Spec->catfile ($absTempDir, $fn);
        next if -d ($ffn) || !-r ($ffn);
        if (!defined ($htmlFile) && $ffn =~ /\.html?$/) {
            # The found file is readable, has an HTML extension
            # and no HTML file was previously retained.
            open (my $htmlIn, '<', $ffn) or die "cannot open $ffn";
            my $page = join ("", <$htmlIn>);  # Reads the file in one step
            close ($htmlIn);

            # Only keep the body; a file without body is ignored.
            next if $page !~ /<BODY.*?>(.*)<\/BODY>/is;
            $htmlFile = $1;

            # Current browsers don't display &lbrace; and &rbrace;
            # properly and LaTeX2HTML outputs these sequences
            # when the Unicode extension is activated.
            $htmlFile =~ s/&lbrace;/\{/g;
            $htmlFile =~ s/&rbrace;/\}/g;
            # Sometimes, latex2html with the math extension
            # creates bad constructs such as &alpha#alpha;.
            # Only keep &alpha;.
            $htmlFile =~ s/&([a-zA-Z]+)\#.*?;/&$1;/g;
            # Some versions of LaTeX2HTML write mathend at each math
            # expression.  The marker appears when the LaTeX file has
            # some complications like multiline section titles.
            $htmlFile =~ s/\n?<tex2html_verbatim_mark>mathend000\#//g;
        }
        elsif ($images && $ffn =~ /\.(png|gif)$/) {
            # The found file is readable and is a GIF or PNG image.
            # All the images are simply copied into htmlOutDir.
            # The handles are set to binary mode: images must be
            # copied byte for byte on every platform.
            open (my $imgIn, '<', $ffn) or die "cannot open $fn";
            binmode ($imgIn);
            open (my $imgOut, '>', File::Spec->catfile ($htmlOutDir, $fn))
                or die "cannot create $fn";
            binmode ($imgOut);
            print {$imgOut} join ("", <$imgIn>);
            close ($imgOut) or die "cannot create $fn";
            close ($imgIn);
        }
    }
    closedir ($tmpDh);
    # This should never happen
    die "could not find the HTML output file" if !defined ($htmlFile);

    # Now, we have the converted LaTeX into the htmlFile variable.
    # We must split its contents at the markers and put the parts
    # into the corresponding comment blocks.
    my @htmlFileParts = split /(<!--TEXJAVA:\d+\s*-->)/, $htmlFile;
    my $currentInd = -1;
    foreach my $sbl (@htmlFileParts) {
        if ($sbl =~ /^<!--TEXJAVA:(\d+)\s*-->$/) {
            $currentInd = $1;
        }
        elsif ($currentInd != -1 ||
               ($selfMaster && @htmlFileParts == 1)) {
            $currentInd = 0 if $currentInd == -1;
            # Sometimes, converted LaTeX blocks contain images
            # with alternative text containing LaTeX code.
            # A \u would be taken as a unicode escape by Java, so
            # it is written \@ u.
            $sbl =~ s/\\([uU])/\\\@ $1/g;
            $convertedComments[$currentInd] .= $sbl;
        }
    }
}

sub balanceEnvironment {
    # Helper for latexToHtml: returns the text blk with as many
    # \end{env} appended, or \begin{env} prepended, as needed for
    # the numbers of \begin{env} and \end{env} it contains to be equal.

    my ($blk, $env) = @_;

    my $open = 0;
    while ($blk =~ /\\(begin|end)\{\Q$env\E\}/g) {
        $open += $1 eq "begin" ? 1 : -1;
    }
    if ($open > 0) {
        $blk .= "\n\\end{$env}" x $open;
    }
    elsif ($open < 0) {
        $blk = ("\\begin{$env}\n" x -$open) . $blk;
    }
    return $blk;
}
