File: texexpand

package info (click to toggle)
latex2html 98.1p1-5
  • links: PTS
  • area: contrib
  • in suites: hamm
  • size: 5,724 kB
  • ctags: 2,564
  • sloc: perl: 30,202; makefile: 696; sh: 120
file content (1111 lines) | stat: -rwxr-xr-x 37,334 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
#!/usr/bin/perl 

# $Id: texexpand,v 1.21 1998/02/19 22:26:49 latex2html Exp $
#
# texexpand for LaTeX2HTML 97.2

# Based on texexpand by Robert Thau, MIT AI lab, including modifications by
# Franz Vojik <vojik@de.tu-muenchen.informatik>
# Nikos Drakos <nikos@cbl.leeds.ac.uk>
# Sebastian Rahtz <spqr@uk.ac.tex.ftp>
# Maximilian Ott <max@com.nec.nj.ccrl>
# Martin Boyer
# Herbert Swan
# Jens Lippmann

# Recognizes \documentclass, \documentstyle, \usepackage, \RequirePackage,
# \begin{verbatim}...\end{verbatim}, %begin{latexonly}...%end{latexonly},
# \begin{latexonly}...\end{latexonly}, \input, \include, \verb, \latex
# \endinput, \end{document}
# \includecomment, \excludecomment
# \begin{"to exclude"}, \end{"to exclude"}
# %begin{"to exclude"}, %end{"to exclude"}

###############################################################################
# Notes:
#
# General translation mechanism:
#
#
# The main program latex2html calls texexpand with the document name
# in order to expand some of its \input and \include statements, here
# also called 'merging', and to write a list of sensitized style, class,
# input, or include file names.
# When texexpand has finished, all is contained in one file, TMP_foo.
# (assumed foo.tex is the name of the document to translate).
# 
# In this version, texexpand cares for following environments
# that may span include files / section boundaries:
#  a) \begin{comment}
#  b) %begin{comment}
#  c) \begin{any}  introduced with \excludecomment
#  d) %begin{any}
#  e) \begin{verbatim}
#  f) \begin{latexonly}
#  g) %begin{latexonly}
# 
# a)-d) cause texexpand to drop its contents, it will not show up in the
# output file. You can use this to 'comment out' a bunch of files, say.
# 
# e)-g) prevent texexpand from expanding input files, but the environment
# content goes fully into the output file.
# 
# Together with each merging of \input etc. there are so-called %%%texexpand
# markers accompanying the boundary.
# 
# When latex2html reads in the output file, it uses these markers to write
# each part to a separate file, and process them further.
#
#
#
# Detailed technical notes:
#
# 1. %begin{latexonly} and %end{latexonly} have to be on a separate line.
#    Anything between these tags (including the tags) is discarded.
# 2. \begin{latexonly} and \end{latexonly} have to be on a separate line.
#    Anything between these tags (including the tags) is not expanded.
# 3. [%\]begin{"to exclude"} and [%\]end{"to exclude"} have to be on a
#    separate line.
#    Anything between these tags (including the tags) is discarded.
# 4. \begin{verbatim/verbatim*} and \end{verbatim/verbatim*} have to be
#    on a separate line.
#    Anything between these tags (including the tags) is not expanded.
# 5. The scope of any such tags may extend over several files.
#    The opening tag for latexonly may occur on a different include level
#    than the closing tag.
#    The opening tag for verbatim/"to exclude" must occur within the same
#    file than the closing tag.
# 6. Warnings are printed when the document has been parsed and open
#    tags remain.
# 7. When in a "to exclude"/verbatim environment, texexpand won't recognize
#    ANY command except the corresponding closing tag.
#    There cannot be any nested constructions.
#    This behaviour is identical to that of LaTeX.
# 8. \begin{latexonly},\end{latexonly} may be nested, whereas
#    %begin{latexonly},%end{latexonly} may not be nested.
# 9. A "%" tag cannot close a "\" tag, and vice versa.
# 10. Every \document(class|style), \usepackage, \input and \include command
#     has to be on a separate line.
# 11. Everything behind a `%' that isn't preceded by a `\' is regarded as
#     a comment, i.e. it is printed but not interpreted.
# 12. If any command listed in 10. is preceded by an occurence of `\verb' or
#    `\latex' then it is NOT interpreted. This crashes on lines like this:
#        blah blah \verb+foo foo+ \input{bar} % bar won't be loaded!
# 13. Packages provided via \usepackage are handled the same way as
#    `options' in \document(class|style), i.e. they are included when
#    -auto_exclude is off, the package isn't in @dont_include *OR* the
#    package is in @do_include (new). They are added to the style file 
#    together with their options if the file itself hasn't been merged.
#    \documentclass[options]{class} searches for every option.clo,
#    \documentstyle[options]{style} searches for every option.sty.
#    \usepackage[options]{packages} searches for every package.sty.
# 14. Each texinputs directory is searched for input files/styles. If it
#    ends in `//', the whole subdirectory tree is searched.
# 15. \input / \include merge the given file (if found under the given
#    name or with .tex extension) if its basename is in @do_include or if it
#    isn't in @dont_include or if the given filename doesn't end in 
#    .sty/.clo/.cls when -auto_exclude is set.
#
###############################################################################
# History:
#   mro = Marek Rouchal <marek@saftsack.fs.uni-bayreuth.de>
#   jcl = Jens Lippmann <lippmann@cdc.informatik.tu-darmstadt.de>
#
# $Log: texexpand,v $
# Revision 1.21  1998/02/19 22:26:49  latex2html
# th-darmstadt -> tu-darmstadt
#
# Revision 1.20  1997/12/04 07:35:25  RRM
#  --  include a  use lib  command, to find the Override.pm  module
#  --  generalised pattern for matching verbatim-like environments
#
# Revision 1.19  1997/11/05 11:31:27  RRM
#  --  changed the way Override.pm is called; this should work better.
#
# Revision 1.18  1997/10/14 16:28:16  JCL
# o added command line option -unsegment and $UNSEGMENT
#   Use latex2html -unsegment, or texexpand -unsegment, or set $UNSEGMENT to 1
#   in latex2html.config.
#
# Revision 1.17  1997/10/10 10:40:07  RRM
#  --  Oops, didn't quite get that right last time.
#
# Revision 1.15  1997/10/09 07:11:14  RRM
#  --  temporary fix to the Override problem
#
# Revision 1.14  1997/10/06 16:02:29  UW
# override.pm contains now unlink() too. Adapted the call to override.pm
# accordingly
#
# Revision 1.13  1997/10/06 14:49:37  UW
# Added support for override.pm to texepand.
# Furthermore, all references to the path-delimiter ':'
# should now be made via $envkey
# Texepand used previously the variable $DS as directory delimiter. Since
# all other modules use $dd, I changed $DS to $dd.
#
# Revision 1.12  1997/09/27 10:36:14  JCL
# o several enhancements to the inline documentation
# o small fix to &interprete, \input|include now doesn't loose the comment
#   if merging fails
# o introduced -no_segments switch (or set shell variable $NO_SEGMENTS to 1):
#   This will force a segmented document to expand its segment files, so
#   that it may be processed as a whole with LaTeX2HTML.
#   Use this feature to test a segmented document or whenever a document
#   needs to be fully expanded.
#   XtractFAQ will need this feature to determine the FAQ entries.
#
# Revision 1.11  1997/06/15 18:26:00  JCL
# Now texexpand will only merge files that exist *and* are readable.
# (Trying to merge a void link caused it to crash on my site.)
#
# Revision 1.10  1997/06/06 14:13:54  RRM
# This is the texexpand for V97.1.
#
#     only dofference is that it is quieter under  -debug .
#     use  -verbosity <num>  as well, to get all the previous messages,
#     when  <num> is at least 2.
#
# Revision 1.9  1997/03/24 12:26:15  RRM
# Implemented a new class of environments: $keepcomments .
# This allows environments of TeX-like code to be preserved verbatim,
# and passed to LaTeX for processing: e.g. picture, makeimage, xy  etc.
# Also, fixed the bug which loses any code on the same line as, but preceding
# an  \input  or  \include  command.
#
# Revision 1.8  1997/03/03 20:35:42  JCL
# added some comments
#
# Revision 1.7  1996/12/21 20:30:00  JCL
# - small changes to get verbatim parsed separately from verbatim*
# - provided expand test for regression suite
# - bound diagnostic status messages to debug level
#
#   texexpand is operational
#
# Revision 1.6  1996/12/20 20:27:08  JCL
# fixed severe bug with my $DS variable :-[
#
# Revision 1.5  1996/12/20 18:51:54  JCL
# *** empty log message ***
#
# Revision 1.4  1996/12/20 01:29:39  JCL
# Moved initialisation tokens for @dont_include to latex2html.config,
# to have a more central place to control them.
#
# Revision 1.3  1996/12/18 04:36:58  JCL
# substantial changes to allow for environments grouping several files
#  o chunked code into more functions
#  o revised documentation
#  o designed new parsing logic
#  o introduced parsing of \includecomment, \excludecomment to care
#    for self-defined comment environments
#  o handles default "comment" environment as known from html.sty
#  o and much more (see comments)
#
#
# V96.2a6 Fixed bug in recursive directory search for texinputs. Thanks to
#         Marcus Harnisch <harnisch@hhi.de> for reporting the bug.
#         Included possibility of adding extensions to $TEXE_DONT_INCLUDE
#         e.g. '.psfig', so that all files ending in .psfig won't be
#         \input or \include 'ed. Same for $TEXE_DO_INCLUDE. Added `o' 
#         option to some regexps.
# -------
# V96.2a5 Followed suggestions by Jens Lippmann regarding file inclusion
#         logic. Added \RequirePackage. Some minor changes.
# -------
# V96.2a4 Fixed severe bugs in comments regexp and usepackage logic.
#         Thanks to Ross Moore <roos@mpce.mq.edu.au> for reporting them.
#         Added support for LaTeX2e .clo filename extension (see 7. above)
#         Cleaned up some code, added more comments
#         Added command line option -do_include
# -------
# V96.2a3 Fixed bugs & typos
# -------
# V96.2a2 Following suggestions made by 
#         Jens Lippmann <lippmann@cdc.Informatik.TH-Darmstadt.DE>
#         Added recursive directory search for include files.
#         Added @do_include: Forces inclusion of packages (when found)
#         Some bug fixes
# -------
# V96.2a1 released Thu Oct 24 16:51:36 MET 1996
# -------
# 21-NOV-96 mro
# Almost complete rewrite by Marek Rouchal <marek@saftsack.fs.uni-bayreuth.de>
#
###############################################################################

if ($] >= 5 ) {# we have Perl 5.0 or later...

# All operating system depended stuff should be defined in
# override.pm before used in the code. 
# See export.pm on how to do this
# This path is set to where the Override module can be found.
if ($ENV{'LATEX2HTMLDIR'}) { use lib $ENV{'LATEX2HTMLDIR'}; }
else {
  use lib '/usr/lib/latex2html'; # Inserted by installation script
};

use Override qw(getpwuid link setenv getenv symlink rename
		make_directory_absolute unlink $dd $envkey $DEBUG);
} else {
  print STDERR "\n*** could not configure automatically using Override.pm ***";
  print STDERR "\n*** so assuming standard Unix conventions apply ***\n";

# With Perl 5 these should have been set already, within  Override.pm  

# The key, which delimts expressions defined in the environment
# depends on the operating system. 
    $envkey=':' unless ($envkey);

# $dd is the directory delimiter character
    $dd = '/' unless ($dd);
}

    $prompt = "texexpand:";

# Initialize texinputs
    @texinputs = (".");

    if(defined $ENV{'TEXINPUTS'}) {
	foreach $dir (split(/$envkey/,$ENV{'TEXINPUTS'})) {
	    push (@texinputs, $dir) if($dir =~ /\S+/); # save only if non-empty
	}
    }

# Expand paths with `~'
    $homeDir = (getpwuid($<))[7];
    grep(s|^~$dd|$homeDir$dd|, @texinputs);
    grep((m|^~([^$dd]+)$dd|) && ($homeDir = (getpwnam($1))[7]) && (s||$homeDir$dd|), @texinputs);

# Initialize styles to be excluded (if any).
# This is a sanity setup in case the \d is garbled during shell
# variable handling.
# The initialisation really comes from latex2html.config.
    @dont_include = ('\d+pt');

# These are the extensions to be auto-excluded
    $dont_include_ext_rx = 'sty|cls|clo';

    if(defined $ENV{'TEXE_DONT_INCLUDE'}) {
	foreach(split(/$envkey/,$ENV{'TEXE_DONT_INCLUDE'})) {
	    &process_dont_include($_);
	}
    }

# Initialize styles to be included (if any). This overrides @dont_include
# These are the extensions to be auto-included
    $do_include_ext_rx = '';

    if(defined $ENV{'TEXE_DO_INCLUDE'}) {
	foreach(split(/$envkey/,$ENV{'TEXE_DO_INCLUDE'})) {
	    &process_do_include($_);
	}
    }

# Parse arguments
    do {
	print "texexpand for LaTeX2HTML V97.1\n";
	exit(0);
    } unless(@ARGV);

    while ($ARGV[0] =~ /^-/ && $ARGV[0] !~ /^--$/) {
	$_ = shift;
	if (/^-dont_include$/) { &process_dont_include(shift); }
	elsif (/^-do_include$/) { &process_do_include(shift); }
	elsif (/^-w$/) {} ### Compatibility hack.
	elsif (/^-auto_exclude$/) { $auto_exclude++}
	elsif (/^-unsegment$/) { $unsegment++}
	elsif (/^-save_styles$/) { $style_file = shift; }
	elsif (/^-debug$/) { $debug++; }
	elsif (/^-verbose$/) { $debug=2; }
	elsif (/^-o$/) { $outfile = shift; }
	else { print STDERR "$prompt WARNING:  Unrecognized option: $_ \n"; }
    }
    die "$prompt No input file\n" unless($infile = shift);

&initialise;
&main;
exit(0);


sub initialise {

# Create generic regexp's:
# If this matches before a command, the command is ignored.
    $ignore_cmd_rx = '(\\latex\W|\\verb)';
# This matches a square bracket pair (typically an option list).
    $options_rx = '(\[[^\]]*\])?';
# This matches a single argument.
    $arg_rx = '\{([^\}]*)\}';
    $fakeenv_rx = '(comment)';
    $keepcomments_rx = '(picture|makeimage|xy|diagram)';


# Print environments
    if ($debug) {
	print STDERR "$prompt TeX inputs are in:\n";
	foreach $dir (@texinputs) { print STDERR "$prompt   $dir\n"; }

	if ($debug>1) {
	    print STDERR "\n$prompt Special names (not to be input or included):\n";
	    foreach $name (@dont_include) { print STDERR "$prompt   $name\n"; }
	    print STDERR "\n$prompt Extensions of files not to be input or included: "
		. "$dont_include_ext_rx\n";

	    print STDERR "\n$prompt Special names (to *be* input or included):\n";
	    foreach $name (@do_include) { print STDERR "$prompt   $name\n"; }
	    print STDERR "\n$prompt Extensions of files to *be* input or included: "
		. "$do_include_ext_rx\n";
	}
    }
    print STDERR "\n$prompt %--- Expanding $infile\n" if ($debug>1);
}


sub main {
# Note that verbatim/latexonly may split over different files!
# $verbatim is 1 if inside a verbatim environment,
# $latexonly is > 0 if inside latexonly environments
# $includelevel indicates the depth of include/input
    local($includelevel) = 0;
    local($verbatim,$verbatimname) = (0,"");
    local($latexonly,$latexonlytype) = (0,"");
    local($fakeenv,$fakeenvname,$fakeenvtype) = (0,"","");
    local($keepcomments,$keepcommentsname) = (0,"");
    local($active,$mute) = (1,0);

# Main procedure
    $dont_include_rx = join("|",@dont_include);
    $do_include_rx = join("|",@do_include);

    if($style_file) {
	open(STYLES,">$style_file") || die "$prompt Cannot open style file\n";
    }
    if($outfile) {
	open(OUT,">$outfile") || die "$prompt Cannot open output file\n";
    }
    else {
	open(OUT,">&STDOUT");
    }

    &process_file($infile); # the workhorse...

    close(OUT) if ($outfile);
    close(STYLES) if ($style_file);

    print STDERR "$prompt Warning: No ${latexonlytype}end\{latexonly\} found.\n"
	if ($latexonly);
    print STDERR "$prompt Warning: No ${fakeenvtype}end\{$fakeenvname\} found.\n"
	if ($fakeenv);
    print STDERR "$prompt Warning: No \\end\{$keepcommentsname\} found.\n"
	if ($keepcomments);
    print STDERR "$prompt Warning: No \\end{verbatim} found.\n"
	if ($verbatim);
}


# Include and parse a file.
# This routine is recursive, see also &process_input_include_file,
# &process_document_header, and &process_package_cmd.
#
# Two global flags control the states of texexpand.
#  o $active is true if we should interprete the lines to expand
#    files, check for packages, etc.
#  o $mute is true if we should prevent the lines from going
#    into the out file.
#
# We have three general states of texexpand:
#  1) interprete the lines and pass them to the out file
#     This is the normal case.
#     Corresponding: $active true, $mute false
#  2) interprete minimal and suppress them
#     This is when parsing inside a comment environment, which
#     also would retain its body from LaTeX.
#     => $active false, $mute true
#  3) interprete minimal and pass the lines to the out file
#     This is inside a verbatim or latexonly environment.
#     The line of course must be at least interpreted to
#     determine the closing tag.
#     => $active false, $mute false
#
# Any environment may extend over several include files.
# Any environement except verbatim and latexonly may have its
# opening or closing tag on different input levels.
# The comment and verbatim environments cannot be nested, as
# is with LaTeX.
# We must at least parse verbatim/comment environments in
# latexonly environments, to catch fake latexonly tags.
#
# The work scheme:
# Five functions influence texexpand's behavior.
# o &process_file opens the given file and parses the non-comment part in
#   order to set $active and $mute (see above).
#   It calls &interprete to interprete the non-comment content and either
#   continues with the next line of its file or terminates if &interprete
#   detected the \end{document} or an \endinput.
# o &interprete handles some LaTeX tags with respect to the three states
#   controlled by $active and $mute.
#   Regarding to \input|include, \document(class|style), and
#   \(use|Require)package the functions &process_input_include_file,
#   &process_document_header, and &process_package_cmd are called respectively.
# o These three functions check if the file name or option files are enabled
#   or disabled for merging (via TEXE_DO_INCLUDE or TEXE_DONT_INCLUDE).
#   Any file that is to include will be 'merged' into the current file, i.e.
#   the function &process_file is called at this place in time (recursively).
#   This will stop interpretation at the current line in file, start with the
#   new file to process and continues with the next line as soon as the new
#   file is interpreted to its end.
#
# The call tree (noweb+xy.sty would be handy here):
#
#     main
#       |
#       v
#  +->process_file
#  |    |
#  |    v
#  |  interprete (with respect to the current line, one of that three)
#  |    |                           |                        |
#  |    v                           v                        v
#  |  process_input_include_file  process_document_header  process_package_cmd
#  |    |                           |                        |
#  |    v                           v                        v
#  +----+---------------------------+------------------------+
#
# Bugs:
# o Since the latexonly environment is not parsed, its contents
#   might introduce environments which are not recognized.
# o The closing tag for latexonly is not found if hidden inside
#   an input file.
# o One environment tag per line, yet!
# o If I would have to design test cases for this beast I would
#   immediately desintegrate into a logic cloud.
#
# Notes:
# o Ok, I designed test cases for it.
#   Please refer to test 'expand' of the regression test suite
#   in the developers' module of the l2h repository.
# o -unsegment feature:
#   In this (rare) case, the user wants to translate a segmented document
#   not in segments but in a whole (for testing, say).
#   We enable this by recognizing the \segment command in &interprete,
#   causing the segment file to be treated like \input but loosing the first
#   lines prior to \startdocument (incl.), as controlled via $segmentfile.
#   On how to segment a document you are best guided by section
#   ``Document Segmentation'' of the LaTeX2HTML manual.
#
sub process_file {
    local($infile) = @_;
    local(*IN);
    local($comments,$before,$orig);


    # Keep track of input/include level
    $includelevel++;

    open(IN,"<$infile") || die "$prompt Cannot open $infile\n";
    print STDERR "$prompt %--- Processing $infile\n" if ($debug > 1);

    # if we don't include this file marker LaTeX2HTML won't split
    # the document at this point
    print OUT "%%% TEXEXPAND: INCLUDED FILE MARKER $infile\n"
	if ($includelevel > 1 && $active);

    if ($segmentfile) {
	# This variable is set by &interprete to change the behavior of the
	# next file to merge.
	while(<IN>) {
	    # strip comments
	    s/(^|[^\\])(\\\\)*(%.*)/$comments = $3; $1.$2/e;
	    last if /^\s*\\startdocument/;
	}
	$segmentfile = 0;
    }

    while(<IN>) {
	#for debugging
	$orig = $_;

	# lift comments from line
	$comments = "";
	if ($keepcomments) { $comments = '' }
	else {
	    s/(^|[^\\])(\\\\)*(%.*)/$comments = $3; $1.$2/e
	}

	# Deal with latexonly environment(s)
	# begin/end tags must be on single line
	if (!$fakeenv && !$verbatim && !$latexonly &&
	    $comments =~ /%\s*begin\s*\{\s*latexonly\s*\}/) {

	    # A comment latexonly environment. May not be nested.
	    $latexonly = 1;
	    $latexonlytype = "%";
	    $active = 0;
	    $mute=1;
	}
	elsif (!$fakeenv && !$verbatim &&
	       (!$latexonly || $latexonlytype eq "\\") &&
	       /^\s*\\begin\s*\{\s*latexonly\s*\}/) {

	    # A latexonly environment. LaTeX types may be nested,
	    # but discard them as long as we are in a latexonly
	    # comment part.
	    # We definitely don't like to push the "\\", "%" types
	    # onto a stack to keep track of them in alternating types.
	    # On the other hand we won't allow for a comment type
	    # part to close a LaTeX environment, eg.
	    $latexonly++;
	    $latexonlytype = "\\";
	    $active = 0;
	}
	elsif (!$fakeenv && !$verbatim &&
	       $comments =~ /%\s*begin\s*\{\s*$fakeenv_rx\s*\}/) {
	    # Begin of a fake comment part. May not be nested.
	    $fakeenv=1;
	    $fakeenvtype="%";
	    # Remember the part name.
	    $fakeenvname = $1;
	    $active=0;
	    $mute=1 unless $latexonly;
	}
	elsif (!$fakeenv && !$verbatim && /^\s*\\begin\s*\{\s*$fakeenv_rx\s*\}/) {
	    # Begin of a fake environment. May not be nested.
	    $fakeenv="1";
	    $fakeenvtype="\\";
	    # Remember the environment name.
	    $fakeenvname = $1;
	    $active=0;
	    $mute=1 unless $latexonly;
	}
	elsif (!$fakeenv && !$verbatim && /^\s*\\begin\s*\{\s*$keepcomments_rx\s*\}/) {
	    # Begin of a keepcomments environment. May be nested.
	    if (! $keepcomments) {
		$keepcomments = 1;
		# Remember the environment name.
	        $keepcommentsname = $1;
	    } elsif ($keepcommentsname eq $1) {
		$keepcomments++;
	    }
	    $active=1;
	    $mute=1 unless $latexonly;
	}
#	elsif (!$fakeenv && !$verbatim && /\\begin\s*\{\s*verbatim(\*)?\s*\}/) {
	elsif (!$fakeenv && !$verbatim && /\\begin\s*\{\s*(\w*[Vv]erbatim\w*\*?)\s*\}/) {
	($before,$verbatimname) = ($`,$1);
	($active,$verbatim) = (0,1)
		unless ($before =~ /$ignore_cmd_rx/o);
	}

	print STDERR "$prompt %--line::${orig}%--      active=$active mute=$mute ".
	    "latexonly=$latexonly fakeenv=$fakeenv verbatim=$verbatim ".
	    "keepcomments=$keepcomments\n"
		if ($debug > 1) && $orig =~ /\\begin|%\s*begin/;

	# Interprete the single line, care for file to merge,
	# locate new comment environments, etc.
	# This one does recursive calls.
	# Stop this file if we are told so.
	last
	    unless &interprete($_, $comments);

	last if $end_document;

	# Sorry for that ifs...
	if (!$fakeenv && !$verbatim && $latexonly && $latexonlytype eq "%" &&
	    $comments =~ /%\s*end\s*\{\s*latexonly\s*\}/) {

	    # only %end{latexonly} can close the part
	    $latexonly=0;
	    $active = 1;
	    $mute = 0;
	}
	elsif (!$fakeenv && !$verbatim && $latexonly && $latexonlytype eq "\\" &&
	    /^\s*\\end\s*\{\s*latexonly\s*\}/) {

	    # only \end{latexonly} can close the environment
	    $latexonly--;
	    $active = ($latexonly ? 0 : 1);
	}
	elsif ($fakeenv && $fakeenvtype eq "%" &&
	       $comments =~ /%\s*end\s*\{\s*$fakeenv_rx\s*\}/) {

	    # only a matching %end{name} can close the part
	    if ($1 eq $fakeenvname) {
		$fakeenv=0;
		$active = ($latexonly ? 0 : 1);
		$mute=0
		    unless $latexonly && $latexonlyenv eq "%";
	    }
	}
	elsif ($fakeenv && $fakeenvtype eq "\\" &&
	       /^\s*\\end\s*\{\s*$fakeenv_rx\s*\}/) {

	    # only a matching \end{name} can close the environment
	    if ($1 eq $fakeenvname) {
		$fakeenv=0;
		$active = ($latexonly ? 0 : 1);
		$mute=0;
	    }
	}
	elsif ($keepcomments &&
	       /^\s*\\end\s*\{\s*$keepcomments_rx\s*\}/) {

	    # only a matching \end{name} can close the part
	    if ($1 eq $keepcommentsname) {
		$keepcomments--;
		$keepcommentsname = '' unless ($keepcomments);
		$active = ($latexonly ? 0 : 1);
		$mute=0
		    unless $latexonly && $latexonlyenv eq "%";
	    }
	}
#	elsif ( /\\end\s*\{\s*verbatim(\*)?\s*\}/) {
	elsif ( /\\end\s*\{\s*(\w*[Vv]erbatim\w*\*?)\s*\}/) {
	    if ($1 eq $verbatimname) {
		$verbatim=0;
		$active = ($latexonly ? 0 : 1);
	    }
	}
	print STDERR "$prompt %--line::${orig}%--      active=$active mute=$mute ".
	    "latexonly=$latexonly fakeenv=$fakeenv verbatim=$verbatim\n"
		if ($debug > 1) && $orig =~ /\\end|%\s*end/;

    }
    print OUT "%%% TEXEXPAND: END FILE $infile\n"
	if ($includelevel > 1 && $active);
    close(IN);
    $includelevel--;
}


# Handle the LaTeX tags \input, \include, \endinput, \documentclass,
# \documentstyle, \usepackage, \RequirePackage, \end{document},
# \includecomment, \excludecomment with respect to the three states
# controlled by $active and $mute.
# The state 'interprete minimal and suppress' ($active false, $mute true)
# does not require further actions, just do nothing.
# When in $active state, call one of &process_input_include_file,
# &process_document_header, or &process_package_cmd to examine the
# apropriate line further.
# 
# Returns 0 if the caller is to stop interpreting the current file (\endinput).
# Returns 1 otherwise.
# Set $end_document to 1 if an \end{document} is detected (this stops
# the whole task of texexpand).
#
sub interprete {
    local($_,$comments) = @_;
    local($line) = $_;
    local($before,$after);

    # the default to print to OUT
    $line =~ s/\n/$comments\n/;

    if ($active) {
	#looses $comments on successful input/include, document header,
	#or usepackage/RequirePackage

	if (/\\(input|include)\W/) { 
	    ($before,$after) = ($`,$&.$');
	    if ($before =~ /$ignore_cmd_rx/o) {
		print OUT $line;
	    }
	    else {
                if (length($before)) {
		    #put prefix to \\input etc. to single line
		    print OUT $before,"\%\n";
		    #mask special chars
                    $before =~ s/(\W)/\\$1/g;
		    #strip prefix from total line incl. comments
		    $line =~ s/$before//;
		}
		# print total line incl. comments if merging failed
		print OUT $line
		    #may re-enter &process_file
		    unless &process_input_include_file($after);
	    }
	}
	elsif (/\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/o) {
	    $before = $`;
	    if($before =~ /$ignore_cmd_rx/o) {
		print OUT $line;
	    }
	    else {
		&process_package_cmd($_);
	    }
	}
	elsif (/\\document(class|style)\s*$options_rx\s*$arg_rx/o) {
	    $before = $`;
	    if ($before =~ /$ignore_cmd_rx/o) {
		print OUT $line;
	    }
	    else {
		&process_document_header($_);
	    }
	}
	elsif ($unsegment && /^\s*\\segment(\*?)\s*$options_rx\s*$arg_rx\s*$arg_rx\s*/) {
	    # We found a segmenting command which must vanish.
	    # Therefore, mutate the \segment into the section command specified
	    # by $4 (section, subsection, ...) and $1 (* or empty) followed by
	    # the section text, and an \input statement with filename $3.
	    # To obtain the section text, we need to take a preview to the next
	    # lines, as it might be truncated with %'s.
	    # Line truncations between the regex above (like \segment%\n) are
	    # not recognized.
	    # There are as much lines fetched as required to satisfy the equality
	    # of the amounts of left and right braces, since we aren't able to
	    # handle nested brace pairs.
	    # If this strategy fails, texexpand is terminated, thereby satisfying
	    # the 'all or nothing' requirement.

	    local($file) = $3;
	    print OUT "\\$4$1";
	    $after = $_ = $'; #get tail
	    local($left,$right) = (tr/\{/\{/,tr/\}/\}/);
	    while (($left != $right) || !$left) {
		#braces not balanced or no opening brace at all, get next line
		$_ = <IN>;
		die "$prompt arguments to \\segment are too complex"
		    unless length($_) && length($after) < 500;
		# strip comments
		s/(^|[^\\])(\\\\)*(%.*)/$1$2/;
		$left += tr/\{/\{/; $right += tr/\}/\}/;
		$after .= $_;
	    }
	    $after =~ /\}([^\}]*)$/;
	    $after = $1;
	    $_ = $`;
	    # Ok we have it. $_ should carry the whole section title plus
	    # opening brace, the original lines squeezed into one.
	    print OUT $_,"}\n";

	    # set this globally to control behavior of next &process_file
	    $segmentfile = 1;
	    die "$prompt segment file <$file> could not be merged"
		unless &process_input_include_file("\\input\{$file\}$after");
	}
	# Print the first /end{document}, only.  Truncate anything after it.
	elsif (/^(.*\\end\{document\})/) {
	    $before = $1;
	    if ($before =~ /$ignore_cmd_rx/o) {
		print OUT $line;
	    }
	    else {
		print OUT "$before\n";
		$end_document++;
	    }
	}
	elsif (/\\endinput/) {
	    $before=$`;
	    return(0)	#stop this file
		if ($includelevel > 1 && $before !~ /$ignore_cmd_rx/o);
	}
	elsif (/\\(in|ex)cludecomment\s*$arg_rx/o) {
	    local($mode,$env) = ($1,$2);

	    $env =~ s/\s//g; #strip space
	    # escape special chars (such as "*"), but reject "|"
	    $env =~ s/(\W)/\\$1/g;
	    unless ($env =~ /\|/) {
		$fakeenv_rx =~ /\((.*)\)/;
		# might also be empty
		local(@envs) = split(/\|/,$1);

		if ($mode eq "ex") {
		    push(@envs,$env);
		}
		else {
# a dumb try to forget the comment environment if redefined
		    $env =~ s/\\/\\\\/g;
		    #must not use $_ inside grep pattern!
		    @envs = grep(!/$env/,@envs);
		}
		$fakeenv_rx = "\(".join("|",@envs)."\)";
	    }
	}
	else {
	    print OUT $line;
	}
    }
    elsif (! $mute) {
	# print line if in verbatim/comment mode
	print OUT $line;
    }
    return(1);		#continue if not $end_document
}


sub process_input_include_file {
    local($_) = @_;
    local($before,$after,$class,$styles);

    print STDERR "$prompt %--- Found include at level $includelevel: $_\n"
	if($debug);

# Get filename
    local($filename) = "";

    if (/(\\input|\\include)(\s*$arg_rx|\s+(\S+)\s)/o) {
	# $class serves as temporary storage
	($before,$after,$class,$filename) = ($`, $', $1.$2, $3.$4);
	$filename =~ s/\s//g;
    }
    else {
	print STDERR "$prompt %--- COULDN'T FIND FILENAME\n" if($debug);
    }

    if ($filename) {
	# Get base name
	$styles = $filename;
	$styles =~ s|.*$dd||; # strip path
	$styles =~ s/\.[^.]*$//; # strip extension

	# Sorry for the next if-statement... (hmm,ok)
	if ($styles !~ /^($do_include_rx)$/o &&
	    $filename !~ /\.($do_include_ext_rx)$/o &&
	    ($styles =~ /^($dont_include_rx)$/o || 
	     ($auto_exclude && $filename =~ /\.($dont_include_ext_rx)$/o))) {
	    print STDERR "$prompt %--- ignoring $filename\n" if($debug);
	    print STYLES "$styles\n" if($style_file);
	}
	else {
	    local($fname) = &find_file($filename);

	    # notify anyway that a file is found, to allow a Perl
	    # module loaded for this specific file
#	    print STYLES "$styles\n" if($style_file);

	    if($fname) {
		print OUT "$before";

		# recursive call
		&process_file($fname);

		print OUT $after if($after =~ /\S+/);
		print STDERR "$prompt %--- successfully included $filename\n"
		    if($debug > 1);

		return(1); #merge
	    }
	    else {
		print STDERR "$prompt include $filename failed. Reinserting $before command\n";
	    }
	}
    }
    return(0);		#no merge
}


sub process_document_header {
    local($_) = @_;
    local(%style_include,@print_styles,$key);

    /\\document(class|style)\s*$options_rx\s*$arg_rx/o;
    local($before, $latextype, $styles, $class, $after) = ($`, $1, $2, $3, $');

    print STDERR "$prompt %--- Found $1: $_\n" if($debug);
    $styles =~ s/\[(.*)\]/$1/; # Strip braces
    $class =~ s/\s//g;	# Strip spaces
    # the class cannot be included, so stuff it in the style file
    print STYLES "$class\n" if($style_file);

    foreach $key (split(/,/, $styles)) {
	$key =~ s/\s//g; # strip spaces
	push(@print_styles,$key);
	if (&should_include($key)) {
# mark the style for inclusion and search for the
# corresponding .clo (LaTeX2e) or .sty (LaTeX209)
# &find_file gives the filename or undef.
	    $style_include{$key} =
		&find_file($key . (($latextype =~ /class/) ? '.clo' : '.sty'));
	}
    }
    $styles = '';
    foreach $key (@print_styles) {
	if(!$style_include{$key}) {
# put style back into command and save it to the style file
	    print STYLES "$key\n" if($style_file);
	    $styles .= ',' . $key;
	}
    }
    if ($styles) {
	$styles =~ s/^,//;
	$styles = '[' . $styles . ']';
    }
    print OUT join('', $before, "\\document", $latextype, $styles,
		   '{', $class, '}', $after);
    # Include styles after the \document(class|style) command
    foreach $key (@print_styles) {
	if($style_include{$key}) {
	    &process_file($style_include{$key});
	}
    }
}


sub process_package_cmd {
    local($_) = @_;
    local(%style_include,@print_styles,$key);

    /\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/o;
    local ($before,$class,$options,$styles,$after) = ($`, $1, $2, $3, $');

    print STDERR "$prompt %--- Found \\$class: $_\n" if($debug > 1);
    $options =~ s/\[(.*)\]/$1/o; # strip braces

    foreach $key (split(/,/,$styles)) {
	$key =~ s/\s//g; # strip spaces
	# Remember each package and check whether to merge it
	push(@print_styles,$key);
	if (&should_include($key)) {
	    $style_include{$key}=&find_file($key . '.sty');
	}
    }
    $styles = '';
    foreach $key (@print_styles) {
	if (!$style_include{$key}) {
	    # print to style file and reinsert into command
	    # if package is not to be merged
	    print STYLES "$key $options\n" if ($style_file); 
	    $styles .= ',' . $key;
	}
    }
    if($styles) {
	# Reconstruct command
	$styles =~ s/^,//;
	$options = '[' . $options . ']' if($options =~ /\S+/);
	print OUT $before . '\\' . $class . $options .
	    '{' . $styles . '}' . $after;
    }
    else { print OUT $before . $after; }
    foreach $key (@print_styles) {
	if($style_include{$key}) {
			# merge style files
	    &process_file($style_include{$key});
	}
    }
}


sub process_dont_include {
    local($item) = @_;
    if($item =~ s/^\.//) { # starts with `.'? Then it's an extension
	$dont_include_ext_rx .= '|' . $item;
    }
    else {
	push(@dont_include,$item);
    }
}

sub process_do_include {
    local($item) = @_;
    if($item =~ s/^\.//) { # starts with `.'? Then it's an extension
	$do_include_ext_rx .= (($do_include_ext_rx eq '') ? '' : '|') . $item;
    }
    else {
	push(@do_include,$item);
    }
}


# Returns true if style has to be included, i.e.:
#  1. The style is found in do_include *or*
#  2. Automatic exclusion is disabled and the style is *not* found in
#     dont_include
#
sub should_include {
    local($style) = @_;

    return($style =~ /^($do_include_rx)$/o ||
	   (!$auto_exclude && $style !~ /^($dont_include_rx)$/o ));
}


sub find_file {
    local($file) = @_;
    local($fname,$dname);
    local($found)=0;
    print STDERR "$prompt %--- checking for $file\n" if($debug);      

    if ($file =~ m|^$dd|) {
	$fname=$file;
	if(&file_or_ext) { $found=1; }
    }
    else {
	# search input directories
	foreach $dir (@texinputs) {
	    ($dname = $dir) =~ s|[$dd]+$||; # Remove slashes at the end
	    if (-d $dname) {
		if ($fname = &dir_search($dir,$file)) {
		    $found = 1;
		    last;
		}
	    }
	    else {
		print STDERR "$prompt %--- Warning: \"$dname\" is no directory\n"
		    if ($debug);
	    }
	}
    }
    if ($found) {
	print STDERR "$prompt %--- found $fname\n" if ($debug);
	return($fname);
    }
    else {
	print STDERR "$prompt %--- file not found\n" if ($debug);      
	return(undef);
    }
}


sub dir_search {	# search directory recursively
    local($dir,$file) = @_;
    local(*SUBDIR);	# make file pointer local
    local($dname,$found,$recursive) =('',0,0);

    if ($dir =~ m|$dd$dd$|) { # does dir end in `//'?
	$recursive = 1;
    }
    $dir =~ s|[$dd]+$||; # Remove any slashes at the end
    local($fname) = join ($dd, $dir, $file);

    print STDERR "$prompt %--- looking for $fname\n" if($debug);
    # Does file exist in this directory?
    if (&file_or_ext) {
	return($fname);
    }
    elsif ($recursive) { # descend into subdirectories?
	# search directory for subdirectories
	opendir(SUBDIR,$dir); # open directory
	while ($_=readdir(SUBDIR)) { # read dir-entries
	    next if(/^\./); # do not check dotfiles
	    $dname = join ($dd, $dir, $_);
	    if ((-d $dname) && ($fname = &dir_search($dname.$dd.$dd,$file))) {
		$found = 1;
		last;
	    }
	}
	close(SUBDIR);
	if ($found) {
	    return($fname);
	}
    }
    return(0);
}


sub file_or_ext {
    # Modifies $fname
    # if $fname exists return success otherwise
    # if $fname.tex exists, then bind $fname to $fname.tex and return success
    # else fail

    return 1 if -f $fname && -r $fname;# && -s $fname;
    return 0 if $fname =~ /\.tex$/;
    $fname .= ".tex";
    return 1 if -f $fname && -r $fname;# && -s $fname;
    return 0;
}