File: latexscan.mll

package info (click to toggle)
bibtex2html 1.97-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 744 kB
  • sloc: ml: 4,586; makefile: 690; perl: 50
file content (364 lines) | stat: -rw-r--r-- 12,912 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
(**************************************************************************)
(*  bibtex2html - A BibTeX to HTML translator                             *)
(*  Copyright (C) 1997-2010 Jean-Christophe Fillitre and Claude March   *)
(*                                                                        *)
(*  This software is free software; you can redistribute it and/or        *)
(*  modify it under the terms of the GNU General Public                   *)
(*  License version 2, as published by the Free Software Foundation.      *)
(*                                                                        *)
(*  This software is distributed in the hope that it will be useful,      *)
(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *)
(*                                                                        *)
(*  See the GNU General Public License version 2 for more details         *)
(*  (enclosed in the file GPL).                                           *)
(**************************************************************************)

(*
 * bibtex2html - A BibTeX to HTML translator
 * Copyright (C) 1997 Jean-Christophe FILLIATRE
 * 
 * This software is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation.
 * 
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * 
 * See the GNU General Public License version 2 for more details
 * (enclosed in the file GPL).
 *)

(*i $Id: latexscan.mll,v 1.40 2010-02-22 07:38:19 filliatr Exp $ i*)

(*s This code is Copyright (C) 1997 Xavier Leroy. *)

{
  open Printf
  open Latexmacros

  type math_mode = MathNone | MathDisplay | MathNoDisplay

  let brace_nesting = ref 0
  let math_mode = ref MathNone

  let is_math_mode () = 
    match !math_mode with
      | MathNone -> false
      | MathDisplay | MathNoDisplay -> true

  let hevea_url = ref false
  let html_entities = ref false

  let save_nesting f arg =
    let n = !brace_nesting in 
    brace_nesting := 0;
    f arg;
    brace_nesting := n

  let save_state f arg =
    let n = !brace_nesting and m = !math_mode in
    brace_nesting := 0;
    math_mode := MathNone;
    f arg;
    brace_nesting := n;
    math_mode := m

  let verb_delim = ref (Char.chr 0)

  let r = Str.regexp "[ \t\n]+"
  let remove_whitespace u = Str.global_replace r "" u

  let print_latex_url u =
    let u = remove_whitespace u in
    print_s (sprintf "<a href=\"%s\">%s</a>" u u)
  
  let print_hevea_url u t = 
    let u = remove_whitespace u in
    print_s (sprintf "<a href=\"%s\">%s</a>" u t)

  let chop_last_space s =
    let n = String.length s in
    if s.[n-1] = ' ' then String.sub s 0 (n-1) else s

  let def_macro s n b =
    if not !Options.quiet then begin
      eprintf "macro: %s = %s\n" s b; 
      flush stderr
    end;
    let n = match n with None -> 0 | Some n -> int_of_string n in
    let rec code i subst = 
      if i <= n then
	let r = Str.regexp ("#" ^ string_of_int i) in
	[Parameterized 
	    (fun arg -> 
	      let subst s = Str.global_replace r (subst s) arg in
	      code (i+1) subst)]
      else begin
	let _s = subst b in
	(* eprintf "subst b = %s\n" s; flush stderr; *)
	[Recursive (subst b)]
      end
    in
    def s (code 1 (fun s -> s))

}

let space = [' ' '\t' '\n' '\r']
let float = '-'? (['0'-'9']+ | ['0'-'9']* '.' ['0'-'9']*)
let dimension = float ("sp" | "pt" | "bp" | "dd" | "mm" | "pc" |
		       "cc" | "cm" | "in" | "ex" | "em" | "mu")

rule main = parse
(* Comments *)
    '%' [^ '\n'] * '\n' { main lexbuf }
(* Paragraphs *)
  | "\n\n" '\n' *
                { print_s "<p>\n"; main lexbuf }
(* Font changes *)
  | "{\\it" " "* | "{\\itshape" " "*
                  { print_s "<i>";
                    save_state main lexbuf;
                    print_s "</i>"; main lexbuf }
  | "{\\em" " "* | "{\\sl" " "* | "{\\slshape" " "*
                  { print_s "<em>";
                    save_state main lexbuf;
                    print_s "</em>"; main lexbuf }
  | "{\\bf" " "* | "{\\sf" " "* | "{\\bfseries" " "* | "{\\sffamily" " "*
                  { print_s "<b>";
                    save_state main lexbuf;
                    print_s "</b>"; main lexbuf }
  | "{\\sc" " "*  | "{\\scshape" " "* | "{\\normalfont" " "* 
  | "{\\upshape" " "* | "{\\mdseries" " "* | "{\\rmfamily" " "* 
                  { save_state main lexbuf; main lexbuf }
  | "{\\tt" " "* | "{\\ttfamily" " "* 
                  { print_s "<tt>";
                    save_state main lexbuf;
                    print_s "</tt>"; main lexbuf }
  | "{\\small" " "*
                  { print_s "<font size=\"-1\">";
                    save_state main lexbuf;
                    print_s "</font>"; main lexbuf }
  | "{\\rm" " "*
                  { print_s "<span style=\"font-style: normal\">";
                    save_state main lexbuf;
                    print_s "</span>"; main lexbuf }
  | "{\\cal" " "*
                  { save_state main lexbuf; main lexbuf }
  | "\\cal" " "*  { main lexbuf }
(* Double quotes *)
(***
  | '"'           { print_s "<tt>"; indoublequote lexbuf;
                    print_s "</tt>"; main lexbuf }
***)
(* Verb, verbatim *)
  | ("\\verb" | "\\path") _  
                { verb_delim := Lexing.lexeme_char lexbuf 5;
                  print_s "<tt>"; inverb lexbuf; print_s "</tt>";
                  main lexbuf }
  | "\\begin{verbatim}"
                { print_s "<pre>"; inverbatim lexbuf;
                  print_s "</pre>"; main lexbuf }
(* Raw html, latex only *)
  | "\\begin{rawhtml}"
                { rawhtml lexbuf; main lexbuf }
  | "\\begin{latexonly}"
                { latexonly lexbuf; main lexbuf }
(* Itemize and similar environments *)
  | "\\item[" [^ ']']* "]"
                { print_s "<dt>";
                  let s = Lexing.lexeme lexbuf in
                  print_s (String.sub s 6 (String.length s - 7));
                  print_s "<dd>"; main lexbuf }
  | "\\item"    { print_s "<li>"; main lexbuf }
(* Math mode (hmph) *)
  | "$"         { math_mode := 
		    begin
		      match !math_mode with
			| MathNone -> MathNoDisplay
			| MathNoDisplay -> MathNone
			| MathDisplay -> (* syntax error *) MathNone
		    end; 
		  main lexbuf }
  | "$$"        { math_mode := 
		    begin
		      match !math_mode with
			| MathNone -> 
			    print_s "<blockquote>";
			    MathDisplay
			| MathNoDisplay -> MathNoDisplay
			| MathDisplay -> 
			    print_s "\n</blockquote>";
			    MathNone
		    end;
                  main lexbuf }
(* \hkip *)
  | "\\hskip" space* dimension 
    (space* "plus" space* dimension)? (space* "minus" space* dimension)?
                { print_s " "; main lexbuf }
(* Special characters *)
  | "\\char" ['0'-'9']+
                { let lxm = Lexing.lexeme lexbuf in
                  let code = String.sub lxm 5 (String.length lxm - 5) in
                  print_c(Char.chr(int_of_string code));
                  main lexbuf }
  | "<"         { print_s "&lt;"; main lexbuf }
  | ">"         { print_s "&gt;"; main lexbuf }
  | "~"         { print_s "&nbsp;"; main lexbuf }
  | "``"        { print_s "&ldquo;"; main lexbuf }
  | "''"        { print_s "&rdquo;"; main lexbuf }
  | "--"        { print_s (if !html_entities then "&ndash;" else "-"); 
		  main lexbuf }
  | "---"       { print_s (if !html_entities then "&mdash;" else "-"); 
		  main lexbuf }
  | "^"         { if is_math_mode() then begin
		    let buf = Lexing.from_string (raw_arg lexbuf) in
		    print_s "<sup>";
		    save_state main buf;
		    print_s"</sup>"
		  end else
		    print_s "^"; 
		  main lexbuf }
  | "_"         { if is_math_mode() then begin
		    let buf = Lexing.from_string (raw_arg lexbuf) in
		    print_s "<sub>";
		    save_state main buf;
		    print_s"</sub>"
		  end else
		    print_s "_"; 
		  main lexbuf }
(* URLs *)
  | "\\url" { let url = raw_arg lexbuf in
	      if !hevea_url then
		let text = raw_arg lexbuf in print_hevea_url url text
	      else
		print_latex_url url;
	      main lexbuf }
  | "\\" " "
      { print_s " "; main lexbuf }
(* General case for environments and commands *)
  | ("\\begin{" | "\\end{") ['A'-'Z' 'a'-'z' '@']+ "}" |
    "\\" (['A'-'Z' 'a'-'z' '@']+ '*'? " "? | [^ 'A'-'Z' 'a'-'z'])
                { let rec exec_action = function
                    | Print str -> print_s str
                    | Print_arg -> print_arg lexbuf
                    | Raw_arg f -> f (raw_arg lexbuf)
                    | Skip_arg -> save_nesting skip_arg lexbuf
		    | Recursive s -> main (Lexing.from_string s)
		    | Parameterized f ->
			List.iter exec_action (f (raw_arg lexbuf))
		  in
		  let m = chop_last_space (Lexing.lexeme lexbuf) in
                  List.iter exec_action (find_macro m);
                  main lexbuf }
(* Nesting of braces *)
  | '{'         { incr brace_nesting; main lexbuf }
  | '}'         { if !brace_nesting <= 0
                  then ()
                  else begin decr brace_nesting; main lexbuf end }
(* Default rule for other characters *)
  | eof         { () }
  | ['A'-'Z' 'a'-'z']+
                { if is_math_mode() then print_s "<em>";
                  print_s(Lexing.lexeme lexbuf);
                  if is_math_mode() then print_s "</em>";
                  main lexbuf }
  | _           { print_c(Lexing.lexeme_char lexbuf 0); main lexbuf }

and indoublequote = parse
    '"'         { () }
  | "<"         { print_s "&lt;"; indoublequote lexbuf }
  | ">"         { print_s "&gt;"; indoublequote lexbuf }
  | "&"         { print_s "&amp;"; indoublequote lexbuf }
  | "\\\""      { print_s "\""; indoublequote lexbuf }
  | "\\\\"      { print_s "\\"; indoublequote lexbuf }
  | eof         { () }
  | _           { print_c(Lexing.lexeme_char lexbuf 0); indoublequote lexbuf }

and inverb = parse
    "<"         { print_s "&lt;"; inverb lexbuf }
  | ">"         { print_s "&gt;"; inverb lexbuf }
  | "&"         { print_s "&amp;"; inverb lexbuf }
  | eof         { () }
  | _           { let c = Lexing.lexeme_char lexbuf 0 in
                  if c == !verb_delim then ()
                                      else (print_c c; inverb lexbuf) }
and inverbatim = parse
    "<"         { print_s "&lt;"; inverbatim lexbuf }
  | ">"         { print_s "&gt;"; inverbatim lexbuf }
  | "&"         { print_s "&amp;"; inverbatim lexbuf }
  | "\\end{verbatim}" { () }
  | eof         { () }
  | _           { print_c(Lexing.lexeme_char lexbuf 0); inverbatim lexbuf }
  
and rawhtml = parse
    "\\end{rawhtml}" { () }
  | eof         { () }
  | _           { print_c(Lexing.lexeme_char lexbuf 0); rawhtml lexbuf }

and latexonly = parse
    "\\end{latexonly}" { () }
  | eof         { () }
  | _           { latexonly lexbuf }

and print_arg = parse
    "{"         { save_nesting main lexbuf }
  | "["         { skip_optional_arg lexbuf; print_arg lexbuf }
  | " "         { print_arg lexbuf }
  | eof         { () }
  | _           { print_c(Lexing.lexeme_char lexbuf 0); main lexbuf }

and skip_arg = parse
    "{"         { incr brace_nesting; skip_arg lexbuf }
  | "}"         { decr brace_nesting;
                  if !brace_nesting > 0 then skip_arg lexbuf }
  | "["         { if !brace_nesting = 0 then skip_optional_arg lexbuf;
                  skip_arg lexbuf }
  | " "         { skip_arg lexbuf }
  | eof         { () }
  | _           { if !brace_nesting > 0 then skip_arg lexbuf }

and raw_arg = parse
    " "         { raw_arg lexbuf }
  | '{'         { nested_arg lexbuf }
  | "["         { skip_optional_arg lexbuf; raw_arg lexbuf }
  | '\\' ['A'-'Z' 'a'-'z']+
                { Lexing.lexeme lexbuf }
  | eof         { "" }
  | _           { Lexing.lexeme lexbuf }

and nested_arg = parse
    '}'         { "" }
  | '{'         { let l = nested_arg lexbuf in
		  "{" ^ l ^ "}" ^ (nested_arg lexbuf) }
  | eof         { "" }
  | [^ '{' '}']+{ let x = Lexing.lexeme lexbuf in
		  x ^ (nested_arg lexbuf)   }

and skip_optional_arg = parse
    "]"         { () }
  | eof         { () }
  | _           { skip_optional_arg lexbuf }

(* ajout personnel: [read_macros] pour lire les macros (La)TeX *)

and read_macros = parse
  | "\\def" ('\\' ['a'-'z' 'A'-'Z' '@']+ as s) ("#" (['0'-'9']+ as n))?
      { let b = raw_arg lexbuf in
	def_macro s n b;
        read_macros lexbuf }
  | "\\newcommand" space* 
    "{" ("\\" ['a'-'z' 'A'-'Z']+ as s) "}" ("[" (['0'-'9']+ as n) "]")?
      { let b = raw_arg lexbuf in
	def_macro s n b;
        read_macros lexbuf }
  | "\\let" ('\\' ['a'-'z' 'A'-'Z' '@']+ as s) '='
      { let b = raw_arg lexbuf in
	def_macro s None b;
        read_macros lexbuf }
  | eof 
      { () }
  | _   
      { read_macros lexbuf }