1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
|
(**************************************************************************)
(* bibtex2html - A BibTeX to HTML translator *)
(* Copyright (C) 1997-2010 Jean-Christophe Fillitre and Claude March *)
(* *)
(* This software is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU General Public *)
(* License version 2, as published by the Free Software Foundation. *)
(* *)
(* This software is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *)
(* *)
(* See the GNU General Public License version 2 for more details *)
(* (enclosed in the file GPL). *)
(**************************************************************************)
(*
* bibtex2html - A BibTeX to HTML translator
* Copyright (C) 1997 Jean-Christophe FILLIATRE
*
* This software is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
* See the GNU General Public License version 2 for more details
* (enclosed in the file GPL).
*)
(*i $Id: latexscan.mll,v 1.40 2010-02-22 07:38:19 filliatr Exp $ i*)
(*s This code is Copyright (C) 1997 Xavier Leroy. *)
{
open Printf
open Latexmacros
type math_mode = MathNone | MathDisplay | MathNoDisplay
let brace_nesting = ref 0
let math_mode = ref MathNone
let is_math_mode () =
match !math_mode with
| MathNone -> false
| MathDisplay | MathNoDisplay -> true
let hevea_url = ref false
let html_entities = ref false
let save_nesting f arg =
let n = !brace_nesting in
brace_nesting := 0;
f arg;
brace_nesting := n
let save_state f arg =
let n = !brace_nesting and m = !math_mode in
brace_nesting := 0;
math_mode := MathNone;
f arg;
brace_nesting := n;
math_mode := m
let verb_delim = ref (Char.chr 0)
let r = Str.regexp "[ \t\n]+"
let remove_whitespace u = Str.global_replace r "" u
let print_latex_url u =
let u = remove_whitespace u in
print_s (sprintf "<a href=\"%s\">%s</a>" u u)
let print_hevea_url u t =
let u = remove_whitespace u in
print_s (sprintf "<a href=\"%s\">%s</a>" u t)
let chop_last_space s =
let n = String.length s in
if s.[n-1] = ' ' then String.sub s 0 (n-1) else s
let def_macro s n b =
if not !Options.quiet then begin
eprintf "macro: %s = %s\n" s b;
flush stderr
end;
let n = match n with None -> 0 | Some n -> int_of_string n in
let rec code i subst =
if i <= n then
let r = Str.regexp ("#" ^ string_of_int i) in
[Parameterized
(fun arg ->
let subst s = Str.global_replace r (subst s) arg in
code (i+1) subst)]
else begin
let _s = subst b in
(* eprintf "subst b = %s\n" s; flush stderr; *)
[Recursive (subst b)]
end
in
def s (code 1 (fun s -> s))
}
let space = [' ' '\t' '\n' '\r']
let float = '-'? (['0'-'9']+ | ['0'-'9']* '.' ['0'-'9']*)
let dimension = float ("sp" | "pt" | "bp" | "dd" | "mm" | "pc" |
"cc" | "cm" | "in" | "ex" | "em" | "mu")
rule main = parse
(* Comments *)
'%' [^ '\n'] * '\n' { main lexbuf }
(* Paragraphs *)
| "\n\n" '\n' *
{ print_s "<p>\n"; main lexbuf }
(* Font changes *)
| "{\\it" " "* | "{\\itshape" " "*
{ print_s "<i>";
save_state main lexbuf;
print_s "</i>"; main lexbuf }
| "{\\em" " "* | "{\\sl" " "* | "{\\slshape" " "*
{ print_s "<em>";
save_state main lexbuf;
print_s "</em>"; main lexbuf }
| "{\\bf" " "* | "{\\sf" " "* | "{\\bfseries" " "* | "{\\sffamily" " "*
{ print_s "<b>";
save_state main lexbuf;
print_s "</b>"; main lexbuf }
| "{\\sc" " "* | "{\\scshape" " "* | "{\\normalfont" " "*
| "{\\upshape" " "* | "{\\mdseries" " "* | "{\\rmfamily" " "*
{ save_state main lexbuf; main lexbuf }
| "{\\tt" " "* | "{\\ttfamily" " "*
{ print_s "<tt>";
save_state main lexbuf;
print_s "</tt>"; main lexbuf }
| "{\\small" " "*
{ print_s "<font size=\"-1\">";
save_state main lexbuf;
print_s "</font>"; main lexbuf }
| "{\\rm" " "*
{ print_s "<span style=\"font-style: normal\">";
save_state main lexbuf;
print_s "</span>"; main lexbuf }
| "{\\cal" " "*
{ save_state main lexbuf; main lexbuf }
| "\\cal" " "* { main lexbuf }
(* Double quotes *)
(***
| '"' { print_s "<tt>"; indoublequote lexbuf;
print_s "</tt>"; main lexbuf }
***)
(* Verb, verbatim *)
| ("\\verb" | "\\path") _
{ verb_delim := Lexing.lexeme_char lexbuf 5;
print_s "<tt>"; inverb lexbuf; print_s "</tt>";
main lexbuf }
| "\\begin{verbatim}"
{ print_s "<pre>"; inverbatim lexbuf;
print_s "</pre>"; main lexbuf }
(* Raw html, latex only *)
| "\\begin{rawhtml}"
{ rawhtml lexbuf; main lexbuf }
| "\\begin{latexonly}"
{ latexonly lexbuf; main lexbuf }
(* Itemize and similar environments *)
| "\\item[" [^ ']']* "]"
{ print_s "<dt>";
let s = Lexing.lexeme lexbuf in
print_s (String.sub s 6 (String.length s - 7));
print_s "<dd>"; main lexbuf }
| "\\item" { print_s "<li>"; main lexbuf }
(* Math mode (hmph) *)
| "$" { math_mode :=
begin
match !math_mode with
| MathNone -> MathNoDisplay
| MathNoDisplay -> MathNone
| MathDisplay -> (* syntax error *) MathNone
end;
main lexbuf }
| "$$" { math_mode :=
begin
match !math_mode with
| MathNone ->
print_s "<blockquote>";
MathDisplay
| MathNoDisplay -> MathNoDisplay
| MathDisplay ->
print_s "\n</blockquote>";
MathNone
end;
main lexbuf }
(* \hkip *)
| "\\hskip" space* dimension
(space* "plus" space* dimension)? (space* "minus" space* dimension)?
{ print_s " "; main lexbuf }
(* Special characters *)
| "\\char" ['0'-'9']+
{ let lxm = Lexing.lexeme lexbuf in
let code = String.sub lxm 5 (String.length lxm - 5) in
print_c(Char.chr(int_of_string code));
main lexbuf }
| "<" { print_s "<"; main lexbuf }
| ">" { print_s ">"; main lexbuf }
| "~" { print_s " "; main lexbuf }
| "``" { print_s "“"; main lexbuf }
| "''" { print_s "”"; main lexbuf }
| "--" { print_s (if !html_entities then "–" else "-");
main lexbuf }
| "---" { print_s (if !html_entities then "—" else "-");
main lexbuf }
| "^" { if is_math_mode() then begin
let buf = Lexing.from_string (raw_arg lexbuf) in
print_s "<sup>";
save_state main buf;
print_s"</sup>"
end else
print_s "^";
main lexbuf }
| "_" { if is_math_mode() then begin
let buf = Lexing.from_string (raw_arg lexbuf) in
print_s "<sub>";
save_state main buf;
print_s"</sub>"
end else
print_s "_";
main lexbuf }
(* URLs *)
| "\\url" { let url = raw_arg lexbuf in
if !hevea_url then
let text = raw_arg lexbuf in print_hevea_url url text
else
print_latex_url url;
main lexbuf }
| "\\" " "
{ print_s " "; main lexbuf }
(* General case for environments and commands *)
| ("\\begin{" | "\\end{") ['A'-'Z' 'a'-'z' '@']+ "}" |
"\\" (['A'-'Z' 'a'-'z' '@']+ '*'? " "? | [^ 'A'-'Z' 'a'-'z'])
{ let rec exec_action = function
| Print str -> print_s str
| Print_arg -> print_arg lexbuf
| Raw_arg f -> f (raw_arg lexbuf)
| Skip_arg -> save_nesting skip_arg lexbuf
| Recursive s -> main (Lexing.from_string s)
| Parameterized f ->
List.iter exec_action (f (raw_arg lexbuf))
in
let m = chop_last_space (Lexing.lexeme lexbuf) in
List.iter exec_action (find_macro m);
main lexbuf }
(* Nesting of braces *)
| '{' { incr brace_nesting; main lexbuf }
| '}' { if !brace_nesting <= 0
then ()
else begin decr brace_nesting; main lexbuf end }
(* Default rule for other characters *)
| eof { () }
| ['A'-'Z' 'a'-'z']+
{ if is_math_mode() then print_s "<em>";
print_s(Lexing.lexeme lexbuf);
if is_math_mode() then print_s "</em>";
main lexbuf }
| _ { print_c(Lexing.lexeme_char lexbuf 0); main lexbuf }
and indoublequote = parse
'"' { () }
| "<" { print_s "<"; indoublequote lexbuf }
| ">" { print_s ">"; indoublequote lexbuf }
| "&" { print_s "&"; indoublequote lexbuf }
| "\\\"" { print_s "\""; indoublequote lexbuf }
| "\\\\" { print_s "\\"; indoublequote lexbuf }
| eof { () }
| _ { print_c(Lexing.lexeme_char lexbuf 0); indoublequote lexbuf }
and inverb = parse
"<" { print_s "<"; inverb lexbuf }
| ">" { print_s ">"; inverb lexbuf }
| "&" { print_s "&"; inverb lexbuf }
| eof { () }
| _ { let c = Lexing.lexeme_char lexbuf 0 in
if c == !verb_delim then ()
else (print_c c; inverb lexbuf) }
and inverbatim = parse
"<" { print_s "<"; inverbatim lexbuf }
| ">" { print_s ">"; inverbatim lexbuf }
| "&" { print_s "&"; inverbatim lexbuf }
| "\\end{verbatim}" { () }
| eof { () }
| _ { print_c(Lexing.lexeme_char lexbuf 0); inverbatim lexbuf }
and rawhtml = parse
"\\end{rawhtml}" { () }
| eof { () }
| _ { print_c(Lexing.lexeme_char lexbuf 0); rawhtml lexbuf }
and latexonly = parse
"\\end{latexonly}" { () }
| eof { () }
| _ { latexonly lexbuf }
and print_arg = parse
"{" { save_nesting main lexbuf }
| "[" { skip_optional_arg lexbuf; print_arg lexbuf }
| " " { print_arg lexbuf }
| eof { () }
| _ { print_c(Lexing.lexeme_char lexbuf 0); main lexbuf }
and skip_arg = parse
"{" { incr brace_nesting; skip_arg lexbuf }
| "}" { decr brace_nesting;
if !brace_nesting > 0 then skip_arg lexbuf }
| "[" { if !brace_nesting = 0 then skip_optional_arg lexbuf;
skip_arg lexbuf }
| " " { skip_arg lexbuf }
| eof { () }
| _ { if !brace_nesting > 0 then skip_arg lexbuf }
and raw_arg = parse
" " { raw_arg lexbuf }
| '{' { nested_arg lexbuf }
| "[" { skip_optional_arg lexbuf; raw_arg lexbuf }
| '\\' ['A'-'Z' 'a'-'z']+
{ Lexing.lexeme lexbuf }
| eof { "" }
| _ { Lexing.lexeme lexbuf }
and nested_arg = parse
'}' { "" }
| '{' { let l = nested_arg lexbuf in
"{" ^ l ^ "}" ^ (nested_arg lexbuf) }
| eof { "" }
| [^ '{' '}']+{ let x = Lexing.lexeme lexbuf in
x ^ (nested_arg lexbuf) }
and skip_optional_arg = parse
"]" { () }
| eof { () }
| _ { skip_optional_arg lexbuf }
(* ajout personnel: [read_macros] pour lire les macros (La)TeX *)
and read_macros = parse
| "\\def" ('\\' ['a'-'z' 'A'-'Z' '@']+ as s) ("#" (['0'-'9']+ as n))?
{ let b = raw_arg lexbuf in
def_macro s n b;
read_macros lexbuf }
| "\\newcommand" space*
"{" ("\\" ['a'-'z' 'A'-'Z']+ as s) "}" ("[" (['0'-'9']+ as n) "]")?
{ let b = raw_arg lexbuf in
def_macro s n b;
read_macros lexbuf }
| "\\let" ('\\' ['a'-'z' 'A'-'Z' '@']+ as s) '='
{ let b = raw_arg lexbuf in
def_macro s None b;
read_macros lexbuf }
| eof
{ () }
| _
{ read_macros lexbuf }
|