1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
|
(***********************************************************************)
(* *)
(* HEVEA *)
(* *)
(* Luc Maranget, projet Moscova, INRIA Rocquencourt *)
(* *)
(* Copyright 2001 Institut National de Recherche en Informatique et *)
(* Automatique. Distributed only by permission. *)
(* *)
(* $Id: htmlparse.ml,v 1.11 2008-01-22 18:08:37 maranget Exp $ *)
(***********************************************************************)
open Lexeme
open Tree
exception Error of string
module Make(C:DoOut.Config) = struct
let error msg _lb = raise (Error msg)
;;
module Out = DoOut.Make(C)
module Lex = Htmllex.Make(C)
let buff = ref None
let next_token lexbuf = match !buff with
| Some tok -> buff := None ; tok
| None -> Lex.next_token lexbuf
and put_back lexbuf tok = match !buff with
| None -> buff := Some tok
| _ -> error "Put back" lexbuf
let txt_buff = Out.create_buff ()
let rec to_close tag lb = match next_token lb with
| Close (t,_) as tok when t=tag -> tok
| Open (t,_,txt) when t=tag ->
Out.put txt_buff txt ;
Out.put txt_buff (Htmllex.to_string (to_close tag lb)) ;
to_close tag lb
| Eof -> error ("Eof in to_close") lb
| tok ->
Out.put txt_buff (Htmllex.to_string tok);
to_close tag lb
let rec tree cls lexbuf =
match next_token lexbuf with
| (Eof|Close (_,_)) as tok-> put_back lexbuf tok ; None
| Open (STYLE,_,txt) ->
let otxt = txt
and ctxt = Htmllex.to_string (to_close STYLE lexbuf) in
let txt = Out.to_string txt_buff in
let txt = match cls with
| None -> txt
| Some cls ->
let css = Lex.styles (MyLexing.from_string txt) in
let buff = Out.create_buff () in
Out.put_char buff '\n' ;
List.iter
(fun cl -> match cl with
| Css.Other txt ->
Out.put buff txt ;
Out.put_char buff '\n'
| Css.Class (name, addname, txt) ->
if Emisc.Strings.mem name cls then begin
Out.put_char buff '.' ;
Out.put buff name ;
begin match addname with
| None -> ()
| Some n ->
Out.put_char buff ' ' ;
Out.put buff n
end ;
Out.put buff txt ;
Out.put_char buff '\n'
end)
css ;
Out.to_string buff in
Some (Text (otxt^txt^ctxt))
| Open (SCRIPT,_,txt) ->
Out.put txt_buff txt ;
Out.put txt_buff (Htmllex.to_string (to_close SCRIPT lexbuf)) ;
Some (Text (Out.to_string txt_buff))
| Open (tag,attrs,txt) ->
let fils = trees cls lexbuf in
begin match next_token lexbuf with
| Close (ctag,ctxt) when tag=ctag ->
Some
(match tag with
| A|SUP|SUB ->
ONode (txt,ctxt,fils)
| _ ->
Node
({tag=tag ; attrs=attrs ; txt=txt ; ctxt=ctxt},fils))
| tok ->
error (Htmllex.to_string tok ^ " closes "^txt) lexbuf
end
| Lexeme.Text txt -> Some (Text txt)
| Lexeme.Blanks txt -> Some (Blanks txt)
and trees cls lexbuf = match tree cls lexbuf with
| None -> []
| Some t -> t::trees cls lexbuf
let rec do_main cls lexbuf = match tree cls lexbuf with
| None ->
begin match next_token lexbuf with
| Eof -> []
| tok -> error ("Unexpected " ^ Htmllex.to_string tok) lexbuf
end
| Some (Text _ as last) -> [last]
| Some t -> t :: do_main cls lexbuf
let ptop () = Lex.ptop ()
let reset () =
Lex.reset() ;
Out.reset txt_buff
let main cls lexbuf =
try
do_main cls lexbuf
with
| e -> reset () ; raise e
let classes = Lex.classes
end
|