1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
(* Example program which uses LWP::UserAgent and HTML::TreeBuilder to
* download an HTTP page and parse it.
* Copyright (C) 2003 Merjis Ltd.
* $Id: loadpage.ml,v 1.5 2003/12/11 17:41:52 rich Exp $
*)
open Printf
open Pl_LWP_UserAgent
open Pl_HTTP_Request
open Pl_HTML_TreeBuilder
open Pl_HTML_Element
(* Entry point: fetch one page over HTTP, parse it into an HTML element
 * tree, and dump that tree to stdout.
 *
 * The URL is taken from the first command-line argument when one is
 * given; otherwise a built-in default is used.
 * Raises [Failure] when the response is not reported as successful.
 *)
let () =
  let url =
    match Array.to_list Sys.argv with
    | _ :: first :: _ -> first
    | [] | [ _ ] -> "http://www.merjis.com/"
  in

  (* Build the user agent and the GET request, then issue the request. *)
  let agent = Pl_LWP_UserAgent.new_ ~env_proxy:true () in
  let query = Pl_HTTP_Request.new_ "GET" ~uri:url () in
  let response = agent#request query in
  if not response#is_success then
    failwith (sprintf "Error while fetching %s: %s" url response#status_line);

  (* Hand the page body to HTML::TreeBuilder and convert the result
   * into an HTML::Element. *)
  let body = response#content in
  let builder = Pl_HTML_TreeBuilder.new_from_content body in
  let root = builder#elementify in

  (* Print a single attribute as a tab-indented name="value" line. *)
  let show_attr (key, data) = printf "\tAttr: %s=\"%s\"\n" key data in

  (* [show element] prints the start tag, the attributes, every child in
   * order, and finally the end tag.  [show_child] recurses into element
   * children and prints string children directly. *)
  let rec show element =
    (* Query the element for everything first, exactly once each,
     * before any output is produced. *)
    let name = element#tag in
    let attributes = element#all_external_attr in
    let children = element#content_list in
    printf "Start tag: %s\n" name;
    List.iter show_attr attributes;
    List.iter show_child children;
    printf "End tag: %s\n" name
  and show_child = function
    | Element child -> show child
    | String text -> printf "String: %s\n" text
  in
  show root;

  (* Force a full major collection at the end: a cheap way to shake out
   * GC/allocation bugs. *)
  Gc.full_major ()
|