File: loadpage.ml

package info (click to toggle)
perl4caml 0.9.5-8
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 540 kB
  • sloc: ml: 1,572; ansic: 957; makefile: 186; perl: 45
file content (60 lines) | stat: -rw-r--r-- 1,574 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
(* Example program which uses LWP::UserAgent and HTML::TreeBuilder to
 * download an HTTP page and parse it.
 * Copyright (C) 2003 Merjis Ltd.
 * $Id: loadpage.ml,v 1.5 2003/12/11 17:41:52 rich Exp $
 *)

open Printf

open Pl_LWP_UserAgent
open Pl_HTTP_Request
open Pl_HTML_TreeBuilder
open Pl_HTML_Element

(* Program entry point.
 *
 * Takes the URL from the first command-line argument (falling back to a
 * built-in default), fetches it with LWP::UserAgent, parses the body with
 * HTML::TreeBuilder and writes a textual dump of the element tree to
 * stdout.  Raises [Failure] if the response is not a success.
 *)
let () =
  (* URL to load: argv[1] when given, otherwise the default site. *)
  let site =
    if Array.length Sys.argv < 2 then
      "http://www.merjis.com/"
    else
      Sys.argv.(1) in

  (* Build the user agent and the GET request, then perform the fetch. *)
  let agent = Pl_LWP_UserAgent.new_ ~env_proxy:true () in
  let request = Pl_HTTP_Request.new_ "GET" ~uri:site () in
  let response = agent#request request in

  (* Bail out with the server's status line when the fetch failed. *)
  if not response#is_success then begin
    let message =
      "Error while fetching " ^ site ^ ": " ^ response#status_line in
    failwith message
  end;

  (* Parse the page body and convert the builder into an HTML::Element. *)
  let root =
    (Pl_HTML_TreeBuilder.new_from_content response#content)#elementify in

  (* Recursively dump one element: its tag, its external attributes, then
   * each child (nested elements recurse, text nodes are printed inline),
   * and finally a closing marker for the tag.
   *)
  let rec dump element =
    let tag = element#tag in
    let attributes = element#all_external_attr in
    let children = element#content_list in

    let show_attribute (name, value) =
      printf "\tAttr: %s=\"%s\"\n" name value in
    let show_child = function
      | Element child -> dump child
      | String text -> printf "String: %s\n" text in

    printf "Start tag: %s\n" tag;
    List.iter show_attribute attributes;
    List.iter show_child children;
    printf "End tag: %s\n" tag
  in
  dump root;

  (* Force a full major collection - a good way to flush out
   * GC/allocation bugs. *)
  Gc.full_major ()