File: http_pipeline.ml

package info (click to toggle)
netclient 0.91-10
links: PTS
area: main
in suites: etch, etch-m68k
size: 2,096 kB
ctags: 1,539
sloc: ml: 8,808; sh: 527; makefile: 203
file content (114 lines) | stat: -rw-r--r-- 4,332 bytes
parent folder | download | duplicates (6)
#require "netclient";;

(* This example shows how to use the HTTP protocol pipeline. The pipeline
 * is a queue of HTTP requests which is tried to carry out in an optimal
 * way. This includes:
 * - The pipeline automatically opens several network connections if
 *   several servers occur in the queue. It also opens several connections
 *   to the same server. All these connections are handled in parallel.
 * - The pipeline sends several requests at once over a connection, if
 *   the other side supports that. This is called "pipeline mode", and
 *   the feature that gave the pipeline class its name. (And it boosts the
 *   performance if you download many small files!)
 * - If network errors occur, the pipeline repeats the request several
 *   times before giving up (the details of this feature can be configured)
 * - Redirections are followed automatically.
 * Once a request could be processed, a callback function is invoked.
 *
 * Note that pipelines should not be too long. It is a good idea to
 * add requests to the pipeline only if the number of open
 * connections is not too high (the best number depends on the quality
 * of the network connection, and the CPU load). For example, it is
 * possible to keep thousands of connections open at the same time,
 * but it does not make sense if the network is saturated because of this.
 *)

open Http_client;;

let got_response message =
  (* This function is called when the response has arrived, or a timeout
   * happened. 
   *)
  let uri = message # get_uri() in
  try
    let (http_version_string, code, text) = message # dest_status() in
    (* Get the status line.
     * http_version_string: The identifier for the protocol, e.g. "HTTP/1.1"
     * code: The status code. code >= 200 && code <= 299 means "ok".
     * text: The explanation for the code.
     * dest_status will raise the exception Http_protocol if there is not
     * a valid response.
     *)
    Printf.printf "Response for %s: %d %s\n" uri code text;
    flush stdout;
    (* Now we could get the contents of the response using:
     * let s = message # get_resp_body(),
     * or we could get the returned MIME header as in
     * let content_type = message # assoc_resp_header "content-type"
     *)
  with
      Http_protocol No_reply ->
	(* For some reason, there was no reply. For example, because the
	 * preceding message in the pipeline caused trouble, and so the
	 * response for this message could not be received.
	 *)
	Printf.printf "No response for %s\n" uri;
	flush stdout;
    | Http_protocol (Failure s) ->
	(* An error occurred *)
	Printf.printf "Error for %s: %s\n" uri s;
	flush stdout;
    | Http_protocol (Bad_message s) ->
	(* An error occurred *)
	Printf.printf "Got bad message for %s: %s\n" uri s;
	flush stdout;
    | Http_protocol (Unix.Unix_error(e,fname,param)) ->
	Printf.printf "Network error for %s: %s\n" uri (Unix.error_message e);
	flush stdout;
    | Http_protocol other ->
	Printf.printf "Other exception for %s: %s\n" uri
	  (Printexc.to_string other);
	flush stdout;
;;


let get_several_urls url_list =
  (* A pipeline is just a container for the requests and responses. *)
  let p = new pipeline in
  (* Configure p: *)
  p # set_proxy_from_environment();  (* Respect "http_proxy", "no_proxy" *)
  (* Set some verbosity: 
   * let opts = p # get_options in
   * p # set_options { opts with verbose_connection = true };
   *)
  (* Add the requests to the pipeline. The requests are only collected,
   * no network I/O happens.
   *)
  List.iter
    (fun url ->
       let message = new get url in
       (* message: a container for the request AND the corresponding response *)
       p # add_with_callback 
	 message
	 got_response;   (* This function is called when the response arrives *)
       (* Note: add_with_callback will raise an exception immediately for
	* DNS errors ("host name lookup failed" etc.). The DNS lookups
	* are done synchronously.
	*)
    )
    url_list;
  (* Now start the pipeline. "run" returns when the job is done, i.e. all
   * requests have been processed. If there is an exception, we can print
   * it and restart "run".
   *)
  let rec go_ahead() =
    try
      p # run()
    with
	err ->
	  print_endline ("Uncaught exception: " ^ Printexc.to_string err);
	  flush stdout;
	  go_ahead()
  in
  go_ahead()
;;