File: http_client.mli

package info (click to toggle)
netclient 0.3-OCaml3.04-4
  • links: PTS
  • area: main
  • in suites: woody
  • size: 144 kB
  • ctags: 369
  • sloc: ml: 1,639; makefile: 134
file content (525 lines) | stat: -rw-r--r-- 19,066 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
(* $Id: http_client.mli,v 1.1.1.1 2001/04/12 08:29:58 zack Exp $
 * ----------------------------------------------------------------------
 *
 *)

(**********************************************************************)
(* HTTP/1.1 client                                                    *)
(* written by Gerd Stolpmann                                          *)
(**********************************************************************)

(* Implements much of HTTP/1.1.
 * Implemented:
 *  - chunked messages
 *  - persistent connections
 *  - modular authentication methods, currently Basic and Digest
 * Left out:
 *  - multipart messages, including multipart/byterange
 *  - content encoding (compression)    (1)
 *  - content digests specified by RFC 2068 and 2069   (1)
 *  - conditional and partial GET   (1)
 *  - following code 303 redirections automatically    (1)
 *  - client-side caching   (1)
 *  - HTTP/1.0 persistent connections
 *
 * (1) These features can be implemented on top of this module if really needed,
 *     but there is no special support for them.
 *)

(* RESTRICTED THREAD-SAFETY - NEW SINCE RELEASE 0.3:
 *
 * The module can be compiled such that it is thread-safe. In particular,
 * one has to use the netclient_mt.cm[x]a archive, and thread-safety is
 * restricted to the following kinds of usage:
 * - The golden rule is that threads must not share pipeline objects.
 *   If every thread uses its own pipeline, every thread will have its own
 *   set of state variables.
 *   It is not detected if two threads erroneously share a pipeline,
 *   neither by an error message nor by implicit serialization. Strange
 *   things may happen.
 * - The same applies to the other objects: "get", "trace", "options",
 *   "head", "post", "put", "delete", "basic_auth_method", and 
 *   "digest_auth_method". But sharing these objects would make no sense
 *   at all.
 * - Of course, it would be possible to use lots of mutexes to make
 *   the module fully thread-safe. Perhaps in the next release.
 * - The Convenience module serializes; see below.
 *)


(**********************************************************************
 *** NOTE: At the end of this interface specification there is a    ***
 *** SIMPLIFIED interface that is sufficient for many applications. ***
 *** The simplified interface is recommended for beginners.         ***
 **********************************************************************)



(* Exceptions exported by this module.
 *
 * NOTE(review): the conditions under which Header_is_incomplete,
 * Body_is_incomplete and Body_maybe_complete are raised are not described
 * in this interface; the names suggest truncated or ambiguous responses.
 * Confirm against the implementation before relying on them.
 *)
exception Header_is_incomplete;;
exception Body_is_incomplete;;
exception Body_maybe_complete;;
exception Http_error of (int * string);;
  (* Http_error (code, body): raised by the "get_resp_body" method of class
   * "message" when the response code is not in the range 200 to 299.
   * 'code' is the response code, 'body' the body of the erroneous response.
   *)
exception Broken_connection;;
  (* Connection closed during request. See the "run" method of class
   * "pipeline" for the exact cases in which this exception is raised.
   *)

type secret;;
  (* Abstract type for which this interface exports no value. Methods that
   * require a parameter of type secret can therefore not be called by
   * users of the module; the parameter marks them as private.
   *)

type token;;
  (* Internally used. Values of this type appear only in the signatures of
   * the authentication methods that should not be used from outside.
   *)

(* Categories of diagnostic output. A list of these values is passed to the
 * "verbose" method of class "pipeline" to select what is reported.
 *)
type verbose =
    Verbose_status             (* Inform about connection states *)
  | Verbose_request_header     (* Print all request headers *)
  | Verbose_response_header    (* Print all response headers *)
  | Verbose_request_contents   (* Print the request body *)
  | Verbose_response_contents  (* Print the response body *)
;;


(*************************************************************)
(***** class message: core functionality of all messages *****)
(*************************************************************)

class virtual message :
  object
    (* A "message" contains the data of a request and, if the request was
     * successful, the data of the response.
     * The class is virtual because "prepare" is left to the subclasses,
     * one per HTTP method.
     *)

    method virtual prepare : bool -> unit
        (* Implemented by every concrete message class.
         * NOTE(review): neither the purpose of this method nor the meaning
         * of the boolean argument is documented in this interface; confirm
         * against the implementation.
         *)

    (* public state: *)

    method is_served : bool
        (* true iff the request/response cycle was done *)
    method get_host : unit -> string
        (* Host name of the content server *)
    method get_port : unit -> int
        (* Port number of the content server *)
    method get_req_body : unit -> string
        (* What has been sent as body in the (last) request *)
    method get_req_header : unit -> (string * string) list
        (* What has been sent as header in the (last) request. Returns
         * (key, value) pairs, where the keys are all in lowercase.
         *
         * NOTE ABOUT THE REQUEST HEADER:
         * The header of a "message" object is initially empty. You can
         * set header entries using "set_req_header" before the request.
         * When the request is sent, some header entries are added
         * automatically, such as "host", "content-length" and
         * "authorization".
         *)
    method assoc_req_header : string -> string
        (* Query a specific entry of the request header. The name of the
         * entry must be given in lowercase characters.
         * NOTE(review): the behaviour for a missing entry is not specified
         * here (presumably Not_found is raised) -- confirm.
         *)
    method set_req_header : string -> string -> unit
        (* set_req_header name value:
         * Set the request header entry with the given "name" to "value".
         *)
    method get_req_uri : unit -> string
        (* Get the "request URI". This value is only set after doing the
         * request.
         *)
    method get_req_method : unit -> string
        (* Get the name of the request method. This value is only set after
         * doing the request.
         *)
    method get_resp_header : unit -> (string * string) list
        (* Get the header of the last response as (key, value) pairs. As
         * for the request header, the keys are in lowercase characters.
         *)
    method assoc_resp_header : string -> string
        (* Query a specific header entry of the response. The name of the
         * entry must be given in lowercase characters.
         * NOTE(review): the behaviour for a missing entry is not specified
         * here (presumably Not_found is raised) -- confirm.
         *)
    method get_resp_body : unit -> string
        (* Returns the body of the last response if the response status
         * is OK (i.e. the code is in the range 200 to 299).
         * Otherwise, Http_error (code, body) is raised where 'code' is
         * the response code and 'body' is the body of the (erroneous)
         * response.
         *)
    method dest_status : unit -> (string * int * string)
        (* Returns the status line of the last response (but status lines
         * with code 100 are ignored).
         * The returned triple is (http_string, code, text).
         *)


    (* proxy control: *)

    method no_proxy : unit -> unit
        (* Forces this request to be done without a proxy. *)

    method is_proxy_allowed : unit -> bool
        (* Returns whether this object would allow a proxy. *)

    (* methods for convenience: *)

    method dump_header : string -> (string * string) list -> unit
        (* dump_header prefix header:
         * Writes the given header list to stderr. Every line is
         * prefixed by the given string.
         *)

    (* private state: *)

    (* All remaining methods take an argument of the abstract type "secret"
     * and can therefore not be called from outside this module.
     * NOTE(review): their behaviour is not documented in this interface;
     * see the implementation.
     *)

    method init_query : secret -> string -> unit

    method set_served : secret -> unit
    method set_unserved : secret -> unit
    method get_request : secret -> string
    method set_response : secret -> string -> unit

    (* private methods: *)

    method decode_header : secret -> unit
    method decode_header_at : secret -> string -> int -> 
                                ( (string * string) list * int )
    method decode_body : secret -> bool -> bool -> unit
    method body_is_complete : secret -> (string * string) list -> 
                                string -> int -> unit
  end
;;


(*****************************************)
(***** message types by http methods *****)
(*****************************************)

(* Message type for the HTTP method GET. *)
class get : string ->            (* The query, "http://server/path" *)
  object
    inherit message
    method prepare : bool -> unit
        (* Concrete version of the virtual method of "message".
         * NOTE(review): the meaning of the boolean is not documented here.
         *)
  end
;;


(* Message type for the HTTP method TRACE. *)
class trace : string -> int ->     
  (* (1) The query, "http://server/path"
   * (2) The maximum number of hops
   *)
  object
    inherit message
    method prepare : bool -> unit
        (* Concrete version of the virtual method of "message".
         * NOTE(review): the meaning of the boolean is not documented here.
         *)
  end
;;


(* Message type for the HTTP method OPTIONS. *)
class options : string ->        (* The query, "http://server/path" *)
  object
    inherit message
    method prepare : bool -> unit
        (* Concrete version of the virtual method of "message".
         * NOTE(review): the meaning of the boolean is not documented here.
         *)
  end
;;


(* Message type for the HTTP method HEAD. *)
class head : string ->           (* The query, "http://server/path" *)
  object
    inherit message
    method prepare : bool -> unit
        (* Concrete version of the virtual method of "message".
         * NOTE(review): the meaning of the boolean is not documented here.
         *)
  end
;;


(* Message type for the HTTP method POST. *)
class post : string -> (string * string) list ->
  (* (1) The query, "http://server/path"
   * (2) The parameters, as (name, value) pairs, that are transferred
   *     using the MIME type application/x-www-form-urlencoded
   *)
  object
    inherit message
    method prepare : bool -> unit
        (* Concrete version of the virtual method of "message".
         * NOTE(review): the meaning of the boolean is not documented here.
         *)
  end
;;


(* Message type for the HTTP method PUT. *)
class put : string -> string ->
  (* (1) The query, "http://server/path"
   * (2) The body to be transferred
   *)
  object
    inherit message
    method prepare : bool -> unit
        (* Concrete version of the virtual method of "message".
         * NOTE(review): the meaning of the boolean is not documented here.
         *)
  end
;;


(* Message type for the HTTP method DELETE. *)
class delete : string ->         (* The query, "http://server/path" *)
  object
    inherit message
    method prepare : bool -> unit
        (* Concrete version of the virtual method of "message".
         * NOTE(review): the meaning of the boolean is not documented here.
         *)
  end
;;



(**********************************)
(***** Authentication methods *****)
(**********************************)

(* Authentication method object. It stores (user, password) pairs per realm
 * and is registered with a pipeline by "add_authentication_method".
 *)
class basic_auth_method :
  object
    val mutable current_realm : string
        (* NOTE(review): presumably the realm that "get_credentials" refers
         * to as the current realm -- confirm against the implementation.
         *)
    method name : string 
        (* NOTE(review): presumably the name of the authentication scheme;
         * not documented in this interface.
         *)
    method set_realm : string -> string -> string -> unit
        (* set_realm realm user password:
         * registers that (user, password) should be used for the given
         * realm
         *)
    method get_credentials : unit -> (string * string)
        (* Gets (user, password) for the current realm or raises Not_found.
         * This method may be overridden. For example, an interactive
         * application may open a dialog box to get the credentials of
         * an unknown realm.
         *)
    (* The following methods should not be used from outside *)
    method www_authenticate : message -> token list -> unit
    method set_authorization : message -> string -> unit
    method update : message -> token list -> unit
  end
;;


(* Authentication method object for the Digest method. It exposes exactly the
 * interface of "basic_auth_method", so it can be passed wherever a
 * basic_auth_method is expected.
 *)
class digest_auth_method :
  object
    inherit basic_auth_method
  end
;;



(**********************************************)
(***** class pipeline: the http processor *****)
(**********************************************)

class pipeline :
  object
    (* A "pipeline" object is a FIFO queue of messages.
     * Note that in spite of the name no asynchronous pipelining is
     * implemented. This means that the client waits for the response to
     * the last request before a new request is sent to the same host.
     * Perhaps true pipelining is added some day. (The point is that this
     * interface already specifies true pipelining even if this feature is
     * not yet implemented.)
     *
     * A "pipeline" leaves the connection to the last peer (either the
     * content server or the proxy) open and reuses it the next time.
     * Such connections are called "persistent".
     * Note that Web and proxy servers usually have a small timeout for
     * persistent connections (several seconds), so persistency is only
     * worthwhile if several requests are done in immediate succession.
     *)

    method add_authentication_method : basic_auth_method -> unit
        (* Adds an authentication method. *)
    method set_proxy : string -> int -> unit
        (* set_proxy name port:
         * sets that a proxy 'name' listening on 'port' should be used
         *)
    method set_proxy_auth : string -> string -> unit
        (* set_proxy_auth user password:
         * sets user and password for the proxy. Only the "basic"
         * authentication method is implemented.
         *)
    method avoid_proxy_for : string list -> unit
        (* Sets a list of host names or domain suffixes for which no proxy
         * should be used,
         * e.g. [ "localhost"; ".our.net" ]
         *)
    method avoid_persistent_connection : unit -> unit
        (* Sets that a new connection is opened for every request.
         * This is not recommended because it decreases performance.
         *)
    method reset : unit -> unit
        (* Empties the pipeline and closes the connection. *)
    method add : message -> unit
        (* Adds the message at the end of the pipeline. The state of the
         * message is set to "unserved".
         *)
    method empty : unit -> bool
        (* Is the pipeline empty? *)
    method pipeline : message list
        (* Returns the list representing the pipeline. *)
    method run : unit -> unit
      (* Runs through the requests in the pipeline. If a request can be
       * fulfilled, i.e. the server sends a response, the state of the
       * request is set and the request is removed from the pipeline.
       * If a request cannot be fulfilled (no response, bad response,
       * network error), an exception is raised and the request remains in
       * the pipeline (and is even the head of the pipeline). The
       * processing of the pipeline stops in this case.
       *
       * This function handles the following HTTP return codes itself:
       * 100: This is an intermediate return code and is simply ignored.
       * 301: If the method is GET or HEAD, the redirection is followed.
       * 302: If the method is GET or HEAD, the redirection is followed.
       *
       * All other return codes remain uninterpreted; it is up to the
       * caller of this function to react to them.
       *
       * Exception Broken_connection:
       *  - The server has closed the connection before the full request
       *    could be sent. It is unclear if something happened or not.
       *    The application should figure out the current state and
       *    retry the request.
       *  - Also raised if only parts of the response have been received
       *    and the server closed the connection. This is the same problem.
       *    Note that this can only be detected if a "content-length" has
       *    been sent or "chunked encoding" was chosen. Should normally
       *    work for persistent connections.
       *  - NOT raised if the server forces a "broken pipe" (normally
       *    indicates a serious server problem). The intention of
       *    Broken_connection is that retrying the request will probably
       *    succeed.
       *)
    method close_connection : unit -> unit
        (* If there is a pending connection to a server, it is closed. *)
    method abort_connection : unit -> unit
        (* Same as 'close_connection' but does not inform the server about
         * ending the connection. This might lead to timeouts on the server
         * side.
         *)
    method verbose : verbose list -> unit
        (* Sets the level of verbosity: the list names the categories of
         * output that are wanted.
         *)
    method very_verbose : unit -> unit
        (* Sets maximum verbosity. *)
  end
;;


(**************************************************)
(***** Convenience module for simple purposes *****)
(**************************************************)

(* Do 'open Http_client.Convenience' for simple applications.
 *
 * - The environment variables "http_proxy" and "no_proxy" determine 
 *   the proxy settings. "http_proxy" must be an http-URL that contains
 *   the proxy's name, its port, and optionally user and password.
 *   E.g. "http://eric:eric'spassword@proxy:8080/".
 *   The variable "no_proxy" is a comma-separated list of hosts and
 *   domains for which no proxy must be used.
 *   E.g. "localhost, sun, moon, .intra.net"
 * - There is a default behaviour to authenticate. Both "basic" and "digest"
 *   methods are enabled. Two global variables, http_user and http_password
 *   set the user and password if the URL does not specify them. In the case
 *   that user and password are included in the URL, these values are always
 *   used.
 * - There is a default error behaviour. If a request fails, it is automatically
 *   repeated. The variable http_trials specifies the number of times a request
 *   is submitted at most.
 *   Requests are not repeated if there is an HTTP return code that indicates
 *   a normal operating condition.
 *   POST and DELETE requests are never repeated.
 *)

(* RESTRICTED THREAD SAFETY - NEW SINCE RELEASE 0.3:
 *
 * The Convenience module is fully thread-safe with the exception of the
 * exported variables (http_trials, http_user, and http_password). Note
 * that all threads share the same pipeline, and access to the pipeline
 * is serialized.
 * The latter simply means that it always works, but that threads may 
 * block each other (i.e. the program slows down if more than one thread
 * wants to open http connections at the same time).
 *)


module Convenience :
    sig
      (* Simplified interface: every function performs one complete request
       * for the given URL and returns either the whole message or only the
       * body of the response.
       *)

      val http_trials : int ref
        (* Number of times every request is tried. Default: 3 *)

      val http_user : string ref
        (* The default user if authentication is required *)

      val http_password : string ref
        (* The default password if authentication is required *)

      val http_get_message : string -> message
        (* Does a "GET" request with the given URL and returns the message.
         * The URL may contain a user and a password, as in
         * "http://user:password@server.com/path". If authentication is
         * required by the server, the user and password values from the
         * URL are taken, if present. Otherwise, http_user and http_password
         * are used. If http_user is "", authentication always fails.
         *)

      val http_head_message : string -> message
        (* Does a "HEAD" request with the given URL and returns the reply.
         * See also http_get_message.
         *)

      val http_post_message : string -> (string * string) list -> message
        (* Does a "POST" request with the given URL and returns the reply.
         * The list contains the parameters sent with the POST request.
         * See also http_get_message.
         *)

      val http_put_message : string -> string -> message
        (* Does a "PUT" request with the given URL and returns the reply.
         * The second argument contains the contents to be put.
         * See also http_get_message.
         *)

      val http_delete_message : string -> message
        (* Does a "DELETE" request with the given URL and returns the reply.
         * See also http_get_message.
         *)

      val http_get : string -> string
        (* Does a "GET" request with the given URL and returns the message
         * body. See also http_get_message.
         *)

      val http_post : string -> (string * string) list -> string
        (* Does a "POST" request with the given URL and returns the message
         * body. The list contains the parameters sent with the POST
         * request. See also http_get_message.
         *)

      val http_put : string -> string -> string
        (* Does a "PUT" request with the given URL and returns the message
         * body. The second argument contains the contents to be put.
         * See also http_get_message.
         *)

      val http_delete : string -> string
        (* Does a "DELETE" request with the given URL and returns the message
         * body. See also http_get_message.
         *)

      val http_verbose : unit -> unit
        (* Turns on debug messages on stderr. *)

    end


(* ======================================================================
 * History:
 * 
 * $Log: http_client.mli,v $
 * Revision 1.1.1.1  2001/04/12 08:29:58  zack
 * netclient debian package
 *
 * Revision 1.4  1999/07/08 03:00:03  gerd
 * 	Added comments how to use the new MT support.
 *
 * Revision 1.3  1999/06/10 19:28:01  gerd
 * 	Added Message_maybe_complete.
 *
 * Revision 1.2  1999/06/10 00:12:54  gerd
 * 	Change: The HTTP response codes 301 and 302 ("moved permanently",
 * resp. "moved temporarily") are now interpreted by the 'pipeline' class.
 * (But 303 not (yet?) -- perhaps there should be a switch to turn this
 * feature on or off once it gets implemented...)
 * 	Bugfix: The decision whether a proxy should be used or not works
 * now. The weird function 'dest_query' has gone, it is substituted by
 * 'init_query' which can be called in the initializers of the method
 * subclasses. Thus the query is analyzed very early which is very useful
 * in order to decide if a proxy is needed to reach a certain host.
 * 	Added: There is now a Convenience module which has a simplified
 * interface. It is sufficient for many applications that only want to do
 * simple GET and POST operations.
 *
 * Revision 1.1  1999/03/26 01:16:42  gerd
 * 	initial revision
 *
 * 
 *)