File: mimestring.ml

package info (click to toggle)
netstring 0.10.1-3
links: PTS
area: main
in suites: woody
size: 1,000 kB
ctags: 895
sloc: ml: 8,389; xml: 416; makefile: 188; sh: 103
file content (1035 lines) | stat: -rw-r--r-- 28,188 bytes
(* $Id: mimestring.ml,v 1.9 2001/07/01 10:25:10 gerd Exp $
 * ----------------------------------------------------------------------
 *
 *)

module Str = Netstring_str;;

let cr_or_lf_re = Str.regexp "[\013\n]";;

let trim_right_spaces_re =
  Str.regexp "[ \t]+$";;

let trim_left_spaces_re =
  Str.regexp "^[ \t]+";;

let header_re =
  Str.regexp "\\([^ \t\r\n:]+\\):\\([ \t]*.*\n\\([ \t].*\n\\)*\\)";;

let empty_line_re =
  Str.regexp "\013?\n";;

let end_of_header_re =
  Str.regexp "\n\013?\n";;


let scan_header ?(unfold=true) parstr ~start_pos:i0 ~end_pos:i1 =
  let rec parse_header i l =
    match Str.string_match header_re parstr i with
	Some r ->
	  let i' = Str.match_end r in
	  if i' > i1 then
	    failwith "Mimestring.scan_header";
	  let name = String.lowercase(Str.matched_group r 1 parstr) in
	  let value_with_crlf =
	    Str.matched_group r 2 parstr in
	  let value =
	    if unfold then begin
	      let value_with_rspaces =
		Str.global_replace cr_or_lf_re "" value_with_crlf in
	      let value_with_lspaces =
		Str.global_replace trim_right_spaces_re "" value_with_rspaces in
	      Str.global_replace trim_left_spaces_re "" value_with_lspaces 
	    end
	    else value_with_crlf
	  in
	  parse_header i' ( (name,value) :: l)
      | None ->
	  (* The header must end with an empty line *)
	  begin match Str.string_match empty_line_re parstr i with
	      Some r' ->
		List.rev l, Str.match_end r'
	    | None ->
		failwith "Mimestring.scan_header"
	  end
  in
  parse_header i0 []
;;

type s_token =
    Atom of string
  | EncodedWord of (string * string * string)
  | QString of string
  | Control of char
  | Special of char
  | DomainLiteral of string
  | Comment
  | End
;;

type s_option =
    No_backslash_escaping
  | Return_comments
  | Recognize_encoded_words
;;

type s_extended_token =
    { token      : s_token;
      token_pos  : int;
      token_line : int;
      token_linepos : int;   (* Position of the beginning of the line *)
      token_len  : int;
      mutable token_sep : bool; (* separates adjacent encoded words *)
    }
;;

let get_token et  = et.token;;
let get_pos et    = et.token_pos;;
let get_line et   = et.token_line;;
let get_column et = et.token_pos - et.token_linepos;;
let get_length et = et.token_len;;
let separates_adjacent_encoded_words et = et.token_sep;;

let get_decoded_word et =
  match et.token with
      Atom s -> s
    | QString s -> s
    | Control c -> String.make 1 c
    | Special c -> String.make 1 c
    | DomainLiteral s -> s
    | Comment -> ""
    | EncodedWord (_, encoding, content) ->
	( match encoding with
	      ("Q"|"q") ->
		Netencoding.Q.decode content
	    | ("B"|"b") -> 
		Netencoding.Base64.decode 
		  ~url_variant:false
		  ~accept_spaces:false
		  content
	    | _ -> failwith "get_decoded_word"
	)
    | End -> 
	failwith "get_decoded_word"
;;

let get_charset et =
  match et.token with
      EncodedWord (charset, _, _) -> charset
    | End -> failwith "get_charset"
    | _ -> "US-ASCII"
;;

type scanner_spec =
    { (* What the user specifies: *)
      scanner_specials : char list;
      scanner_options : s_option list;
      (* Derived from that: *)
      mutable opt_no_backslash_escaping : bool;
      mutable opt_return_comments : bool;
      mutable opt_recognize_encoded_words : bool;

      mutable is_special : bool array;
      mutable space_is_special : bool;
    }
;;

type scanner_target =
    { scanned_string : string;
      mutable scanner_pos : int;
      mutable scanner_line : int;
      mutable scanner_linepos : int; 
      (* Position of the beginning of the line *)
      mutable scanned_tokens : s_extended_token Queue.t;
      (* A queue of already scanned tokens in order to look ahead *)
      mutable last_token : s_token;
      (* The last returned token. It is only important whether it is
       * EncodedWord or not.
       *)
    }
;;

type mime_scanner = scanner_spec * scanner_target
;;

let get_pos_of_scanner (spec, target) = 
  if spec.opt_recognize_encoded_words then
    failwith "get_pos_of_scanner"
  else
    target.scanner_pos
;;

let get_line_of_scanner (spec, target) = 
  if spec.opt_recognize_encoded_words then
    failwith "get_line_of_scanner"
  else
    target.scanner_line
;;

let get_column_of_scanner (spec, target) = 
  if spec.opt_recognize_encoded_words then
    failwith "get_column_of_scanner"
  else
    target.scanner_pos - target.scanner_linepos 
;;

let create_mime_scanner ~specials ~scan_options =
  let is_spcl = Array.create 256 false in
  List.iter
    (fun c -> is_spcl.( Char.code c ) <- true)
    specials;
  let spec =
    { scanner_specials = specials;
      scanner_options = scan_options;
      opt_no_backslash_escaping = 
	List.mem No_backslash_escaping scan_options;
      opt_return_comments = 
	List.mem Return_comments scan_options;
      opt_recognize_encoded_words = 
	List.mem Recognize_encoded_words scan_options;
      is_special = is_spcl;
      space_is_special = is_spcl.(32);
    }
  in
  (* Grab the remaining arguments: *)
  fun ?(pos=0) ?(line=1) ?(column=0) s ->
    let target =
      { scanned_string = s;
	scanner_pos = pos;
	scanner_line = line;
	scanner_linepos = pos - column;
	scanned_tokens = Queue.create();
	last_token = Comment;   (* Must not be initialized with EncodedWord *)
      }
    in
    spec, target
;;


let encoded_word_re =
  Str.regexp "=\\?\\([^?]+\\)\\?\\([^?]+\\)\\?\\([^?]+\\)\\?=";;

let scan_next_token ((spec,target) as scn) =
  let mk_pair t len =
    { token = t;
      token_pos = target.scanner_pos;
      token_line = target.scanner_line;
      token_linepos = target.scanner_linepos;
      token_len = len;
      token_sep = false;
    },
    t
  in

  (* Note: mk_pair creates a new token pair, and it assumes that 
   * target.scanner_pos (and also scanner_line and scanner_linepos)
   * still contain the position of the beginning of the token.
   *)

  let s = target.scanned_string in
  let l = String.length s in
  let rec scan i =
    if i < l then begin
      let c = s.[i] in
      if spec.is_special.( Char.code c ) then begin
	let pair = mk_pair (Special c) 1 in
	target.scanner_pos <- target.scanner_pos + 1;
	(match c with
	     '\n' -> 
	       target.scanner_line    <- target.scanner_line + 1;
	       target.scanner_linepos <- target.scanner_pos;
	   | _ -> ()
	);
	pair
      end
      else
	match c with
	    '"' -> 
	      (* Quoted string: *)
	      scan_qstring (i+1) (i+1) 0
	  | '(' ->
	      (* Comment: *)
	      let i', line, linepos = 
		scan_comment (i+1) 0 target.scanner_line target.scanner_linepos
	      in
	      let advance() =
		target.scanner_pos <- i';
		target.scanner_line <- line;
		target.scanner_linepos <- linepos
	      in
	      if spec.opt_return_comments then begin
		let pair = mk_pair Comment (i' - i) in
		advance();
		pair
	      end
	      else 
		if spec.space_is_special then begin
		  let pair = mk_pair (Special ' ') (i' - i) in
		  advance();
		  pair
		end
		else begin
		  advance();
		  scan i'
		end
	  | (' '|'\t'|'\r') ->
	      (* Ignore whitespace by default: *)
	      target.scanner_pos <- target.scanner_pos + 1;
	      scan (i+1)
	  | '\n' ->
	      (* Ignore whitespace by default: *)
	      target.scanner_pos     <- target.scanner_pos + 1;
	      target.scanner_line    <- target.scanner_line + 1;
	      target.scanner_linepos <- target.scanner_pos;
	      scan (i+1)
	  | ('\000'..'\031'|'\127'..'\255') ->
	      let pair = mk_pair (Control c) 1 in
	      target.scanner_pos <- target.scanner_pos + 1;
	      pair
	  | '[' ->
	      (* Domain literal: *)
	      scan_dliteral (i+1) (i+1) 0
	  | _ ->
	      scan_atom i i
    end
    else 
      mk_pair End 0

  and scan_atom i0 i =
    let return_atom() =
      let astring = String.sub s i0 (i-i0) in
      let r =
	if spec.opt_recognize_encoded_words then
	  Str.string_match ~groups:4 encoded_word_re astring 0
	else
	  None
      in
      match r with
	  None ->
	    (* An atom contains never a linefeed character, so we can ignore
	     * scanner_line here.
	     *)
	    let pair = mk_pair (Atom astring) (i-i0) in
	    target.scanner_pos <- i;
	    pair
	| Some mr ->
	    (* Found an encoded word. *)
	    let charset  = Str.matched_group mr 1 astring in
	    let encoding = Str.matched_group mr 2 astring in
	    let content  = Str.matched_group mr 3 astring in
	    let t = EncodedWord(String.uppercase charset,
				 String.uppercase encoding,
				 content) in
	    let pair = mk_pair t (i-i0) in
	    target.scanner_pos <- i;
	    pair
    in

    if i < l then
      let c = s.[i] in
      match c with
	  ('\000'..'\031'|'\127'..'\255'|'"'|'('|'['|' '|'\t'|'\r'|'\n') ->
	    return_atom()
	| _ ->
	    if spec.is_special.( Char.code c ) then
	      return_atom()
	    else
	      scan_atom i0 (i+1)
    else
      return_atom()

  and scan_qstring i0 i n =
    if i < l then
      let c = s.[i] in
      match c with
	  '"' ->
	    (* Regular end of the quoted string: *)
	    let content, line, linepos = copy_qstring i0 (i-1) n in
	    let pair = mk_pair (QString content) (i-i0+2) in
	    target.scanner_pos <- i+1;
	    target.scanner_line <- line;
	    target.scanner_linepos <- linepos;
	    pair
	| '\\' when not spec.opt_no_backslash_escaping ->
	    scan_qstring i0 (i+2) (n+1)
	| _ ->
	    scan_qstring i0 (i+1) (n+1)
    else
      (* Missing right double quote *)
      let content, line, linepos = copy_qstring i0 (l-1) n in
      let pair = mk_pair (QString content) (l-i0+1) in
      target.scanner_pos <- l;
      target.scanner_line <- line;
      target.scanner_linepos <- linepos;
      pair

  and copy_qstring i0 i1 n =
    (* Used for quoted strings and for domain literals *)
    let r = String.create n in
    let k = ref 0 in
    let line = ref target.scanner_line in
    let linepos = ref target.scanner_linepos in
    for i = i0 to i1 do
      let c = s.[i] in
      match c with
	  '\\' when i < i1 &&  not spec.opt_no_backslash_escaping -> ()
	| '\n' ->
	    line := !line + 1;
	    linepos := i+1;
	    r.[ !k ] <- c; 
	    incr k
	| _ -> 
	    r.[ !k ] <- c; 
	    incr k
    done;
    assert (!k = n);
    r, !line, !linepos

  and scan_dliteral i0 i n =
    if i < l then
      let c = s.[i] in
      match c with
	  ']' ->
	    (* Regular end of the domain literal: *)
	    let content, line, linepos = copy_qstring i0 (i-1) n in
	    let pair = mk_pair (DomainLiteral content) (i-i0+2) in
	    target.scanner_pos <- i+1;
	    target.scanner_line <- line;
	    target.scanner_linepos <- linepos;
	    pair
	| '\\' when not spec.opt_no_backslash_escaping ->
	    scan_dliteral i0 (i+2) (n+1)
	| _ ->
	    (* Note: '[' is not allowed by RFC 822; we treat it here as
	     * a regular character (questionable)
	     *)
	    scan_dliteral i0 (i+1) (n+1)
    else
      (* Missing right bracket *)
      let content, line, linepos = copy_qstring i0 (l-1) n in
      let pair = mk_pair (DomainLiteral content) (l-i0+1) in
      target.scanner_pos <- l;
      target.scanner_line <- line;
      target.scanner_linepos <- linepos;
      pair


  and scan_comment i level line linepos =
    if i < l then
      let c = s.[i] in
      match c with
	  ')' ->
	    (i+1), line, linepos
	| '(' ->
	    (* nested comment *)
	    let i', line', linepos' = 
	      scan_comment (i+1) (level+1) line linepos 
	    in
	    scan_comment i' level line' linepos'
	| '\\' when not spec.opt_no_backslash_escaping ->
	    if (i+1) < l && s.[i+1] = '\n' then
	      scan_comment (i+2) level (line+1) (i+2)
	    else
	      scan_comment (i+2) level line linepos
	| '\n' ->
	    scan_comment (i+1) level (line+1) (i+1)
	| _ ->
	    scan_comment (i+1) level line linepos
    else
      (* Missing closing ')' *)
      i, line, linepos
  in

  scan target.scanner_pos
;;


let scan_token ((spec,target) as scn) =
  (* This function handles token queueing in order to recognize white space
   * that separates adjacent encoded words.
   *)

  let rec collect_whitespace () =
    (* Scans whitespace tokens and returns them as:
     * (ws_list, other_tok)     if there is some other_tok following the
     *                          list (other_tok = End is possible)
     *)
    let (et, t) as pair = scan_next_token scn in
    ( match t with
	  (Special ' '|Special '\t'|Special '\n'|Special '\r') ->
	    let ws_list, tok = collect_whitespace() in
	    pair :: ws_list, tok
	| _ ->
	    [], pair
    )
  in

  try
    (* Is there an already scanned token in the queue? *)
    let et = Queue.take target.scanned_tokens in
    let t = et.token in
    target.last_token <- t;
    et, et.token
  with
      Queue.Empty ->
	(* If not: inspect the last token. If that token is an EncodedWord,
	 * the next tokens are scanned in advance to determine if there
	 * are spaces separating two EncodedWords. These tokens are put
	 * into the queue such that it is avoided that they are scanned
	 * twice. (The sole purpose of the queue.)
	 *)
	match target.last_token with
	    EncodedWord(_,_,_) as ew ->
	      let ws_list, tok = collect_whitespace() in
	      (* If tok is an EncodedWord, too, the tokens in ws_list must
	       * be flagged as separating two adjacent encoded words. 
	       *)
	      ( match tok with
		    _, EncodedWord(_,_,_) ->
		      List.iter
			(fun (et,t) ->
			   et.token_sep <- true)
			ws_list
		  | _ ->
		      ()
	      );
	      (* Anyway, queue the read tokens but the first up *)
	      ( match ws_list with
		    [] ->
		      (* Nothing to queue *)
		      let et, t = tok in
		      target.last_token <- t;
		      tok
		  | (et,t) as pair :: ws_list' ->
		      List.iter
			(fun (et',_) -> 
			   Queue.add et' target.scanned_tokens)
			ws_list';
		      ( match tok with
			  | _, End ->
			      ()
			  | (et',_) ->
			      Queue.add et' target.scanned_tokens
		      );
		      (* Return the first scanned token *)
		      target.last_token <- t;
		      pair
	      )
	  | _ ->
	      (* Regular case: Scan one token; do not queue it up *)
	      let (et, t) as pair = scan_next_token scn in 
	      target.last_token <- t;
	      pair
;;
	

let scan_token_list scn =
  let rec collect() =
    match scan_token scn with
	_, End ->
	  []
      | pair ->
	  pair :: collect()
  in
  collect()
;;


let scan_structured_value s specials options =
  let rec collect scn =
    match scan_token scn with
	_, End ->
	  []
      | _, t ->
	  t :: collect scn
  in
  let scn = create_mime_scanner specials options s in
  collect scn
;;


let specials_rfc822 =
  [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '.' ];;


let specials_rfc2045 =
  [ '<'; '>'; '@'; ','; ';'; ':'; '\\'; '/' ];;


let scan_encoded_text_value s =
  let specials = [ ' '; '\t'; '\r'; '\n'; '('; '['; '"' ] in
  let options =  [ Recognize_encoded_words ] in
  let scn = create_mime_scanner specials options s in
  
  let rec collect () =
    match scan_token scn with
	_, End ->
	  []
      | et, _ when separates_adjacent_encoded_words et ->
	  collect()
      | et, (Special _|Atom _|EncodedWord(_,_,_)) ->
	  et :: collect ()
      | _, _ ->
	  assert false
  in
  collect()
;;


let scan_value_with_parameters s options =
  let rec parse_params tl =
    match tl with
	Atom n :: Special '=' :: Atom v :: tl' ->
	  (n,v) :: parse_rest tl'
      | Atom n :: Special '=' :: QString v :: tl' ->
	  (n,v) :: parse_rest tl'
      | _ ->
	  failwith "Mimestring.scan_value_with_parameters"
  and parse_rest tl =
    match tl with
	[] -> []
      | Special ';' :: tl' ->
	  parse_params tl'
      | _ ->
	  failwith "Mimestring.scan_value_with_parameters"
  in

  (* Note: Even if not used here, the comma is a very common separator
   * and should be recognized as being special. You will get a
   * failure if there is a comma in the scanned string.
   *)
  let tl = scan_structured_value s [ ';'; '='; ',' ] options in
  match tl with
      [ Atom n ] -> n, []
    | [ QString n ] -> n, []
    | Atom n :: Special ';' :: tl' ->
	n, parse_params tl'
    | QString n :: Special ';' :: tl' ->
	n, parse_params tl'
    | _ ->
	failwith "Mimestring.scan_value_with_parameters"
;;


let scan_mime_type s options =
  let n, params = scan_value_with_parameters s options in
  (String.lowercase n),
  (List.map (fun (n,v) -> (String.lowercase n, v)) params)
;;


let lf_re = Str.regexp "[\n]";;

let scan_multipart_body s ~start_pos:i0 ~end_pos:i1 ~boundary =
  let l_s = String.length s in
  if i0 < 0 or i1 < 0 or i0 > l_s or i1 >l_s then
    invalid_arg "Mimestring.scan_multipart_body";

  (* First compile the regexps scanning for 'boundary': *)
  let boundary1_re =
    Str.regexp ("\n--" ^ Str.quote boundary) in
  let boundary2_re =
    Str.regexp ("--" ^ Str.quote boundary) in

  let rec parse i =
    (* i: Beginning of the current part (position directly after the
     * boundary line
     *)
    (* Search for next boundary at position i *)
    let i' =
      try min (fst (Str.search_forward boundary1_re s i) + 1) i1
      with
	  Not_found -> i1
    in
    (* i': Either the position of the first '-' of the boundary line,
     *     or i1 if no boundary has been found
     *)
    if i' >= i1 then
      [] (* Ignore everything after the last boundary *)
    else
      let i'' =
	try min (fst (Str.search_forward lf_re s i') + 1) i1
	with
	    Not_found -> i1
      in
      (* i'': The position after the boundary line *)
(*
      print_int i; print_newline();
      print_int i'; print_newline();
      print_int i''; print_newline();
      flush stdout;
*)
      let header, k = scan_header s i i' in
      (* header: the header of the part
       * k: beginning of the body
       *)

      let value =
	(* We know that i'-1 is a linefeed character. i'-2 should be a CR
	 * character. Both characters are not part of the value.
	 *)
	if i' >= 2 then
	  match s.[i'-2] with
	      '\013' -> String.sub s k (i'-2-k)
	    | _      -> String.sub s k (i'-1-k)
	else
	  String.sub s k (i'-1-k)
      in

      let pair =
	(header, value) in

      if i'' >= i1
      then
	[ pair ]
      else
      	pair :: parse i''
  in

  (* Find the first boundary. This is a special case, because it may be
   * right at the beginning of the string (no preceding CRLF)
   *)

  let i_bnd =
    if Str.string_match boundary2_re s i0 <> None then
      i0
    else
      try min (fst (Str.search_forward boundary1_re s i0)) i1
      with
	  Not_found -> i1
  in

  if i_bnd >= i1 then
    []
  else
    let i_bnd' =
      try min (fst (Str.search_forward lf_re s (i_bnd + 1)) + 1) i1
      with
	  Not_found -> i1
    in
    if i_bnd' >= i1 then
      []
    else
      parse i_bnd'
;;


let scan_multipart_body_and_decode s ~start_pos:i0 ~end_pos:i1 ~boundary =
  let parts = scan_multipart_body s i0 i1 boundary in
  List.map
    (fun (params, value) ->
       let encoding =
	 try List.assoc "content-transfer-encoding" params
	 with Not_found -> "7bit"
       in

       (* NOTE: In the case of "base64" and "quoted-printable", the allocation
	* of the string "value" could be avoided.
	*)

       let value' =
	 match encoding with
	     ("7bit"|"8bit"|"binary") -> value
	   | "base64" ->
	       Netencoding.Base64.decode_substring
		 value 0 (String.length value) false true
	   | "quoted-printable" ->
	       Netencoding.QuotedPrintable.decode_substring
		 value 0 (String.length value)
	   | _ ->
	       failwith "Mimestring.scan_multipart_body_and_decode: Unknown content-transfer-encoding"
       in
       (params, value')
    )
    parts
;;


let scan_multipart_body_from_netstream s ~boundary ~create ~add ~stop =

  (* The block size of s must be at least the length of the boundary + 3.
   * Otherwise it is not guaranteed that the boundary is always recognized.
   *)
  if Netstream.block_size s < String.length boundary + 3 then
    invalid_arg "Mimestring.scan_multipart_body_from_netstream";

  (* First compile the regexps scanning for 'boundary': *)
  let boundary1_re =
    Str.regexp ("\n--" ^ Str.quote boundary) in
  let boundary2_re =
    Str.regexp ("--" ^ Str.quote boundary) in

  (* Subtask 1: Search the end of the MIME header: CR LF CR LF
   *            (or LF LF). Enlarge the window until the complete header
   *            is covered by the window.
   *)
  let rec search_end_of_header k =
    (* Search the end of the header beginning at position k of the
     * current window.
     * Return the position of the first character of the body.
     *)
    try
      (* Search for LF CR? LF: *)
      let i, r = Str.search_forward
		   end_of_header_re
		   (Netbuffer.unsafe_buffer (Netstream.window s))
		   k
      in
      (* If match_end <= window_length, the search was successful.
       * Otherwise, we searched in the uninitialized region of the
       * buffer.
       *)
      if Str.match_end r <= Netstream.window_length s then
	Str.match_end r
      else
	raise Not_found
    with
	Not_found ->
	  (* If the end of the stream is reached, the end of the header
	   * is missing: Error.
	   * Otherwise, we try to read another block, and continue.
	   *)
	  if Netstream.at_eos s then
	    failwith "Mimestring.scan_multipart_body_from_netstream: Unexpected end of stream";
	  let w0 = Netstream.window_length s in
	  Netstream.want_another_block s;
	  search_end_of_header (max (w0 - 2) 0)
  in

  (* Subtask 2: Search the first boundary line. *)
  let rec search_first_boundary() =
    (* Search boundary per regexp; return the position of the character
     * immediately following the boundary (on the same line), or
     * raise Not_found.
     *)
    try
      (* Search boundary per regexp: *)
      let i, r = Str.search_forward
		   boundary1_re
		   (Netbuffer.unsafe_buffer (Netstream.window s))
		   0
      in
      (* If match_end <= window_length, the search was successful.
       * Otherwise, we searched in the uninitialized region of the
       * buffer.
       *)
      if Str.match_end r <= Netstream.window_length s then begin
	Str.match_end r
      end
      else raise Not_found
    with
	Not_found ->
	  if Netstream.at_eos s then raise Not_found;
	  (* The regexp did not match: Move the window by one block.
	   *)
	  let n =
	    min
	      (Netstream.window_length s)
	      (Netstream.block_size s)
	  in
	  Netstream.move s n;
	  search_first_boundary()
  in

  (* Subtask 3: Search the next boundary line. Invoke 'add' for every
   * read chunk
   *)
  let rec search_next_boundary p =
    (* Returns the position directly after the boundary on the same line *)
    try
      (* Search boundary per regexp: *)
      let i,r = Str.search_forward
		  boundary1_re
		  (Netbuffer.unsafe_buffer (Netstream.window s))
		  0
      in
      (* If match_end <= window_length, the search was successful.
       * Otherwise, we searched in the uninitialized region of the
       * buffer.
       *)
      if Str.match_end r <= Netstream.window_length s then begin
	(* Add the last chunk of the part. *)
	let n =
	  (* i is a LF. i - 1 should be CR. Ignore these characters. *)
	  if i >= 1 then
	    match (Netbuffer.unsafe_buffer (Netstream.window s)).[ i - 1 ] with
		'\013' -> i - 1
	      | _      -> i
	  else
	    i
	in
	(* Printf.printf "add n=%d\n" n; *)
	add p s 0 n;
	Str.match_end r
      end
      else raise Not_found
    with
	Not_found ->
	  if Netstream.at_eos s then
	    failwith "Mimestring.scan_multipart_body_from_netstream: next MIME boundary not found";
	  (* The regexp did not match: Add the first block of the window;
	   * and move the window.
	   *)
	  let n =
	    min
	      (Netstream.window_length s)
	      (Netstream.block_size s)
	  in
	  (* Printf.printf "add n=%d\n" n; *)
	  add p s 0 n;
	  Netstream.move s n;
	  search_next_boundary p
  in

  (* Subtask 4: Search the end of the boundary line *)
  let rec search_end_of_line k =
    (* Search LF beginning at position k. Discard any contents until that. *)
    try
      let i,r = Str.search_forward
		  lf_re
		  (Netbuffer.unsafe_buffer (Netstream.window s))
		  k
      in
      (* If match_end <= window_length, the search was successful.
       * Otherwise, we searched in the uninitialized region of the
       * buffer.
       *)
      if Str.match_end r <= Netstream.window_length s then begin
	 Str.match_end r
      end
      else raise Not_found
    with
	Not_found ->
	  if Netstream.at_eos s then
	    failwith "Mimestring.scan_multipart_body_from_netstream: MIME boundary without line end";
	  (* The regexp did not match: move the window.
	   *)
	  let n = Netstream.window_length s in
	  Netstream.move s n;
	  search_end_of_line 0
  in

  (* Subtask 5: Check whether "--" follows the boundary on the same line *)
  let check_whether_last_boundary k =
    (* k: The position directly after the boundary. *)
    Netstream.want s (k+2);
    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
    (Netstream.window_length s >= k+2) && str.[k] = '-' && str.[k+1] = '-'
  in

  (* Subtask 6: Check whether the buffer begins with a boundary. *)
  let check_beginning_is_boundary () =
    let m = String.length boundary + 2 in
    Netstream.want s m;
    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
    (Netstream.window_length s >= m) &&
    (Str.string_match boundary2_re str 0 <> None)
  in

  let rec parse_part () =
    (* The first byte of the current window of s contains the character
     * directly following the boundary line that starts this part.
     *)
    (* Search the end of the MIME header: *)
    let k_eoh = search_end_of_header 0 in
    (* Printf.printf "k_eoh=%d\n" k_eoh; *)
    (* Get the MIME header: *)
    let str = Netbuffer.unsafe_buffer (Netstream.window s) in
    let header, k_eoh' = scan_header str 0 k_eoh in
    assert (k_eoh = k_eoh');
    (* Move the window over the header: *)
    Netstream.move s k_eoh;
    (* Create the part: *)
    let p = create header in
    let continue =
      begin try
	(* Search the next boundary; add the chunks while searching: *)
	let k_eob = search_next_boundary p in
	(* Printf.printf "k_eob=%d\n" k_eob; *)
        (* Is this the last boundary? *)
	if check_whether_last_boundary k_eob then begin
	  (* Skip the rest: *)
	  while not (Netstream.at_eos s) do
	    Netstream.move s (Netstream.window_length s)
	  done;
	  Netstream.move s (Netstream.window_length s);
	  false
	end
	else begin
	  (* Move to the beginning of the next line: *)
	  let k_eol = search_end_of_line k_eob in
	  Netstream.move s k_eol;
	  true
	end
      with
	  any ->
	    (try stop p with _ -> ());
	    raise any
      end in
      stop p;
      if continue then
	(* Continue with next part: *)
	parse_part()
  in

  (* Check whether s directly begins with a boundary: *)
  if check_beginning_is_boundary() then begin
    (* Move to the beginning of the next line: *)
    let k_eol = search_end_of_line 0 in
    Netstream.move s k_eol;
    (* Begin with first part: *)
    parse_part()
  end
  else begin
    (* Search the first boundary: *)
    try
      let k_eob = search_first_boundary() in
      (* Printf.printf "k_eob=%d\n" k_eob; *)
      (* Move to the beginning of the next line: *)
      let k_eol = search_end_of_line k_eob in
      (* Printf.printf "k_eol=%d\n" k_eol; *)
      Netstream.move s k_eol;
      (* Begin with first part: *)
      parse_part()
    with
	Not_found ->
	  (* No boundary at all: The body is empty. *)
	  ()
  end;
;;


(* ======================================================================
 * History:
 *
 * $Log: mimestring.ml,v $
 * Revision 1.9  2001/07/01 10:25:10  gerd
 * 	Fixed: string_partial_match does not what I expected.
 *
 * Revision 1.8  2000/08/13 00:04:36  gerd
 * 	Encoded_word -> EncodedWord
 * 	Bugfixes.
 *
 * Revision 1.7  2000/08/07 00:25:14  gerd
 * 	Implemented the new functions for structured field lexing.
 *
 * Revision 1.6  2000/06/25 22:34:43  gerd
 * 	Added labels to arguments.
 *
 * Revision 1.5  2000/06/25 21:15:48  gerd
 * 	Checked thread-safety.
 *
 * Revision 1.4  2000/05/16 22:30:14  gerd
 * 	Added support for some types of malformed MIME messages.
 *
 * Revision 1.3  2000/04/15 13:09:01  gerd
 * 	Implemented uploads to temporary files.
 *
 * Revision 1.2  2000/03/02 01:15:30  gerd
 * 	Updated.
 *
 * Revision 1.1  2000/02/25 15:21:12  gerd
 * 	Initial revision.
 *
 *
 *)