(*  Copyright (c) 2001 Anthony L Shipman *)

(* $Id: ietf_line.sml,v 1.3 2001/08/27 20:09:44 felix Exp $ *)

(*  This processes IETF lines such as HTTP headers

@#34567890123456789012345678901234567890123456789012345678901234567890
*)

signature IETF_LINE =
sig

    (*	Split a string into tokens and characters.
    *)
    val split:	string -> IETF_Part.Part list


    (*	This is used to reconstitute a header field from
	its parts. The result must preserve the semantics of the original
	header. The supplied parts should contain the original white
	space except for leading white space.  We will remove trailing
	white space.

	Tokens may contain embedded space and specials if they came from
	a quoted string.

	A token containing a " character cannot be represented in HTTP1.0
	since it would have to be within a quoted string but one isn't
	allowed within a quoted string!  HTTP1.1 allows \" in a quoted
	string. We will drop " characters for HTTP1.0.
	REVISIT - Fix this for HTTP1.1.

	Bad characters are stripped.
    *)
    val join:	IETF_Part.Part list -> string


    (*	If a string contains special characters then quote the field.
	Control characters are not allowed.

	We can use the lexer to recognise the different kinds of 
	characters and just rejoin them with quoting.
    *)
    val quoteField: string -> string


    (*	Dump for testing. *)
    val dump:	IETF_Part.Part list -> unit

end


structure IETF_Line: IETF_LINE =
struct

    open Common
    structure SS = Substring

    structure IP = IETF_Part

(*------------------------------------------------------------------------------*)


    (*	Run the string through the header lexer. *)
    fun split str : IP.Part list =
    let
	val done = ref false
	fun input n = if !done then "" else (done := true; str)

	val lexer = IETFLex.makeLexer input

	fun read toks =
	(
	    case lexer() of
	      IP.TEOF => rev toks
	    | t       => read (t::toks)
	)
    in
	read []
    end




    and join hparts =
    let
	fun to_str []         rslt = concat(rev rslt)
	|   to_str [IP.TWh _] rslt = to_str [] rslt    (* trailing ws *)
	|   to_str ((IP.Token s1)::r) rslt = to_str r ((quote s1)::rslt)
	|   to_str ((IP.TWh s)::r)    rslt = to_str r (s :: rslt)
	|   to_str ((IP.TSpec c)::r)  rslt = to_str r ((str c) :: rslt)
	|   to_str ((IP.TBad  c)::r)  rslt = to_str r rslt
	|   to_str (IP.TEOF::r)       rslt = to_str r rslt


	and quote str =
	let
	    (* If there are unsafe characters then right won't be empty.
	    *)
	    val (_, right) = SS.splitl safe (SS.all str)
	in
	    if SS.isEmpty right
	    then
		str
	    else
		strip_dq str
	end


	and safe c = not (Char.isCntrl c orelse
			  Char.isSpace c orelse
			  Char.contains "()<>@,;:\\\"/[]?={}" c)

	and strip_dq str =
	let
	    val fields = SS.fields (fn c => c = #"\"") (SS.all str)
	in
	    concat("\"" :: ((map SS.string fields) @ ["\""]))
	end

    in
	to_str hparts []
    end



    (*	If a string contains special characters then quote the field.
	Control characters are not allowed.

	We can use the lexer to recognise the different kinds of 
	characters and just rejoin them with quoting.
    *)
    and quoteField field = join(split field)


    (*	Dump for testing. *)
    and dump hparts =
    let
	fun put (IP.Token s1) = (print " Tok '"; print s1; print "'")
	|   put (IP.TSpec c)  = (print " TSpec '"; print (str c); print "'")
	|   put (IP.TWh s)    = (print " TWs '"; print s; print "'")
	|   put (IP.TBad  c)  = (print " TBad")
	|   put  IP.TEOF      = (print " TEOF")
    in
	app put hparts
    end

(*------------------------------------------------------------------------------*)

end
