File: url.mll

package info (click to toggle)
hevea 2.38-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,824 kB
  • sloc: ml: 19,525; sh: 505; makefile: 311; ansic: 132
file content (104 lines) | stat: -rw-r--r-- 2,943 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
(***********************************************************************)
(*                                                                     *)
(*                          HEVEA                                      *)
(*  Luc Maranget, projet PARA, INRIA Rocquencourt                      *)
(*                                                                     *)
(*  Copyright 2012 Institut National de Recherche en Informatique et   *)
(*  en Automatique.  Distributed only by permission.                   *)
(*                                                                     *)
(*                                                                     *)
(***********************************************************************)

(* URL encoding and decoding, As the issue is still pending, apply to fragment only! *)

{
 open Printf

 type url =
 {
  scheme : string option ;
  authority : string option ;
  path : string ;
  query : string option ;
  fragment : string option ;
 }

 exception Error
}

let hex = ['0'-'9''A'-'F''a'-'f']

rule break = parse
|
([^':''/''?''#']+ as scheme ':') ?
("//" ([^'/''?''#']* as authority)) ?
([^'?''#']* as path)
('?' [^'#']* as query)?
('#' (_* as fragment))?
{ {scheme; authority; path; query; fragment;} }
| "" { raise Error }

and do_decode putc = parse
| '%' (hex as a) (hex as b)
  { let n =
    try int_of_string (sprintf "0x%c%c" a b) with _ -> assert false in
  putc (Char.chr n) ;
  do_decode putc lexbuf }
| _ as c { putc c ; do_decode putc lexbuf }
| eof    { () }

{
(* See
http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding/#Thereservedcharactersarenotwhatyouthinktheyare
*)
 let do_encode_fragment putc put c =  match c with
 |  'a'..'z' | 'A'..'Z' | '0'..'9' | '-' | '_' | '~' | '.'
   -> putc c
 | _ ->
    let c = match c with
    | '\n'|'\t' -> ' '
    | _ -> c in
    put (sprintf "%%%02X" (Char.code c))

 let do_encode putc put specific u =
   let len = String.length u in
   for k =0 to len-1 do
     let c = String.unsafe_get u k in
     specific putc put c
   done

 let apply putc put f u =
   begin match u.scheme with
   | None -> ()
   | Some s -> f s ; putc ':'
   end ;
   begin match u.authority with
   | None -> ()
   | Some s -> put "//" ; f s
   end ;
   f u.path ;
   begin match u.query with
   | None -> ()
   | Some s -> putc '?' ; f s
   end ;
   begin match u.fragment with
   | None -> ()
   | Some s -> putc '#' ; f s
   end ;
   ()

 let _encode putc put u =
   let u = break (MyLexing.from_string u) in
   apply putc put (do_encode putc put do_encode_fragment) u

 let _decode putc put u =
   let u = break (MyLexing.from_string u) in
   let do_decode s = do_decode putc (MyLexing.from_string s) in
   apply putc put do_decode u


  let encode_fragment putc put u =
    do_encode putc put do_encode_fragment u

  let decode_fragment putc _put u = do_decode putc (MyLexing.from_string u)
}