1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
|
module type Str_intf = module type of Str
module Test_matches (R : Str_intf) = struct
let groups () =
let group i =
try `Found (R.group_beginning i)
with
| Not_found -> `Not_found
| Invalid_argument _ -> `Not_exists in
let rec loop acc i =
match group i with
| `Found p -> loop ((p, R.group_end i)::acc) (i + 1)
| `Not_found -> loop ((-1, -1)::acc) (i + 1)
| `Not_exists -> List.rev acc in
loop [] 0
let eq_match ?(pos=0) ?(case=true) r s =
let pat = if case then R.regexp r else R.regexp_case_fold r in
try
ignore (R.search_forward pat s pos);
Some (groups ())
with Not_found -> None
end
module Fmt = struct
include Format
type 'a t = Format.formatter -> 'a -> unit
let list = pp_print_list
let str = pp_print_string
let sexp fmt s pp x = fprintf fmt "@[<3>(%s@ %a)@]" s pp x
let pair pp1 pp2 fmt (v1,v2) =
pp1 fmt v1; pp_print_space fmt () ; pp2 fmt v2
let triple pp1 pp2 pp3 fmt (v1, v2, v3) =
pp1 fmt v1; pp_print_space fmt () ;
pp2 fmt v2; pp_print_space fmt () ;
pp3 fmt v3
let int = pp_print_int
let optint fmt = function
| None -> ()
| Some i -> fprintf fmt "@ %d" i
let quote fmt s = Format.fprintf fmt "\"%s\"" s
let pp_olist pp_elem fmt =
Format.fprintf fmt "@[<3>[@ %a@ ]@]"
(pp_print_list
~pp_sep:(fun fmt () -> fprintf fmt ";@ ")
pp_elem)
let pp_str_list = pp_olist quote
let to_to_string pp x =
let b = Buffer.create 16 in
let fmt = Format.formatter_of_buffer b in
pp fmt x;
Buffer.contents b
end
module T_str = Test_matches(Str)
module T_re = Test_matches(Re.Str)
let expect_equal_app f fx g gx =
assert (f fx = g gx)
let eq_match ?pos ?case r s =
expect_equal_app
(fun () -> T_str.eq_match ?pos ?case r s) ()
(fun () -> T_re.eq_match ?pos ?case r s) ()
;;
let split_result_conv = List.map (function
| Str.Delim x -> Re.Str.Delim x
| Str.Text x -> Re.Str.Text x)
let pp_split_result_list =
Fmt.pp_olist (fun fmt x ->
let (tag, arg) =
match x with
| Re.Str.Delim x -> ("Delim", x)
| Re.Str.Text x -> ("Text", x) in
Fmt.fprintf fmt "%s@ (\"%s\")" tag arg)
let pp_fs pp_args pp_out fmt (name, re, args, ex, res) =
let f fmt (mod_, r) =
Fmt.fprintf fmt "%s.%s %a %a = %a"
mod_ name Fmt.quote re pp_args args pp_out r in
Fmt.fprintf fmt "@.%a@.%a"
f ("Str", ex)
f ("Re.Str", res)
type ('a, 'b) test =
{ name: string
; pp_args : 'a Fmt.t
; pp_out : 'b Fmt.t
; re_str: Re.Str.regexp -> 'a -> 'b
; str: Str.regexp -> 'a -> 'b }
let bounded_split_t =
{ name = "bounded_split"
; pp_args = (fun fmt (s, n) -> Fmt.fprintf fmt "%a %d" Fmt.quote s n)
; pp_out = Fmt.pp_str_list
; re_str = (fun re (s, n) -> Re.Str.bounded_split re s n)
; str = (fun re (s, n) -> Str.bounded_split re s n) }
let bounded_full_split_t =
{ bounded_split_t with
name = "bounded_full_split"
; pp_out = pp_split_result_list
; re_str = (fun re (s, n) -> Re.Str.bounded_full_split re s n)
; str = (fun re (s, n) ->
split_result_conv (Str.bounded_full_split re s n)) }
let full_split_t =
{ bounded_full_split_t with
name = "full_split"
; pp_args = (fun fmt s -> Fmt.fprintf fmt "%a" Fmt.quote s)
; re_str = (fun re s -> Re.Str.full_split re s)
; str = (fun re s -> split_result_conv (Str.full_split re s)) }
let split_delim_t =
{ full_split_t with
name = "split_delim"
; pp_out = Fmt.pp_str_list
; re_str = (fun re s -> Re.Str.split_delim re s)
; str = (fun re s -> Str.split_delim re s) }
let split_t =
{ name = "split"
; pp_out = Fmt.pp_str_list
; pp_args = full_split_t.pp_args
; re_str = (fun re s -> Re.Str.split re s)
; str = (fun re s -> Str.split re s) }
let global_replace_t =
{ name = "global_replace"
; pp_out = Fmt.pp_print_string
; pp_args = (fun fmt (r, s) -> Fmt.fprintf fmt "%a %a"
Fmt.quote r Fmt.quote s)
; re_str = (fun re (r, s) -> Re.Str.global_replace re r s)
; str = (fun re (r, s) -> Str.global_replace re r s) }
let test t re args =
assert ((t.re_str (Re.Str.regexp re) args) = (t.str (Str.regexp re) args))
let split_delim re s = test split_delim_t re s
let split re s = test split_t re s
let full_split re s = test full_split_t re s
let bounded_split re s n = test bounded_split_t re (s, n)
let bounded_full_split re s n = test bounded_full_split_t re (s, n)
let global_replace re r s = test global_replace_t re (r, s)
let expect_pass _s f = f ()
let _ =
(* Literal Match *)
expect_pass "str" (fun () ->
eq_match "a" "a";
eq_match "a" "b";
);
(* Basic Operations *)
expect_pass "alt" (fun () ->
eq_match "a\\|b" "a";
eq_match "a\\|b" "b";
eq_match "a\\|b" "c";
);
expect_pass "seq" (fun () ->
eq_match "ab" "ab";
eq_match "ab" "ac";
);
expect_pass "epsilon" (fun () ->
eq_match "" "";
eq_match "" "a";
);
expect_pass "rep" (fun () ->
eq_match "a*" "";
eq_match "a*" "a";
eq_match "a*" "aa";
eq_match "a*" "b";
);
expect_pass "rep1" (fun () ->
eq_match "a+" "a";
eq_match "a+" "aa";
eq_match "a+" "";
eq_match "a+" "b";
);
expect_pass "opt" (fun () ->
eq_match "a?" "";
eq_match "a?" "a";
);
(* String, line, word *)
expect_pass "bol" (fun () ->
eq_match "^a" "ab";
eq_match "^a" "b\na";
eq_match "^a" "ba";
);
expect_pass "eol" (fun () ->
eq_match "a$" "ba";
eq_match "a$" "a\nb";
eq_match "a$" "ba\n";
eq_match "a$" "ab";
);
expect_pass "start" (fun () ->
eq_match ~pos:1 "Za" "xab";
eq_match ~pos:1 "Za" "xb\na";
eq_match ~pos:1 "Za" "xba";
);
(* Match semantics *)
expect_pass "match semantics" (fun () ->
eq_match "\\(a\\|b\\)*b" "aabaab";
eq_match "aa\\|aaa" "aaaa";
eq_match "aaa\\|aa" "aaaa";
);
(* Group (or submatch) *)
expect_pass "group" (fun () ->
eq_match "\\(a\\)\\(a\\)?\\(b\\)" "ab";
);
(* Character set *)
expect_pass "rg" (fun () ->
eq_match "[0-9]+" "0123456789";
eq_match "[0-9]+" "a";
);
expect_pass "compl" (fun () ->
eq_match "[^0-9a-z]+" "A:Z+";
eq_match "[^0-9a-z]+" "0";
eq_match "[^0-9a-z]+" "a";
);
(* Case modifiers *)
expect_pass "no_case" (fun () ->
eq_match ~case:false "abc" "abc";
eq_match ~case:false "abc" "ABC";
);
expect_pass "global_replace" (fun () ->
global_replace "needle" "test" "needlehaystack";
global_replace "needle" "" "";
global_replace "needle" "" "needle";
global_replace "xxx" "yyy" "zzz";
global_replace "test\\([0-9]*\\)" "\\1-foo-\\1" "test100 test200 test";
global_replace "test\\([0-9]*\\)" "'\\-0'" "test100 test200 test";
(* Regrssion test for #129 *)
global_replace "\\(X+\\)" "A\\1YY" "XXXXXXZZZZ"
);
expect_pass "bounded_split, bounded_full_split" (fun () ->
List.iter (fun (re, s, n) ->
bounded_full_split re s n;
bounded_split re s n)
[ ",", "foo,bar,baz", 5
; ",", "foo,bar,baz", 1
; ",", "foo,bar,baz", 0
; ",\\|", "foo,bar|baz", 4 ]
);
expect_pass "split, full_split, split_delim" (fun () ->
List.iter (fun (re, s) ->
split re s;
full_split re s;
split_delim re s)
[ "re", ""
; " ", "foo bar"
; "\b", "one-two three"
; "[0-9]", "One3TwoFive"]
)
|