1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
|
open Core
module Tests (Hash : Base.Hash.S with type hash_value = int) = struct
module Ppx_hash_lib = struct
module Std = struct
module Hash = Base.Hash.F (Hash)
end
end
open Ppx_hash_lib.Std.Hash.Builtin
let hash = `dont_use
module State = struct
module T = struct
type t = Hash.state
let compare = Hash.For_tests.compare_state
let sexp_of_t s : Sexp.t = Atom (Hash.For_tests.state_to_string s)
let t_of_sexp _ = assert false
end
include T
include Comparable.Make (T)
end
let should_have_no_collisions list sexp_of_t hash =
let m =
State.Map.of_alist_multi
(List.map list ~f:(fun v -> hash (Hash.reset (Hash.alloc ())) v, v))
in
Map.iteri m ~f:(fun ~key:hash ~data:values ->
match values with
| [] -> assert false
| [ _ ] -> ()
| _ :: _ :: _ ->
failwiths ~here:[%here] "collision" (hash, values) [%sexp_of: State.t * t list])
;;
module Ints = struct
let should_have_no_collisions l s f =
should_have_no_collisions l s (fun s x -> hash_fold_int s (f x))
;;
(* these tests can have false positives, but those should be fixable by tweaking the
[ints] list. *)
let ints = [ 0; 1; 2; 3; 100; 500; (*0x1234567812345678;*) -1; -2 ]
let ( % ) a b = [%hash: int * int] (a, b)
let zero = 0
(* The following tests require incrementally increasing hash function quality *)
let%test_unit "simple combine2 collisions" =
should_have_no_collisions
(List.cartesian_product ints ints)
[%sexp_of: int * int]
(fun (a, b) -> a % b)
;;
let%test_unit "more complicated combine2 collisions" =
should_have_no_collisions
(List.cartesian_product ints ints)
[%sexp_of: int * int]
(fun (a, b) -> a % (b % zero))
;;
let%test_unit "yet more complicated combine2s collisions" =
should_have_no_collisions
(List.cartesian_product ints ints)
[%sexp_of: int * int]
(fun (a, b) -> zero % a % (b % zero))
;;
end
let hash_int x =
let h = Hash.alloc () in
let h = Hash.reset h in
Hash.fold_int h x
;;
let init () =
let h = Hash.alloc () in
Hash.reset h
;;
let hash_string x =
let h = Hash.alloc () in
let h = Hash.reset h in
Hash.fold_string h x
;;
let ( = ) x y = Hash.For_tests.compare_state x y = 0
let assert_different hash_t a b = assert (not (hash_t a = hash_t b))
let%test_unit _ = assert_different hash_int 0 (1 lsl 32)
let%test_unit _ =
let a1 = String.make 7 'a' in
let b = String.make 7 'b' in
let a2 = String.make 7 'a' in
let c = Some 5 in
let a3 = String.make 7 'a' in
let d = Obj.new_block Obj.abstract_tag 1 in
assert (hash_string a1 = hash_string a2);
assert (hash_string a1 = hash_string a3);
assert (not (phys_same a1 b));
assert (not (phys_same a1 c));
assert (not (phys_same a1 d))
;;
let%test_unit _ = assert_different hash_string "\200\200\200\200" "\200a\200\200"
let%test_unit _ = assert_different hash_string "\200\200\200" "\200a\200"
let%test_unit "int collisions" =
Map.to_alist
(Int.Map.of_alist_multi
(List.init 100_000 ~f:(fun i ->
Hash.get_hash_value (hash_int i) land ((1 lsl 17) - 1), i)))
|> List.iter ~f:(fun (_, vs) ->
(* the number 10 is motivated by 0.9999 being close enough
to 1 in the following R expression:
ppois(10, lambda = 10^5 / 2^17) ^ (2^17)
[1] 0.9999167
With enough hand-waving and invocation of Poisson limit theorem
I convinced myself that poisson distribution is an OK approximation. *)
[%test_pred: int] (fun x -> x <= 10) (List.length vs))
;;
let%test_unit "list collisions" =
should_have_no_collisions
[ []; [ [] ]; [ [ [] ] ]; [ [ [ "hello" ] ] ]; [ []; [] ] ]
[%sexp_of: string list list list]
[%hash_fold: string list list list]
;;
type 'a array_frozen = 'a array
let sexp_of_array_frozen = sexp_of_array
let%test_unit "array collisions" =
should_have_no_collisions
[ [||]; [| [||] |]; [| [| [||] |] |]; [| [| [| "hello" |] |] |]; [| [||]; [||] |] ]
[%sexp_of: string array_frozen array_frozen array_frozen]
[%hash_fold: string array_frozen array_frozen array_frozen]
;;
let%test_unit "string collisions" =
should_have_no_collisions
[ "", [ 16 lsl 56; 0 ]; String.make 8 '\000' ^ "\002" ^ String.make 7 '\000', [] ]
[%sexp_of: string * int list]
[%hash_fold: string * int list]
;;
end
module I = Tests (Base.Hash)
module S = Tests (Siphash_lib.Siphash)
module P = Tests (Perfect_hash)
|