File: collisions.ml

package info (click to toggle)
ppx-hash 0.17.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 316 kB
  • sloc: ml: 1,961; ansic: 180; makefile: 14; sh: 11
file content (158 lines) | stat: -rw-r--r-- 4,710 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
open Core

module Tests (Hash : Base.Hash.S with type hash_value = int) = struct
  module Ppx_hash_lib = struct
    module Std = struct
      module Hash = Base.Hash.F (Hash)
    end
  end

  open Ppx_hash_lib.Std.Hash.Builtin

  let hash = `dont_use

  module State = struct
    module T = struct
      type t = Hash.state

      let compare = Hash.For_tests.compare_state
      let sexp_of_t s : Sexp.t = Atom (Hash.For_tests.state_to_string s)
      let t_of_sexp _ = assert false
    end

    include T
    include Comparable.Make (T)
  end

  let should_have_no_collisions list sexp_of_t hash =
    let m =
      State.Map.of_alist_multi
        (List.map list ~f:(fun v -> hash (Hash.reset (Hash.alloc ())) v, v))
    in
    Map.iteri m ~f:(fun ~key:hash ~data:values ->
      match values with
      | [] -> assert false
      | [ _ ] -> ()
      | _ :: _ :: _ ->
        failwiths ~here:[%here] "collision" (hash, values) [%sexp_of: State.t * t list])
  ;;

  module Ints = struct
    let should_have_no_collisions l s f =
      should_have_no_collisions l s (fun s x -> hash_fold_int s (f x))
    ;;

    (* these tests can have false positives, but those should be fixable by tweaking the
       [ints] list. *)
    let ints = [ 0; 1; 2; 3; 100; 500; (*0x1234567812345678;*) -1; -2 ]
    let ( % ) a b = [%hash: int * int] (a, b)
    let zero = 0

    (* The following tests require incrementally increasing hash function quality *)

    let%test_unit "simple combine2 collisions" =
      should_have_no_collisions
        (List.cartesian_product ints ints)
        [%sexp_of: int * int]
        (fun (a, b) -> a % b)
    ;;

    let%test_unit "more complicated combine2 collisions" =
      should_have_no_collisions
        (List.cartesian_product ints ints)
        [%sexp_of: int * int]
        (fun (a, b) -> a % (b % zero))
    ;;

    let%test_unit "yet more complicated combine2s collisions" =
      should_have_no_collisions
        (List.cartesian_product ints ints)
        [%sexp_of: int * int]
        (fun (a, b) -> zero % a % (b % zero))
    ;;
  end

  let hash_int x =
    let h = Hash.alloc () in
    let h = Hash.reset h in
    Hash.fold_int h x
  ;;

  let init () =
    let h = Hash.alloc () in
    Hash.reset h
  ;;

  let hash_string x =
    let h = Hash.alloc () in
    let h = Hash.reset h in
    Hash.fold_string h x
  ;;

  let ( = ) x y = Hash.For_tests.compare_state x y = 0
  let assert_different hash_t a b = assert (not (hash_t a = hash_t b))
  let%test_unit _ = assert_different hash_int 0 (1 lsl 32)

  let%test_unit _ =
    let a1 = String.make 7 'a' in
    let b = String.make 7 'b' in
    let a2 = String.make 7 'a' in
    let c = Some 5 in
    let a3 = String.make 7 'a' in
    let d = Obj.new_block Obj.abstract_tag 1 in
    assert (hash_string a1 = hash_string a2);
    assert (hash_string a1 = hash_string a3);
    assert (not (phys_same a1 b));
    assert (not (phys_same a1 c));
    assert (not (phys_same a1 d))
  ;;

  let%test_unit _ = assert_different hash_string "\200\200\200\200" "\200a\200\200"
  let%test_unit _ = assert_different hash_string "\200\200\200" "\200a\200"

  let%test_unit "int collisions" =
    Map.to_alist
      (Int.Map.of_alist_multi
         (List.init 100_000 ~f:(fun i ->
            Hash.get_hash_value (hash_int i) land ((1 lsl 17) - 1), i)))
    |> List.iter ~f:(fun (_, vs) ->
         (*  the number 10 is motivated by 0.9999 being close enough
         to 1 in the following R expression:

         ppois(10, lambda = 10^5 / 2^17) ^ (2^17)
         [1] 0.9999167

         With enough hand-waving and invocation of Poisson limit theorem
         I convinced myself that poisson distribution is an OK approximation. *)
         [%test_pred: int] (fun x -> x <= 10) (List.length vs))
  ;;

  let%test_unit "list collisions" =
    should_have_no_collisions
      [ []; [ [] ]; [ [ [] ] ]; [ [ [ "hello" ] ] ]; [ []; [] ] ]
      [%sexp_of: string list list list]
      [%hash_fold: string list list list]
  ;;

  type 'a array_frozen = 'a array

  let sexp_of_array_frozen = sexp_of_array

  let%test_unit "array collisions" =
    should_have_no_collisions
      [ [||]; [| [||] |]; [| [| [||] |] |]; [| [| [| "hello" |] |] |]; [| [||]; [||] |] ]
      [%sexp_of: string array_frozen array_frozen array_frozen]
      [%hash_fold: string array_frozen array_frozen array_frozen]
  ;;

  let%test_unit "string collisions" =
    should_have_no_collisions
      [ "", [ 16 lsl 56; 0 ]; String.make 8 '\000' ^ "\002" ^ String.make 7 '\000', [] ]
      [%sexp_of: string * int list]
      [%hash_fold: string * int list]
  ;;
end

module I = Tests (Base.Hash)
module S = Tests (Siphash_lib.Siphash)
module P = Tests (Perfect_hash)