File: HashArray.ML

package info (click to toggle)
polyml 5.7.1-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid
  • size: 40,616 kB
  • sloc: cpp: 44,142; ansic: 26,963; sh: 22,002; asm: 13,486; makefile: 602; exp: 525; python: 253; awk: 91
file content (195 lines) | stat: -rw-r--r-- 6,663 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
(*
    Copyright (c) 2000
        Cambridge University Technical Services Limited

    Modified David C.J. Matthews 2008, 2014, 2015

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License version 2.1 as published by the Free Software Foundation.
    
    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
    
    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*)

(* Hash table type - Creates a hash table of specified initial size. This
   version expands the hash table and rehashes when the table gets too full *)

structure HashArray:>
sig
    type 'a hash
    
    val hash: int -> 'a hash
    val update: 'a hash * string * 'a -> unit
    val sub: 'a hash * string -> 'a option
    val delete: 'a hash * string -> unit
    
    val fold: (string * 'a * 'b -> 'b) -> 'b -> 'a hash -> 'b
end =

struct

local
    infix 8 sub

    (* Each entry in the table is a pair containing the key and the value. *)

    (* The type of each entry in the array.  Putting the tuple in here
       allows the compiler to use an optimised representation.
       We have to distinguish empty entries, which stop the search,
       from deleted entries that don't. *)
    datatype 'a namedOption = Empty | Deleted | Used of string * 'a
in

    fun hashValue vecLen str =
        Word.toInt(
            Word.mod(
                CharVector.foldr
                    (fn (ch, n) => Word.fromInt(Char.ord ch) + 0w7*n)
                    0w0 str,
                (Word.fromInt vecLen)))

    (* The above function is the quickest and simplest way of computing the
       hash value now that we have Word.* and Word.mod compiled inline.
       They aren't implemented in all code-generators so it could be worth
       retaining the old code.  DCJM 26/2/01. *)

    datatype 'a hash =
    Hash of
    { 
        used: int ref,
        entries: 'a namedOption array ref
    }

    (* Create an empty table. *)
    fun hash size =
        Hash
        {
            used    = ref 0,
            entries = ref (Array.array (size, Empty))
        }
     
    fun op sub (Hash {entries = ref arr, ...}, name : string) : 'a option =
      (* Searches the table starting from the position given by the hash value. *)
    let
        open Array
        val vecLen = length arr
        (* Loops until it either finds an empty entry - in which case it
           returns NONE, or it finds the name it is looking for.
           There will always be several empty entries because we rehash
           if it gets too full. *)
        fun find i =
        let
            val h = arr sub i
        in
            case h of
                Empty => NONE
            |   Deleted => find ((if i = 0 then vecLen else i) - 1)
            |   Used (n,v) => 
                    if name = n then SOME v else find ((if i = 0 then vecLen else i) - 1)
        end
    in
        find (hashValue vecLen name)
    end

    fun update (Hash {entries as ref currentArray, used}, name, value) =
    let
        open Array

        fun enter a i (entry as (name, _)) =
        (* Loops until it either finds an empty entry - in which case it
           enters the value in there, or it finds the string.
           If it finds a deleted entry it can reuse that but it must check
           that we haven't also got the same string further along. *)
        case a sub i of
            Empty =>  (* Empty.  Add the entry and increment "used". *)
                (update (a, i, Used entry); true)
        |   Deleted => (* Deleted.  Use this entry. *)
            let
                fun checkEntry i =
                    case a sub i of
                        Empty => ()
                    |   Deleted => checkEntry((if i = 0 then length a else i) - 1)
                    |   Used(n, _) =>
                            if n = name
                            then update(a, i, Deleted)
                            else checkEntry((if i = 0 then length a else i) - 1)
            in
                checkEntry i;
                update (a, i, Used entry);
                false
            end
        |   Used (n, _) =>  (* In use.  Overwrite if it's the same name. *)
                if n = name
                then (update (a, i, Used entry); false) (* Same name as previous - overwrite it *)
                else enter a ((if i = 0 then length a else i) - 1) entry;
 
        val () =
            if enter currentArray (hashValue (length currentArray) name) (name, value)
            then used := !used + 1
            else ()

        val currentSize = length currentArray
    in
        (* Do we need to rehash ? *)
        if !used * 5 > currentSize * 4 (* More than 80% full so rehash *)
        then
        let
            val newN     = currentSize * 2 (* Double the size *)
            val newA     = array (newN, Empty)
            val hashNewN = hashValue newN

            fun copyOver(Used(entry as (name, _))) =
                if enter newA (hashNewN name) entry
                then used := !used+1
                else ()
            |   copyOver _ = ()
            
        in
            (* Reset the count to include only non-deleted entries. *)
            used := 0;
            (* Copy into the new array *)
            Array.app copyOver currentArray;
            entries := newA
        end
        else ()
    end

    fun fold f init (Hash { entries = ref e, ...}) =
    let
        fun getEntry(Used(name,alpha), acc) = f (name, alpha, acc)
        |   getEntry(_, acc) = acc
    in
        Array.foldl getEntry init e
    end

    fun delete(Hash {entries = ref arr, ...}, name) =
    let
        open Array
        val vecLen = length arr
        (* Similar to "sub" except that it overwrites the entry if it finds it. *)
        fun find i =
        let
            val h = arr sub i
        in
            case h of
                Empty => () (* Not there *)
            |   Deleted => find ((if i = 0 then vecLen else i) - 1)
            |   Used (n, _) => 
                    if name = n
                    then update(arr, i, Deleted)
                    else find ((if i = 0 then vecLen else i) - 1)
        end
    in
        find (hashValue vecLen name)
    end

end (* local *);

end (* HashArray *);