File: HashTable.ML

package info (click to toggle)
polyml 5.2.1-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 19,692 kB
  • ctags: 17,567
  • sloc: cpp: 37,221; sh: 9,591; asm: 4,120; ansic: 428; makefile: 203; ml: 191; awk: 91; sed: 10
file content (273 lines) | stat: -rw-r--r-- 8,893 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
(*
	Copyright (c) 2000
		Cambridge University Technical Services Limited

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.
	
	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Lesser General Public License for more details.
	
	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*)

(* Hash table type - Creates a hash table of specified initial size. This
   version expands the hash table and rehashes when the table gets too full *)

(*
Changes:
  25/03/94 SPF bugfix (off-by-one error in hashOver/next function)

*)


(* HACKS: Misc, Array, Int.rem *)

structure HashTable:

(*****************************************************************************)
(*                  HashTable export signature                               *)
(*****************************************************************************)
sig
  type 'a hash
  type 'a iter
  
  val hashMake: int -> '_a hash
  val hashSet: '_a hash * string * '_a -> unit
  val hashSub: 'a hash * string -> 'a option
  
  (* An iterator over the non-empty entries in the table. *)
  val hashOver: 'a hash -> (string * 'a) iter
  val hashFold: 'a hash -> (string -> 'a -> 'b -> 'b) -> 'b -> 'b
  (* Construct an immutable hash table from a mutable one. *)
  val hashFreeze: 'a hash -> 'a hash
end =

(*****************************************************************************)
(*                  HashTable structure body                                 *)
(*****************************************************************************)
struct

local
  open Misc;
  infix 8 sub

  (* Each entry in the table is a pair containing the key and the value. *)
  
  (* make namedOption local, because that means the new compiler can
     optimise its representation. SPF 11/5/95 *)
  datatype 'a namedOption = None | Some of (string * 'a);
in
  
  type 'a iter = 'a iter;

  fun hashValue (N : int) (str: string) : int =
	  	Word.toInt(
		    Word.mod(
    		  	CharVector.foldr
    				(fn (ch: char, n: Word.word) => Word.fromInt(Char.ord ch) + 0w7*n)
    				0w0 str,
			    (Word.fromInt N)))

(* The above function is the quickest and simplest way of computing the
   hash value now that we have Word.* and Word.mod compiled inline.
   They aren't implemented in all code-generators so it could be worth
   retaining the old code.  DCJM 26/2/01. *)

  datatype 'a hash =
    Hash of
      { 
        used: int ref,
        entries: 'a namedOption array ref,
        hash: (string -> int) ref
      }
   | Frozen of
      {
        entries: 'a namedOption vector,
        hash: (string -> int)
	  };
  
    fun hashMake (n : int) =
        Hash
          {
            used    = ref 0,
            entries = ref (Array.array (n, None)),
            hash    = ref (hashValue n)
          }
         handle Size => 
            raise InternalError ("HashTable.hash: " ^ Int.toString n);
    
    fun hashSub (Hash {entries = ref A, hash = ref hashN, ...}, name : string) : 'a option =
      (* Searches the table starting from the position given by the hash value. *)
    let
        open Array
        (* Loops until it either finds an empty entry - in which case it
           returns NONE, or it finds the name it is looking for.
           There will always be several empty entries because we rehash
           if it gets too full. *)
        fun find i =
        let
            val h = A sub i;
        in
            case h of
                None => NONE
            |   Some (n,v) => 
                    if name = n then SOME v else find ((if i = 0 then length A else i) - 1)
        end
    in
        find (hashN name)
    end
  | hashSub (Frozen {entries = V, hash = hashN, ...}, name : string) : 'a option =
    let
        open Vector
        
        fun find i =
        let
            val h = V sub i;
        in
            case h of
                None => NONE
            |   Some (n,v) => 
                    if name = n then SOME v else find ((if i = 0 then length V else i) - 1)
        end
    in
        find (hashN name)
    end

    fun hashSet (Frozen _, name : string, value :  '_a) : unit =
       raise Fail ("Attempt to set a value with name (" ^ name ^ ") in a frozen hash table")
	   
    |  hashSet (Hash {entries, used, hash}, name : string, value :  '_a) =
    let
	    open Array
        (* Enters the value at the first free entry at or after the
           one pointed to by the hash value. *)
        fun enterTab (A : '_a namedOption array, i : int, None : '_a namedOption) = ()
          | enterTab (A, i, entry as Some (name,_)) =
        let
          fun enter (i : int) : unit =
            (* Loops until it either finds an empty entry - in which case it
               enters the value in there, or it finds the string. *)
            case (A sub i) of
              None        => update (A, i, entry)
            | Some (n : string, _) => 
               if n = name
               then update (A, i, entry) (* Same name as previous - overwrite it *)
               else enter ((if i = 0 then length A else i) - 1);
        in
          enter i
        end 
        
        val A : '_a namedOption array = !entries;
        val N : int                   = length A;
        val hashN : string -> int     = !hash
        
        val U : unit = enterTab (A, hashN name, Some (name, value));
        val U : unit = used := !used + 1;
    in
        (* Do we need to rehash ? *)
        if !used * 5 > N * 4 (* More than 80% full so rehash *)
        then
		let
            val newN  : int                   = N * 2; (* Double the size *)
            val newA  : '_a namedOption array = array (newN, None);
            val hashNewN : string -> int      = hashValue newN;
          
            fun copyOver (index : int) : unit =
                if index < 0 then ()
                else
                (
                  case (A sub index) of
                    None                => ()
                  | entry as Some (name : string,_) =>
                      enterTab (newA, hashNewN name, entry);
                  
                  copyOver (index - 1)
                );
            
            val U : unit = entries := newA;
            val U : unit = hash := hashNewN;
        in
            copyOver (length A - 1)
        end
        else ()
    end;
  
    fun hashOver table =
    let
	    val (length, getItem) =
		    case table of
			    Hash { entries = ref e, ...} => (Array.length e, fn i => Array.sub(e, i))
			|   Frozen { entries, ... } => (Vector.length entries, fn i => Vector.sub(entries, i))
        (* An iterator over the non-empty entries in the table. *)
          
        fun continue i = i < length;
  
        fun value i =
            (* Must check for limit since value can be called explicitly
               even if continue is false. *)
          
        if continue i then
            case (getItem i) of
              None      => raise InternalError "HashTable.hashOver.value (None)"
			| Some pair => pair
        else
            raise InternalError "HashTable.hashOver.value (limit passed)"

        (* Return the next non-empty entry. *)
        fun next i =
        let
            val n = i + 1;
        in
            if n >= length
            then
                makeIter n (* stop if we've reached the end (bugfixed SPF 25/3/94) *)
            else
                case (getItem n) of
                  None   => next n
                | Some _ => makeIter n
        end
  
    
        and makeIter i = 
          Iter
            {
              continue = continue i,
              next     = fn () => next i,
              value    = fn () => value i
            };
    in
        next ~1
    end;

    fun hashFold table f =
    let
        fun foldF i acc =
            if not (Continue i) then acc
			else
            let
                val (name,alpha) = Value i;
            in
                foldF (Next i) (f name alpha acc)
            end
    in
        foldF (hashOver table)
    end;

    (* If this is a mutable hash table create an immutable one from it.  The reason
	   for this is to reduce the number of mutables stored.
	   TODO: Possible change the size of the table rather than simply copy it. *)
    fun hashFreeze (Hash{entries = ref e, hash = ref hashN, ...}) =
	    Frozen{ entries = Array.vector e, hash = hashN }
	|   hashFreeze h = h


end (* local *);

end (* HashTable *);