File: HashTable.ML

package info (click to toggle)
polyml 5.6-8
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 31,892 kB
  • ctags: 34,453
  • sloc: cpp: 44,983; ansic: 24,520; asm: 14,850; sh: 11,730; makefile: 551; exp: 484; python: 253; awk: 91; sed: 9
file content (217 lines) | stat: -rw-r--r-- 7,714 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
(*
    Copyright (c) 2000
        Cambridge University Technical Services Limited

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.
    
    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
    
    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*)

(* Hash table type - Creates a hash table of specified initial size. This
   version expands the hash table and rehashes when the table gets too full *)

structure HashTable:

(*****************************************************************************)
(*                  HashTable export signature                               *)
(*****************************************************************************)
sig
  (* A hash table keyed by strings and holding values of type 'a.
     A table is either mutable (as returned by hashMake) or frozen
     (as returned by hashFreeze). *)
  type 'a hash
  
  (* Create an empty mutable table; the argument is the requested initial
     number of slots.  Raises Misc.InternalError if the slot array cannot
     be created with that size. *)
  val hashMake: int -> 'a hash
  (* hashSet (table, name, value) stores value under name, replacing any
     value already stored under the same name.  The table is enlarged and
     rehashed when it becomes more than 80% full.  Raises Fail if the
     table is frozen. *)
  val hashSet: 'a hash * string * 'a -> unit
  (* hashSub (table, name) returns SOME value if name is present in the
     table and NONE otherwise. *)
  val hashSub: 'a hash * string -> 'a option
  
  (* fold f init table applies f to (name, value, accumulator) for every
     entry in the table, visiting the slots from index 0 upwards, and
     returns the final accumulator. *)
  val fold: (string * 'a * 'b -> 'b) -> 'b -> 'a hash -> 'b
  (* Construct an immutable hash table from a mutable one.  A table that
     is already frozen is returned unchanged. *)
  val hashFreeze: 'a hash -> 'a hash
end =

(*****************************************************************************)
(*                  HashTable structure body                                 *)
(*****************************************************************************)
struct

  exception InternalError = Misc.InternalError
  infix 8 sub

  (* Each slot in the table is either empty (None) or occupied (Some), in
     which case it holds a pair containing the key and the value. *)
  
  (* Keep namedOption local to this structure (it is not exported by the
     signature), because that means the new compiler can optimise its
     representation. SPF 11/5/95 *)
  datatype 'a namedOption = None | Some of (string * 'a);

  (* hashValue N str maps str to a slot index for a table of N slots.
     The characters are combined from right to left in Word arithmetic
     (each step adds the character code to seven times the running
     value) and the result is reduced modulo N.  Word.mod raises Div
     when N is zero. *)
  fun hashValue (N : int) (str: string) : int =
  let
      val modulus : Word.word = Word.fromInt N

      (* One folding step: mix the next character into the running hash. *)
      fun mix (ch: char, acc: Word.word) : Word.word =
          Word.fromInt (Char.ord ch) + 0w7 * acc

      val raw : Word.word = CharVector.foldr mix 0w0 str
  in
      Word.toInt (Word.mod (raw, modulus))
  end

(* The above function is the quickest and simplest way of computing the
   hash value now that we have Word.* and Word.mod compiled inline.
   They aren't implemented in all code-generators so it could be worth
   retaining the old code.  DCJM 26/2/01. *)

  (* The table representation.
     Hash is the mutable form: the slot array and the hash function are
     both held in refs so that hashSet can replace them together when it
     rehashes into a larger array.
     Frozen is the immutable form produced by hashFreeze: a plain vector
     of slots and the hash function that matches its size. *)
  datatype 'a hash =
    Hash of
      { 
        (* Occupancy counter maintained by hashSet and compared with the
           array length to decide when to rehash. *)
        used: int ref,
        (* The current slot array. *)
        entries: 'a namedOption array ref,
        (* Maps a key to its starting slot index in the current array. *)
        hash: (string -> int) ref
      }
   | Frozen of
      {
        (* The slots, fixed at the time the table was frozen. *)
        entries: 'a namedOption vector,
        (* Maps a key to its starting slot index in entries. *)
        hash: (string -> int)
      };
  
    (* hashMake n creates an empty, mutable hash table with n initial slots.

       A request for zero slots is rounded up to one.  A zero-length table
       would be unusable: its hash function reduces modulo the table size,
       so the first lookup or insertion would raise Div.  With one slot the
       table simply grows on the first insertion.

       Raises InternalError (carrying the requested size) if Array.array
       rejects the size with Size, e.g. because n is negative. *)
    fun hashMake (n : int) =
    let
        val slots : int = if n = 0 then 1 else n
    in
        Hash
          {
            used    = ref 0,
            entries = ref (Array.array (slots, None)),
            hash    = ref (hashValue slots)
          }
    end
         handle Size => 
            raise InternalError ("HashTable.hash: " ^ Int.toString n);
    
    (* hashSub (table, name) looks name up in either kind of table.
       The search starts at the slot given by the table's hash function and
       moves downwards one slot at a time, wrapping from slot 0 to the last
       slot.  It stops with NONE at the first empty slot, or with SOME value
       at the slot whose key equals name.  The search relies on the table
       containing at least one empty slot, which hashSet ensures by
       rehashing before the table fills up. *)
    fun hashSub (table, name : string) : 'a option =
    let
        (* Reduce both representations to a size, a slot reader and the
           starting index so that a single probe loop serves both. *)
        val (size, slotAt, start) =
            case table of
                Hash {entries = ref arr, hash = ref hashN, ...} =>
                    (Array.length arr, fn i => Array.sub (arr, i), hashN name)
            |   Frozen {entries = vec, hash = hashN, ...} =>
                    (Vector.length vec, fn i => Vector.sub (vec, i), hashN name)

        fun probe i =
            case slotAt i of
                None => NONE
            |   Some (key, value) =>
                    if key = name
                    then SOME value
                    else probe (if i = 0 then size - 1 else i - 1)
    in
        probe start
    end

    (* hashSet (table, name, value) stores value under name.

       Frozen tables cannot be modified: Fail is raised.

       For a mutable table the entry is placed in the first free slot found
       by probing downwards (with wrap-around) from the slot given by the
       hash function, or it overwrites the existing entry for the same name.
       The occupancy counter is only advanced when a previously empty slot
       is filled; overwriting an existing key does not change the number of
       occupied slots and therefore must not push the table towards a
       rehash.  When the table becomes more than 80% full a new array of
       twice the size is installed and every entry is re-entered in it. *)
    fun hashSet (Frozen _, name : string, _ :  'a) : unit =
       raise Fail ("Attempt to set a value with name (" ^ name ^ ") in a frozen hash table")
       
    |  hashSet (Hash {entries, used, hash}, name : string, value :  'a) =
    let
        open Array
        (* Enters the entry at the first free slot at or before (probing
           downwards, wrapping at slot 0) the one pointed to by the hash
           value.  Returns true if an empty slot was filled and false if an
           existing entry with the same name was overwritten (or if there
           was nothing to enter). *)
        fun enterTab (_ : 'a namedOption array, _ : int, None : 'a namedOption) : bool = false
          | enterTab (A, i, entry as Some (name,_)) =
        let
          fun enter (i : int) : bool =
            (* Loops until it either finds an empty entry - in which case it
               enters the value in there, or it finds the string. *)
            case (A sub i) of
              None        => (update (A, i, entry); true)
            | Some (n : string, _) => 
               if n = name
               then (update (A, i, entry); false) (* Same name as previous - overwrite it *)
               else enter ((if i = 0 then length A else i) - 1);
        in
          enter i
        end 
        
        val A : 'a namedOption array = !entries;
        val N : int                   = length A;
        val hashN : string -> int     = !hash
        
        val filledNewSlot : bool = enterTab (A, hashN name, Some (name, value));
        (* Only a newly occupied slot changes the fill level. *)
        val () = if filledNewSlot then used := !used + 1 else ();
    in
        (* Do we need to rehash ? *)
        if !used * 5 > N * 4 (* More than 80% full so rehash *)
        then
        let
            val newN  : int                   = N * 2; (* Double the size *)
            val newA  : 'a namedOption array = array (newN, None);
            val hashNewN : string -> int      = hashValue newN;
          
            (* Re-enter every occupied slot of the old array into the new
               one, working from the last slot down to slot 0. *)
            fun copyOver (index : int) : unit =
                if index < 0 then ()
                else
                (
                  case (A sub index) of
                    None                => ()
                  | entry as Some (name : string,_) =>
                      ignore (enterTab (newA, hashNewN name, entry));
                  
                  copyOver (index - 1)
                );
            
            (* Install the new array and its matching hash function before
               copying; copyOver still reads from the old array A. *)
            val () = entries := newA;
            val () = hash := hashNewN;
        in
            copyOver (length A - 1)
        end
        else ()
    end;

    (* fold f acc table threads an accumulator through every entry of the
       table.  Slots are visited from index 0 upwards; empty slots leave the
       accumulator unchanged and occupied slots pass (name, value,
       accumulator) to f. *)
    fun fold f acc table =
    let
        (* Step shared by the array and vector traversals. *)
        fun visit (None, soFar) = soFar
          | visit (Some (key, value), soFar) = f (key, value, soFar)
    in
        case table of
            Hash { entries = ref slots, ... } => Array.foldl visit acc slots
        |   Frozen { entries = slots, ... } => Vector.foldl visit acc slots
    end

    (* Turn a mutable hash table into an immutable one by copying its slot
       array into a vector and taking its current hash function.  The reason
       for this is to reduce the number of mutables stored.  A table that is
       already frozen is returned as it is.
       TODO: Possibly change the size of the table rather than simply copy it. *)
    fun hashFreeze table =
        case table of
            Hash {entries, hash, ...} =>
                Frozen {entries = Array.vector (!entries), hash = !hash}
        |   Frozen _ => table

end (* HashTable *);