File: HashArray.ML

package info (click to toggle)
polyml 5.2.1-1.1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, wheezy
  • size: 19,692 kB
  • ctags: 17,567
  • sloc: cpp: 37,221; sh: 9,591; asm: 4,120; ansic: 428; makefile: 203; ml: 191; awk: 91; sed: 10
file content (178 lines) | stat: -rw-r--r-- 6,072 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
(*
	Copyright (c) 2000
		Cambridge University Technical Services Limited

    Modified David C.J. Matthews 2008

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.
	
	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Lesser General Public License for more details.
	
	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*)

(* Hash table type - Creates a hash table of specified initial size. This
   version expands the hash table and rehashes when the table gets too full *)

structure HashArray:

(*****************************************************************************)
(*                  HashArray export signature                               *)
(*****************************************************************************)
sig
    type 'a hash
    
    val hash: int -> 'a hash
    val update: 'a hash * string * 'a -> unit
    val sub: 'a hash * string -> 'a option
    
    val fold: (string * 'a * 'b -> 'b) -> 'b -> 'a hash -> 'b
end =

(*****************************************************************************)
(*                  HashArray structure body                                 *)
(*****************************************************************************)
struct

local
  infix 8 sub

  (* Each entry in the table is a pair containing the key and the value. *)
  
  (* make namedOption local, because that means the new compiler can
     optimise its representation. SPF 11/5/95 *)
  datatype 'a namedOption = None | Some of (string * 'a);
in

  fun hashValue (N : int) (str: string) : int =
	  	Word.toInt(
		    Word.mod(
    		  	CharVector.foldr
    				(fn (ch: char, n: Word.word) => Word.fromInt(Char.ord ch) + 0w7*n)
    				0w0 str,
			    (Word.fromInt N)))

(* The above function is the quickest and simplest way of computing the
   hash value now that we have Word.* and Word.mod compiled inline.
   They aren't implemented in all code-generators so it could be worth
   retaining the old code.  DCJM 26/2/01. *)

  datatype 'a hash =
    Hash of
      { 
        used: int ref,
        entries: 'a namedOption array ref,
        hash: (string -> int) ref
      }
  
    fun hash (n : int) =
        Hash
          {
            used    = ref 0,
            entries = ref (Array.array (n, None)),
            hash    = ref (hashValue n)
          }
     
    fun op sub (Hash {entries = ref A, hash = ref hashN, ...}, name : string) : 'a option =
      (* Searches the table starting from the position given by the hash value. *)
    let
        open Array
        (* Loops until it either finds an empty entry - in which case it
           returns NONE, or it finds the name it is looking for.
           There will always be several empty entries because we rehash
           if it gets too full. *)
        fun find i =
        let
            val h = A sub i;
        in
            case h of
                None => NONE
            |   Some (n,v) => 
                    if name = n then SOME v else find ((if i = 0 then length A else i) - 1)
        end
    in
        find (hashN name)
    end

    fun update (Hash {entries, used, hash}, name : string, value :  '_a) =
    let
	    open Array
        (* Enters the value at the first free entry at or after the
           one pointed to by the hash value. *)
        fun enterTab (A : '_a namedOption array, i : int, None : '_a namedOption) = ()
          | enterTab (A, i, entry as Some (name,_)) =
        let
          fun enter (i : int) : unit =
            (* Loops until it either finds an empty entry - in which case it
               enters the value in there, or it finds the string. *)
            case (A sub i) of
              None        => update (A, i, entry)
            | Some (n : string, _) => 
               if n = name
               then update (A, i, entry) (* Same name as previous - overwrite it *)
               else enter ((if i = 0 then length A else i) - 1);
        in
          enter i
        end 
        
        val A : '_a namedOption array = !entries;
        val N : int                   = length A;
        val hashN : string -> int     = !hash
        
        val U : unit = enterTab (A, hashN name, Some (name, value));
        val U : unit = used := !used + 1;
    in
        (* Do we need to rehash ? *)
        if !used * 5 > N * 4 (* More than 80% full so rehash *)
        then
		let
            val newN  : int                   = N * 2; (* Double the size *)
            val newA  : '_a namedOption array = array (newN, None);
            val hashNewN : string -> int      = hashValue newN;
          
            fun copyOver (index : int) : unit =
                if index < 0 then ()
                else
                (
                  case (A sub index) of
                    None                => ()
                  | entry as Some (name : string,_) =>
                      enterTab (newA, hashNewN name, entry);
                  
                  copyOver (index - 1)
                );
            
            val U : unit = entries := newA;
            val U : unit = hash := hashNewN;
        in
            copyOver (length A - 1)
        end
        else ()
    end;

    fun fold f init (Hash { entries = ref e, ...}) =
    let
	    val length = Array.length e

        fun foldF i acc =
            if i >= length then acc
			else foldF (i+1)
                (case Array.sub(e, i) of
                     None => acc
                 |   Some (name,alpha) => f (name,alpha,acc)
                )
    in
        foldF 0 init
    end;

end (* local *);

end (* HashArray *);