File: Set.k

package info (click to toggle)
kaya 0.4.2-4
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 4,448 kB
  • ctags: 1,694
  • sloc: cpp: 9,536; haskell: 7,461; sh: 3,013; yacc: 910; makefile: 816; perl: 90
file content (369 lines) | stat: -rw-r--r-- 14,435 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
/** -*-C-*-ish
    Kaya standard library
    Copyright (C) 2004, 2005 Edwin Brady

    This file is distributed under the terms of the GNU Lesser General
    Public Licence. See COPYING for licence.
*/
"<summary>Sets: collections of unique values</summary>
<prose>This module contains two implementations for collections of unique values. <dataref>Set</dataref> stores the values in order, whereas <dataref>HashSet</dataref> is unordered but has faster insertion and lookup. The <moduleref>Dict</moduleref> module may also be useful.</prose>"
module Set;

import Prelude;

"<summary>Collection of unique values.</summary>
<prose>A collection of unique values stored as a hash table, which is usually faster than a <dataref>Set</dataref> but loses ordering.</prose>"
abstract data HashSet<a>([[a]] entries, Int buckets, Int(a) hashfn);

// TODO: If it seems worth it, replace this with a balanced binary tree.
// The current version will be too slow for big sets where elements are 
// inserted in some kind of order.
data STree<a> = leaf | node(STree<a> left, a val, STree<a> right);

"<summary>Ordered collection of unique values.</summary>
<prose>An ordered collection of unique values stored as a binary tree. If ordering is not crucial, then <dataref>HashSet</dataref> is likely to be faster. Sets may be iterated over using a <code>for</code> loop.</prose>"
abstract data Set<a>(STree<a> entries, Int(a,a) cmp);

"<argument name='cmp'>The comparison function to use for ordering. It must take two parameters, returning a negative number if the first parameter is smaller (whatever 'smaller' means in this context), positive if it is bigger, and zero if the two parameters are equal. For Ints, for example, the function <code>Int icmp(Int a, Int b) = a-b;</code> could be used. This argument may be omitted and defaults to <functionref>Builtins::compare</functionref>.</argument>
<summary>Create a new empty set.</summary>
<prose>Create a new empty set.</prose>
<related><dataref>Set</dataref></related>
<related><functionref index='1'>add</functionref></related>
<related><functionref index='1'>array</functionref></related>
<related><functionref index='1'>delete</functionref></related>
<related><functionref index='1'>elem</functionref></related>
<related><functionref index='1'>empty</functionref></related>"
public Set<a> newSet(Int(a,a) cmp = compare) = Set(leaf, cmp);

"<argument name='set'>The set to add to</argument>
<argument name='val'>The value to add</argument>
<summary>Add an element into a Set.</summary>
<prose>Add a new element to a Set. If the value is already in the set, does nothing.</prose>
<related><dataref>Set</dataref></related>
<related><functionref index='1'>array</functionref></related>
<related><functionref index='1'>delete</functionref></related>
<related><functionref index='1'>elem</functionref></related>
<related><functionref index='1'>empty</functionref></related>
<related><functionref>newSet</functionref></related>"
public Void add(var Set<a> set, a val)
{
    add(set.entries,set.cmp,val);
}

Void add(var STree<a> t, Int(a,a) cmp, a val)
{
    case t of {
	leaf -> 
	    t = node(leaf, val, leaf);
            return;
      | node(l,v,r) ->
	    cv = cmp(val,v);
	    if (cv<0) {
		add(l, cmp, val); 
		t.left = l;
	    }
	    else if (cv==0) return;
	    else {
		add(r, cmp, val);
		t.right = r;
	    }
    }
}

"<argument name='val'>The value to check for</argument>
<argument name='set'>The set to check</argument>
<summary>Check for an element in a Set.</summary>
<prose>Returns true if the value exists in the set, and false otherwise.</prose>
<related><dataref>Set</dataref></related>
<related><functionref index='1'>add</functionref></related>
<related><functionref index='1'>array</functionref></related>
<related><functionref index='1'>delete</functionref></related>
<related><functionref index='1'>empty</functionref></related>
<related><functionref>newSet</functionref></related>"
public Bool elem(a val, Set<a> set)
{
    t = set.entries;
    repeat case t of {
	leaf -> return false;
      | node(l,v,r) ->
	    cv = set.cmp(val,v);
            if (cv<0) t = l;
	    else if (cv==0) return true;
	    else t = r;
    }
    return false;
}

"<argument name='set'>The set to delete from</argument>
<argument name='val'>The value to delete</argument>
<summary>Remove an element from a Set.</summary>
<prose>Remove an element from a Set. If the value is not in the set, does nothing.</prose>
<related><dataref>Set</dataref></related>
<related><functionref index='1'>add</functionref></related>
<related><functionref index='1'>array</functionref></related>
<related><functionref index='1'>elem</functionref></related>
<related><functionref index='1'>empty</functionref></related>
<related><functionref>newSet</functionref></related>"
public Void delete(var Set<a> set, a val)
{
    delete(set.entries, val, set.cmp);
}

"<argument name='block'>The block of code to execute for each set element.</argument>
<argument name='set'>The set to traverse</argument>
<summary>Iteration over Sets</summary>
<prose>Used by <code>for</code> loops to traverse <dataref>Set</dataref> data structure. It is unlikely that you will need to call this function directly.</prose>"
public Void traverse(Bool(a, Int) block, Set<a> set) {
    traverseAux(0, block, set.entries);
}

Void traverseAux(var Int i, Bool(a,Int) block, STree<a> t) {
    case t of {
	leaf -> return;
	| node(l,v,r) -> 
	      traverseAux(i, block, l);
              if (!block(v, i)) return;
	      i=i+1;
	      traverseAux(i, block, r);
    }
}

Void delete(var STree<a> t, a val, Int(a,a) cmp)
{
    // First find the node
    case t of {
	leaf -> return; // phew, easy
      | node(l,v,r) ->
	    cv = cmp(val,v);
	    if (cv<0) {
		delete(l, val, cmp);
		t.left = l;
	    } else if (cv == 0) {
		// Found it
		deleteRoot(t);
	    } else {
		delete(r, val, cmp);
		t.right = r;
	    }
    }
}

Void deleteRoot(var STree<a> t)
{
    // No left child, replace with right child.
    // (Obviously works if no child at all, since t then becomes a leaf)
    if (t.left==leaf) {
//	putStrLn("in case 1");
	t = t.right;
	return;
    }
    // No right child, replace node with left child.
    if (t.right==leaf) {
//	putStrLn("in case 2");
	t = t.left;
	return;
    }
    // Now we definitely have two children
    // Find the leftmost value in the right tree and use that as the
    // value of the node.
    val = deleteLeftmost(t.right);
    t.val = val;
}

a deleteLeftmost(var STree<a> t)
{
//    putStrLn("in deleteLeftmost");
    if (t.left == leaf) {
	val = t.val;
	t = t.right;
	return val;
    } else {
	return deleteLeftmost(t.left);
    }
}


"<argument name='set'>The set to check</argument>
<summary>Check whether a Set is empty</summary>
<prose>Returns true if the Set is empty, false otherwise.</prose>
<related><dataref>Set</dataref></related>
<related><functionref index='1'>add</functionref></related>
<related><functionref index='1'>array</functionref></related>
<related><functionref index='1'>delete</functionref></related>
<related><functionref index='1'>elem</functionref></related>
<related><functionref>newSet</functionref></related>"
public Bool empty(Set<a> set) = set.entries == leaf;

"<argument name='set'>The set to coerce</argument>
<summary>Return all elements of a Set</summary>
<prose>Return all the elements of a Set as an array, ordered according to the set's ordering predicate.</prose>
<related><dataref>Set</dataref></related>
<related><functionref index='1'>add</functionref></related>
<related><functionref index='1'>delete</functionref></related>
<related><functionref index='1'>elem</functionref></related>
<related><functionref index='1'>empty</functionref></related>
<related><functionref>newSet</functionref></related>"
public [a] array(Set<a> set)
{
    arr = [];
    arrAux(set.entries, arr);
    return arr;
}

Void arrAux(STree<a> t, var [a] arr)
{
    case t of {
	leaf -> ;
      | node(l,v,r) ->
	    arrAux(l, arr);
  	    push(arr,v);
	    arrAux(r, arr);
    }
}

"<argument name='buckets'>The number of hashing buckets to use. A larger number of buckets increases the speed of the dictionary (up to a limit) but uses more memory. A good choice is the nearest prime number to <code>1.5*(<variable>expected number of entries</variable>)</code>, with the default being 157.</argument>
<argument name='hashfn'>The hashing function to use. This function must take a key, and return an integer. A good hashing function will return different values for similar keys (but must of course always return the same value for the same key!). A default built-in hashing function is provided, though if the keys are of type <code>String</code>, the <functionref>Builtins::strHash</functionref> function should be used instead, and if the keys are of type <code>Int</code> you may use <functionref>Builtins::identity</functionref> (though an array may provide faster insertion and lookup if the keys are positive and either small or largely sequential). If the keys are an especially complex data type, it may again be best to write your own hashing function. This argument may be omitted and defaults to <functionref>Builtins::hash</functionref>.</argument>
<summary>Create a new empty HashSet.</summary>
<prose>Create a new empty HashSet.</prose>
<related><dataref>HashSet</dataref></related>
<related><functionref>add</functionref></related>
<related><functionref>array</functionref></related>
<related><functionref>delete</functionref></related>
<related><functionref>elem</functionref></related>
<related><functionref>empty</functionref></related>"
public HashSet<a> newHashSet(Int buckets = 157, Int(a) hashfn = hash) 
    = HashSet(createArray(buckets),buckets,hashfn);

"<argument name='block'>The block of code to execute for each set element.</argument>
<argument name='set'>The HashSet to traverse</argument>
<summary>Iteration over HashSets</summary>
<prose>Used by <code>for</code> loops to traverse <dataref>HashSet</dataref> data structure. It is unlikely that you will need to call this function directly.</prose>
<prose>The order in which a HashSet is traversed is undefined.</prose>"
public Void traverse(Bool(a, Int) block, HashSet<a> set) {
  i = 0;
  for ps in set.entries {
    if (isInitialised(ps)) {
      for p in ps {
	if (!block(p,i)) { return; } i++;
      }
    }
  }
}


"<argument name='set'>The set to add to</argument>
<argument name='val'>The value to add</argument>
<summary>Add an element into a HashSet.</summary>
<prose>Add a new element to a HashSet. If the value is already in the set, does nothing.</prose>
<related><dataref>HashSet</dataref></related>
<related><functionref>array</functionref></related>
<related><functionref>delete</functionref></related>
<related><functionref>elem</functionref></related>
<related><functionref>empty</functionref></related>
<related><functionref>newHashSet</functionref></related>"
public Void add(var HashSet<a> set, a key) {
    // Find the right bucket
    bucket = abs(set.hashfn(key)) % set.buckets;
    if (!isInitialised(set.entries[bucket])) {
      // shouldn't need to be very large 
      set.entries[bucket] = createArray(5);
    }
    // If a is already a key, no-op.
    for p in set.entries[bucket] {
	if (p==key) {
	    return;
	}
    }
    push(set.entries[bucket],key);
}

"<argument name='val'>The value to check for</argument>
<argument name='set'>The set to check</argument>
<summary>Check for an element in a HashSet.</summary>
<prose>Returns true if the value exists in the HashSet, and false otherwise.</prose>
<related><dataref>HashSet</dataref></related>
<related><functionref>add</functionref></related>
<related><functionref>array</functionref></related>
<related><functionref>delete</functionref></related>
<related><functionref>empty</functionref></related>
<related><functionref>newHashSet</functionref></related>"
public Bool elem(a val, HashSet<a> set) {
    // Find the right bucket
    bucket = abs(set.hashfn(val)) % set.buckets;
    d = set.entries[bucket];
    if (!isInitialised(d)) {
      // can't be here
      return false;
    }
    for p in d {
	if (p==val) {
	    return true;
	}
    }
    return false;
}

"<argument name='set'>The set to delete from</argument>
<argument name='val'>The value to delete</argument>
<summary>Remove an element from a HashSet.</summary>
<prose>Remove an element from a HashSet. If the value is not in the set, does nothing.</prose>
<related><dataref>HashSet</dataref></related>
<related><functionref>add</functionref></related>
<related><functionref>array</functionref></related>
<related><functionref>elem</functionref></related>
<related><functionref>empty</functionref></related>
<related><functionref>newHashSet</functionref></related>"
public Void delete(HashSet<a> set, a key) {
    // Find the right bucket
    bucket = abs(set.hashfn(key)) % set.buckets;
    if (!isInitialised(set.entries[bucket])) {
      return; //can't be in here
    }
    ps = set.entries[bucket];
    // If a is a key in ps, remove it.
    for z in [0..(size(ps)-1)] {
	if (ps[z]==key) {
	  removeAt(ps,z);
	  return;
	}
    }
}

"<argument name='set'>The set to check</argument>
<summary>Check whether a HashSet is empty</summary>
<prose>Returns true if the HashSet is empty, false otherwise.</prose>
<related><dataref>HashSet</dataref></related>
<related><functionref>add</functionref></related>
<related><functionref>array</functionref></related>
<related><functionref>delete</functionref></related>
<related><functionref>elem</functionref></related>
<related><functionref>newHashSet</functionref></related>"
public Bool empty(HashSet<a> set) {
    for ps in set.entries {
	for p in ps {
	  return false;
	}
    }
    return true;
}

"<argument name='set'>The set to coerce</argument>
<summary>Return all elements of a HashSet</summary>
<prose>Return all the elements of a HashSet as an array. The elements will be returned in an arbitrary order.</prose>
<related><dataref>HashSet</dataref></related>
<related><functionref>add</functionref></related>
<related><functionref>delete</functionref></related>
<related><functionref>elem</functionref></related>
<related><functionref>empty</functionref></related>
<related><functionref>newHashSet</functionref></related>"
public [a] array(HashSet<a> set) {
    keys = [];
    for ps in set.entries {
      if (isInitialised(ps)) {
	for p in ps {
	    push(keys,p);
	}
      }
    }
    return keys;
}