File: hash.c

package info (click to toggle)
simhash 0.0.20110213-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, wheezy
  • size: 124 kB
  • ctags: 63
  • sloc: ansic: 680; sh: 20; makefile: 18
file content (161 lines) | stat: -rw-r--r-- 3,310 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/*
 * Copyright © 2005-2009 Bart Massey
 * ALL RIGHTS RESERVED
 * [This program is licensed under the "3-clause ('new') BSD License"]
 * Please see the file COPYING in the source
 * distribution of this software for license terms.
 */

/*
 * Simple hash table stop list ala corman-leiserson-rivest.
 * Bart Massey 2005/03
 */

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include "hash.h"

/* occupancy is out-of-band.  sigh */
#define EMPTY 0
#define FULL 1
#define DELETED 2
static char *occ;

static int *hash;
static int nhash;

/* for n > 0 */
static int next_pow2(int n) {
    int m = 1;
    while (n > 0) {
	n >>= 1;
	m <<= 1;
    }
    return m;
}

static void hash_alloc(void) {
    int i;
    hash = malloc(nhash * sizeof(int));
    assert(hash);
    occ = malloc(nhash);
    assert(occ);
    for (i = 0; i < nhash; i++)
	occ[i] = EMPTY;
}

/* The occupancy shouldn't be bad, since we only keep small crcs in
   the stop list */

void hash_reset(int size) {
    nhash = 7 * size;
    nhash = next_pow2(nhash);
    hash_alloc();
}

/* Since the input values are crc's, we don't
   try to hash them at all!  they're plenty random
   coming in, in principle. */

static int do_hash_insert(unsigned crc) {
    int count;
    unsigned h = crc;
    for (count = 0; count < nhash; count++) {
	int i = h & (nhash - 1);
	if (occ[i] != FULL) {
	    occ[i] = FULL;
	    hash[i] = crc;
	    return 1;
	}
	if (hash[i] == crc)
	    return 1;
	h += 2 * (nhash / 4) + 1;
    }
    return 0;
}

/* idiot stop-and-copy for deleted references */
static void gc(void) {
    int i;
    int *oldhash = hash;
    char *oldocc = occ;
    hash_alloc();
    for (i = 0; i < nhash; i++) {
	if (oldocc[i] == FULL) {
	    if(!do_hash_insert(oldhash[i])) {
		fprintf(stderr, "internal error: gc failed, table full\n");
		exit(1);
	    }
	}
    }
    free(oldhash);
    free(oldocc);
}

void hash_insert(unsigned crc) {
    if (do_hash_insert(crc))
	return;
    gc();
    if (do_hash_insert(crc))
	return;
    fprintf(stderr, "internal error: insert failed, table full\n");
    abort();
    /*NOTREACHED*/
}

static int do_hash_contains(unsigned crc) {
    int count;
    unsigned h = crc;
    for (count = 0; count < nhash; count++) {
	int i = h & (nhash - 1);
	if (occ[i] == EMPTY)
	    return 0;
	if (occ[i] == FULL && hash[i] == crc)
	    return 1;
	h += 2 * (nhash / 4) + 1;
    }
    return -1;
}

int hash_contains(unsigned crc) {
    int result = do_hash_contains(crc);
    if (result >= 0)
	return result;
    gc();
    result = do_hash_contains(crc);
    if (result >= 0)
	return result;
    fprintf(stderr, "internal error: can't find value, table full\n");
    abort();
    /*NOTREACHED*/
}

static int do_hash_delete(unsigned crc) {
    int count;
    unsigned h = crc;
    for (count = 0; count < nhash; count++) {
	int i = h & (nhash - 1);
	if (occ[i] == FULL && hash[i] == crc) {
	    occ[i] = DELETED;
	    return 1;
	}
	if (occ[i] == EMPTY)
	    return 0;
	h += 2 * (nhash / 4) + 1;
    }
    return -1;
}

int hash_delete(unsigned crc) {
    int result = do_hash_delete(crc);
    if (result >= 0)
	return result;
    gc();
    result = do_hash_delete(crc);
    if (result >= 0)
	return result;
    fprintf(stderr, "internal error: delete failed, table full\n");
    abort();
    /*NOTREACHED*/
}