File: hashtable.c

package info (click to toggle)
md5deep 3.6-1
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 1,220 kB
  • ctags: 667
  • sloc: ansic: 7,134; sh: 3,195; makefile: 105
file content (242 lines) | stat: -rw-r--r-- 5,523 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

#include "main.h"

/* $Id: hashtable.c 207 2009-12-22 00:33:33Z jessekornblum $ */

status_t file_data_compare(state *s, file_data_t *a, file_data_t *b)
{
  int partial_null = FALSE, partial_match = FALSE, partial_failure = FALSE;
  hashname_t i;
  
  if (NULL == a || NULL == b || NULL == s)
    return status_unknown_error;  
  
  /* We first compare the hashes because they should tell us the fastest if we're
     looking at different files. Then the file size and finally the file name. */
  for (i = 0 ; i < NUM_ALGORITHMS ; ++i)
  {
    if (s->hashes[i]->inuse)
    {
      /* We have to avoid calling STRINGS_EQUAL on NULL values, but
         don't have to worry about capitalization */
      if (NULL == a->hash[i] || NULL == b->hash[i])
	partial_null = TRUE;
      else if (STRINGS_CASE_EQUAL(a->hash[i],b->hash[i]))
	partial_match = TRUE;
      else
	partial_failure = TRUE;
    }
  }

  /* Check for when there are no intersecting hashes */
  if (!partial_match && !partial_failure)
    return status_no_match;

  if (partial_failure)
  {
    if (partial_match)
      return status_partial_match;
    else
      return status_no_match;
  }
  
  if (a->file_size != b->file_size)
    return status_file_size_mismatch;

  /* We can't compare something that's NULL */
  if (NULL == a->file_name || NULL == b->file_name)
    return status_file_name_mismatch;
  if (!(WSTRINGS_EQUAL(a->file_name,b->file_name)))
    return status_file_name_mismatch;
  
  return status_match;
}


/* These two functions are the "hash" functions for the hash table. 
   Because the original implementation of this code was for storing
   md5 hashes, I used the name "translate" to avoid confusion. */


/* Convert a single hexadecimal character to decimal. If c is not a valid
   hex character, returns 0. */
static int translate_char(char c) 
{
  /* If this is a digit */
  if (c > 47 && c < 58) 
    return (c - 48);
  
  c = toupper(c);
  /* If this is a letter... 'A' should be equal to 10 */
  if (c > 64 && c < 71) 
    return (c - 55);

  return 0;
}
    
/* Translates a hex value into it's appropriate index in the array.
   In reality, this just turns the first HASH_SIG_FIGS into decimal */
static uint64_t translate(char *n) 
{ 
  int count;
  uint64_t total = 0, power = 1;

  if (NULL == n)
    internal_error("%s: translate called on NULL string", __progname);

  for (count = HASH_TABLE_SIG_FIGS - 1 ; count >= 0 ; count--) 
  {
    total += translate_char(n[count]) * power;
    power *= 16;
  }

  return total;
}



void hashtable_init(hashtable_t *t)
{
  uint64_t i;
  for (i = 0 ; i < HASH_TABLE_SIZE ; ++i)
    t->member[i] = NULL;
}



status_t hashtable_add(state *s, hashname_t alg, file_data_t *f)
{
  hashtable_entry_t *new, *temp;
  hashtable_t *t = s->hashes[alg]->known;

  if (NULL == t || NULL == f)
    return status_unknown_error;
  
  uint64_t key = translate(f->hash[alg]);
  
  if (NULL == t->member[key])
  {
    new = (hashtable_entry_t *)malloc(sizeof(hashtable_entry_t));
    if (NULL == new)
      return status_out_of_memory;
    
    new->next = NULL;
    new->data = f;
    
    t->member[key] = new;
      return status_ok;
  }

  temp = t->member[key];

  /* If this value is already in the table, we don't need to add it again */
  if (file_data_compare(s,temp->data,f) == status_match)
    return status_ok;

  while (temp->next != NULL)
  {
    temp = temp->next;
    
    if (file_data_compare(s,temp->data,f) == status_match)
      return status_ok;
  }

  new = (hashtable_entry_t *)malloc(sizeof(hashtable_entry_t));
  if (NULL == new)
    return status_out_of_memory;
  
  new->next = NULL;
  new->data = f;
  
  temp->next = new;
  return status_ok;
}



void hashtable_destroy(hashtable_entry_t *e)
{
  hashtable_entry_t *tmp;

  if (NULL == e)
    return;
  
 while (e != NULL)
  {
    tmp = e->next;
    free(e);
    e = tmp;
  }
}


hashtable_entry_t * 
hashtable_contains(state *s, hashname_t alg)
{
  hashtable_entry_t *ret = NULL, *new, *temp, *last = NULL;
  hashtable_t *t; 
  uint64_t key;
  file_data_t * f;
  status_t status;

  if (NULL == s)
    internal_error("%s: state is NULL in hashtable_contains", __progname);
  
  f = s->current_file;
  if (NULL == f)
    internal_error("%s: current_file is in hashtable_contains", __progname);

  key = translate(f->hash[alg]);
  t   = s->hashes[alg]->known;

  //  print_status("key: %"PRIx64, key);

  if (NULL == t->member[key])
    return NULL;

  //  print_status("Found one or more possible hits.");
  
  temp = t->member[key];

  status = file_data_compare(s,temp->data,f);
  //  print_status("First entry %d", status);
  if (status != status_no_match)
    {
      //      print_status("hit on first entry %d", status);
      ret = (hashtable_entry_t *)malloc(sizeof(hashtable_entry_t));
      ret->next = NULL;
      ret->status = status;
      ret->data = temp->data;
      last = ret;
    }

  while (temp->next != NULL)
    {
      temp = temp->next;
    
      status = file_data_compare(s,temp->data,f);
      
      if (status != status_no_match)
	{
	  if (NULL == ret)
	    {
	      ret = (hashtable_entry_t *)malloc(sizeof(hashtable_entry_t));
	      ret->next = NULL;
	      ret->data = temp->data;
	      ret->status = status;
	      last = ret;
	    }
	  else
	    {
	      new = (hashtable_entry_t *)malloc(sizeof(hashtable_entry_t));
	      new->next = NULL;
	      new->data = temp->data;
	      new->status = status;
	      last->next = new;
	      last = new;
	    }
	}
    }
  
  return ret;
}