File: OAHash.cpp

package info (click to toggle)
mapsembler2 2.2.4%2Bdfsg1-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,288 kB
  • sloc: cpp: 51,204; ansic: 13,165; sh: 542; makefile: 394; asm: 271; python: 28
file content (223 lines) | stat: -rw-r--r-- 5,464 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
// open-addressing hash table with linear probing, follows wikipedia
// to reduce memory, elements with [value == 0] are UNOCCUPIED, deal with it.

#include <errno.h>
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <algorithm> // for max

#include "OAHash.h"

using namespace::std;


OAHash::OAHash(uint64_t max_memory) // in bytes
{
    int errcode = 0;
    hash_size = max_memory / sizeof(element_pair);
    if (hash_size == 0)
    {
        printf("empty OAHash allocated\n");
        exit(1);
    }
    nb_inserted_keys = 0;
    data = (element_pair *) calloc( hash_size, sizeof(element_pair));  //create hashtable
    if (data == NULL)
    {
        errcode = errno;
        fprintf(stderr, "OAHash: allocation of %lu bytes ",
                hash_size * sizeof(element_pair));
        errno = errcode;
        perror("failed");
        exit(errcode);
    }
}

OAHash::~OAHash()
{
    free(data);
}

int OAHash::size_entry()
{
    return sizeof(element_pair);
}

// hash functions: [ any integer type, e.g. 64 bits, 128 bits or ttmath ] -> [ 64 bits hash ]
#ifdef _largeint
inline uint64_t OAHash::hashcode(LargeInt<KMER_PRECISION> elem)
{
    // hash = XOR_of_series[hash(i-th chunk iof 64 bits)]
    uint64_t result = 0, chunk, mask = ~0;
    LargeInt<KMER_PRECISION> intermediate = elem;
    int i;
    for (i=0;i<KMER_PRECISION;i++)
    {
        chunk = (intermediate & mask).toInt();
        intermediate = intermediate >> 64;
        result ^= hashcode(chunk);
    }
    return result;
}
#endif

#ifdef _ttmath
inline uint64_t OAHash::hashcode(ttmath::UInt<KMER_PRECISION> elem)
{
    // hash = XOR_of_series[hash(i-th chunk iof 64 bits)
    uint64_t result = 0, to_hash;
    ttmath::UInt<KMER_PRECISION> intermediate = elem;
    uint32_t cmask=~0, chunk;
    int i;
    for (i=0;i<KMER_PRECISION/2;i++)
    {
        // retrieve a 64 bits part to hash 
        (intermediate & cmask).ToInt(chunk);
        to_hash = chunk;
        intermediate >>= 32;
        (intermediate & cmask).ToInt(chunk);
        to_hash |= ((uint64_t)chunk) << 32 ;
        intermediate >>= 32;

        result ^= hashcode(to_hash);
    }
    return result;
}
#endif

#ifdef _LP64
inline uint64_t OAHash::hashcode( __uint128_t elem )
{
    // hashcode(uint128) = ( hashcode(upper 64 bits) xor hashcode(lower 64 bits))
    return (hashcode((uint64_t)(elem>>64)) ^ hashcode((uint64_t)(elem&((((__uint128_t)1)<<64)-1))));
}
#endif

inline uint64_t OAHash::hashcode( uint64_t elem )
{
    uint64_t code = elem;
    
    code = code ^ (code >> 14); //supp
    code = (~code) + (code << 18); 
    code = code ^ (code >> 31);
    code = code * 21; 
    code = code ^ (code >> 11);
    code = code + (code << 6);
    code = code ^ (code >> 22);

    return code;
    
}

bool OAHash::is_occupied(element_pair *element)
{
    return (element->value != 0);
}

OAHash::element_pair * OAHash::find_slot(key_type key)
{
    uint64_t ptr = hashcode(key) % hash_size; 
    element_pair *element = data+ptr;
    uint64_t retries = 0;

    // search until we either find the key, or find an empty slot.
    while ( ( is_occupied(element)) && ( element->key != key ) && (retries < hash_size))
    {
        ptr = (ptr + 1) % hash_size;
        element = data+ptr;
        retries++;
    }
    if (retries == hash_size)
    {
        printf("OAHash: max rehashes reached: %lld (notify a developer)\n",(long long)hash_size);
        exit(1);
    }

    return element;
}

//if graine already here, overwrite old value
void OAHash::insert(key_type graine, int value)
{
    element_pair *element = find_slot(graine);
    if (!is_occupied(element))
    {
        element->key = graine;
        nb_inserted_keys++;
    }
    element->value = value;
}

// increment the value of a graine
void OAHash::increment(key_type graine)
{
    element_pair *element = find_slot(graine);
    if (!is_occupied(element))
    {
        element->key = graine;
        nb_inserted_keys++;
    }
    if( element->value == -1)  element->value = 0; //special case, emulate 0 value with -1, (0 is not a valid value, used for empty cell)
    
    element->value = element->value + 1;
}

bool OAHash::get( key_type graine, int * val)
{ 
    element_pair *element = find_slot(graine);
    if (!is_occupied(element))
        return false;
    if ((element->key) == graine && (val != NULL))
        *val = element->value;
    
     if( element->value ==-1) *val = 0; // 0 is emulated with -1
    return true;
}

bool OAHash::has_key(key_type graine)
{
    return get(graine,NULL);
}


// call start_iterator to reinit the iterator, then do a while(next_iterator()) {..} to traverse every cell
void OAHash::start_iterator()
{
    iterator = data-1;
}


// returns true as long as the iterator contains a valid cell
bool OAHash::next_iterator()
{
    while (1)
    {
        iterator++;
        if (iterator == data+hash_size)
            return false;
        if (iterator->value != 0)
            break;
    }
    return true;
}


float OAHash::load_factor()
{
    return (float)nb_inserted_keys/(float)hash_size;
}


uint64_t OAHash::memory_usage()
{
    return hash_size* sizeof(element_pair); // in bits
}

void OAHash::printstat()
{
    fprintf(stderr,"\n----------------------Stat OA Hash Table ---------------------\n");
    
    fprintf(stderr,"max elements: %lld, memory usage: %lld\n",(long long)hash_size,(long long)memory_usage());
    fprintf(stderr,"load factor: %.2f\n",load_factor());
}