File: cdb.c

package info (click to toggle)
prayer 1.3.5-dfsg1-8
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,596 kB
  • sloc: ansic: 43,163; makefile: 817; sh: 445; perl: 166
file content (282 lines) | stat: -rw-r--r-- 8,772 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
/* $Cambridge: hermes/src/prayer/lib/cdb.c,v 1.3 2008/09/16 09:59:57 dpc22 Exp $ */
/************************************************
 *    Prayer - a Webmail Interface              *
 ************************************************/

/* Copyright (c) University of Cambridge 2000 - 2008 */
/* See the file NOTICE for conditions of use and distribution. */

/* CDB stuff lifted from Exim source code */

/*
 * $Id: cdb.h,v 1.2.2.1 1998/05/29 16:21:36 cvs Exp $
 *
 * Exim - CDB database lookup module
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * Copyright (c) 1998 Nigel Metheringham, Planet Online Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 *
 *
 * This code implements Dan Bernstein's Constant DataBase (cdb) spec.
 * Information, the spec and sample code for cdb can be obtained from
 *      http://www.pobox.com/~djb/cdb.html
 *
 * This implementation borrows some code from Dan Bernstein's
 * implementation (which has no license restrictions applied to it).
 * This (read-only) implementation is completely contained within
 * cdb.[ch] it does *not* link against an external cdb library.
 *
 *
 * There are 2 varients included within this code.  One uses MMAP and
 * should give better performance especially for multiple lookups on a
 * modern machine.  The other is the default implementation which is
 * used in the case where the MMAP fails or if MMAP was not compiled
 * in.  this implementation is the same as the original reference cdb
 * implementation.
 *
 */

#include "lib.h"

#define CDB_HASH_SPLIT 256      /* num pieces the hash table is split into */
#define CDB_HASH_MASK  255      /* mask to and off split value */
#define CDB_HASH_ENTRY 8        /* how big each offset it */
#define CDB_HASH_TABLE (CDB_HASH_SPLIT * CDB_HASH_ENTRY)

/* State information for cdb databases that are open NB while the db
 * is open its contents will not change (cdb dbs are normally updated
 * atomically by renaming).  However the lifetime of one of these
 * state structures should be limited - ie a long running daemon
 * that opens one may hit problems....
 */

struct cdb_state {
    int fileno;
    off_t filelen;
    char *cdb_offsets;
};

/* 32 bit unsigned type - this is an int on all modern machines */
typedef unsigned int uint32;

/*
 * cdb_hash()
 * Internal function to make hash value */

static uint32 cdb_hash(unsigned char *buf, unsigned int len)
{
    uint32 h;

    h = 5381;
    while (len) {
        --len;
        h += (h << 5);
        h ^= (uint32) * buf++;
    }
    return h;
}

/*
 * cdb_bread()
 * Internal function to read len bytes from disk, coping with oddities */

static int cdb_bread(int fd, char *buf, int len)
{
    int r;
    while (len > 0) {
        do
            r = read(fd, buf, len);
        while ((r == -1) && (errno == EINTR));
        if (r == -1)
            return -1;
        if (r == 0) {
            errno = EIO;
            return -1;
        }
        buf += r;
        len -= r;
    }
    return 0;
}



/*
 * cdb_bread()
 * Internal function to parse 4 byte number (endian independant) */

/* Argument changed from unsigned char * to char * by PH, in order to stop
complaints from Sun's compiler since all calls to this function pass over
char * arguments. Do the casting to unsigned inside the function. */

static uint32 cdb_unpack(char *buf)
{
    uint32 num;
    unsigned char *ubuf = (unsigned char *) buf;
    num = ubuf[3];
    num <<= 8;
    num += ubuf[2];
    num <<= 8;
    num += ubuf[1];
    num <<= 8;
    num += ubuf[0];
    return num;
}



void *cdb_open(char *filename)
{
    int fileno;
    struct cdb_state *cdbp;
    struct stat statbuf;

    fileno = open(filename, O_RDONLY);
    if (fileno == -1)
        return NIL;

    if (fstat(fileno, &statbuf) == 0) {
        /* If this is a valid file, then it *must* be at least
         * CDB_HASH_TABLE bytes long */
        if (statbuf.st_size < CDB_HASH_TABLE)
            return NIL;
    } else
        return NIL;

    /* Having got a file open we need the structure to put things in */
    if ((cdbp = malloc(sizeof(struct cdb_state))) == NIL)
        return (NIL);

    /* preload the structure.... */
    cdbp->fileno = fileno;
    cdbp->filelen = statbuf.st_size;
    cdbp->cdb_offsets = NIL;

    /* get a buffer to stash the basic offsets in - this should speed
     * things up a lot - especially on multiple lookups */
    if ((cdbp->cdb_offsets = malloc(CDB_HASH_TABLE)) == NIL)
        return (NIL);

    /* now fill the buffer up... */
    if (cdb_bread(fileno, cdbp->cdb_offsets, CDB_HASH_TABLE) == -1) {
        /* read of hash table failed */
        /* call the close routine (deallocs the memory), and return NIL */
        cdb_close(cdbp);
        return NIL;
    }

    /* Everything else done - return the cache structure */
    return cdbp;
}

int cdb_find(void *handle, char *keystring, int key_len, char **result)
{
    struct cdb_state *cdbp = handle;
    uint32 item_key_len,
        item_dat_len,
        key_hash,
        item_hash,
        item_posn,
        cur_offset,
        end_offset, hash_offset_entry, hash_offset, hash_offlen,
        hash_slotnm;
    int loop;

    key_hash = cdb_hash((unsigned char *) keystring, key_len);

    hash_offset_entry = CDB_HASH_ENTRY * (key_hash & CDB_HASH_MASK);
    hash_offset = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry);
    hash_offlen = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry + 4);

    /* If the offset length is zero this key cannot be in the file */
    if (hash_offlen == 0) {
        return NIL;
    }
    hash_slotnm = (key_hash >> 8) % hash_offlen;

    /* check to ensure that the file is not corrupt
     * if the hash_offset + (hash_offlen * CDB_HASH_ENTRY) is longer
     * than the file, then we have problems.... */
    if ((hash_offset + (hash_offlen * CDB_HASH_ENTRY)) > cdbp->filelen) {
        return NIL;
    }

    cur_offset = hash_offset + (hash_slotnm * CDB_HASH_ENTRY);
    end_offset = hash_offset + (hash_offlen * CDB_HASH_ENTRY);

    for (loop = 0; (loop < hash_offlen); ++loop) {
        char packbuf[8];
        if (lseek(cdbp->fileno, (off_t) cur_offset, SEEK_SET) == -1)
            return NIL;
        if (cdb_bread(cdbp->fileno, packbuf, 8) == -1)
            return NIL;
        item_hash = cdb_unpack(packbuf);
        item_posn = cdb_unpack(packbuf + 4);
        /* if the position is zero then we have a definite miss */
        if (item_posn == 0)
            return NIL;

        if (item_hash == key_hash) {
            /* matching hash value */
            if (lseek(cdbp->fileno, (off_t) item_posn, SEEK_SET) == -1)
                return NIL;
            if (cdb_bread(cdbp->fileno, packbuf, 8) == -1)
                return NIL;
            item_key_len = cdb_unpack(packbuf);
            /* check key length matches */
            if (item_key_len == key_len) {
                /* finally check if key matches */
                char *item_key;

                if ((item_key = malloc(key_len)) == NIL)        /* NB: minor memory leak */
                    return NIL;
                if (cdb_bread(cdbp->fileno, item_key, key_len) == -1)
                    return NIL;
                if (strncmp(keystring, item_key, key_len) == 0) {
                    /* matches - get data length */
                    item_dat_len = cdb_unpack(packbuf + 4);
                    /* then we build a new result string */
                    if ((*result = malloc(item_dat_len + 1)) == NIL)
                        return NIL;
                    if (cdb_bread(cdbp->fileno, *result, item_dat_len) ==
                        -1)
                        return NIL;
                    (*result)[item_dat_len] = 0;
                    return T;
                }
            }
        }
        cur_offset += 8;

        /* handle warp round of table */
        if (cur_offset == end_offset)
            cur_offset = hash_offset;
    }
    return NIL;
}

void cdb_close(void *handle)
{
    struct cdb_state *cdbp = handle;

    if (cdbp->cdb_offsets)
        free(cdbp->cdb_offsets);

    close(cdbp->fileno);
    free(cdbp);
}