1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
|
/*= -*- c-file-style: "bsd" -*-
* rproxy -- dynamic caching and delta update in HTTP
* $Id: search.c,v 1.20 2000/08/06 12:50:36 mbp Exp $
*
* Copyright (C) 1999, 2000 by Martin Pool <mbp@humbug.org.au>
* Copyright (C) 1999 by Andrew Tridgell
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* This file contains code for searching the sumset for matching
* values.
*/
/*
* TODO: The common case is that the next block in both streams
* match. Can we make that a bit faster at all? We'd need to perhaps
* add a link forward between blocks in the sum_struct corresponding
* to the order they're found in the input; then before doing a search
* we can just check that pointer.
*/
#include "includes.h"
#include "search.h"
#include "sum_p.h"
#define TABLESIZE (1<<16)
#define NULL_TAG (-1)
#define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
#define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
static int
_hs_compare_targets(struct target const *t1, struct target const *t2)
{
return ((int) t1->t - (int) t2->t);
}
int
_hs_build_hash_table(hs_sumset_t * sums)
{
int i;
sums->tag_table = calloc(TABLESIZE, sizeof sums->tag_table[0]);
if (sums->count > 0) {
sums->targets = calloc(sums->count, sizeof(struct target));
for (i = 0; i < sums->count; i++) {
sums->targets[i].i = i;
sums->targets[i].t = gettag(sums->block_sums[i].weak_sum);
}
/* FIXME: Perhaps if this operating system has comparison_fn_t
* like GNU, then use it in the cast. But really does anyone
* care? */
qsort(sums->targets, sums->count,
sizeof(sums->targets[0]),
(int (*)(const void *, const void *)) _hs_compare_targets);
}
for (i = 0; i < TABLESIZE; i++)
sums->tag_table[i] = NULL_TAG;
for (i = sums->count - 1; i >= 0; i--) {
sums->tag_table[sums->targets[i].t] = i;
}
return 0;
}
/*
* See if there is a match for the specified block INBUF..BLOCK_LEN in
* the checksum set, using precalculated WEAK_SUM.
*
* If we don't find a match on the weak checksum, then we just give
* up. If we do find a weak match, then we proceed to calculate the
* strong checksum for the current block, and see if it will match
* anything.
*/
int
_hs_search_for_block(hs_weak_sum_t weak_sum,
byte_t const *inbuf, size_t block_len,
hs_sumset_t const *sums, hs_stats_t * stats,
off_t * match_where)
{
int hash_tag = gettag(weak_sum);
int j = sums->tag_table[hash_tag];
byte_t strong_sum[DEFAULT_SUM_LENGTH];
int got_strong = 0;
if (j == NULL_TAG) {
return 0;
}
for (; j < sums->count && sums->targets[j].t == hash_tag; j++) {
int i = sums->targets[j].i;
int token;
if (weak_sum != sums->block_sums[i].weak_sum)
continue;
/* also make sure the two blocks are the same length */
/* l = MIN(s->n,len-offset); */
/* if (l != s->block_sums[i].len) continue; */
/* if (!done_csum2) { */
/* map = (schar *)map_ptr(buf,offset,l); */
/* get_checksum2((char *)map,l,sum2); */
/* done_csum2 = 1; */
/* } */
token = sums->block_sums[i].i;
_hs_trace("found weak match for %08x in token %d", weak_sum, token);
if (!got_strong) {
_hs_calc_strong_sum(inbuf, block_len, strong_sum,
DEFAULT_SUM_LENGTH);
got_strong = 1;
}
/* FIXME: Use correct dynamic sum length! */
if (memcmp(strong_sum, sums->block_sums[i].strong_sum,
DEFAULT_SUM_LENGTH) == 0) {
/* XXX: This is a remnant of rsync: token number 1 is the * block
* at offset 0. It would be good to clear this * up. */
*match_where = (token - 1) * sums->block_len;
return 1;
} else {
_hs_trace("this was a false positive, the strong sums "
"don't match");
stats->false_matches++;
}
}
return 0;
}
|