File: nadsrch.c

package info (click to toggle)
libhsync 0.5.7-1.2
  • links: PTS
  • area: main
  • in suites: woody
  • size: 1,060 kB
  • ctags: 543
  • sloc: sh: 7,944; ansic: 5,413; makefile: 154
file content (131 lines) | stat: -rw-r--r-- 4,768 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*=                                     -*- c-file-style: "bsd" -*-
 * rproxy -- dynamic caching and delta update in HTTP
 * $Id: nadsrch.c,v 1.5 2000/08/11 02:52:56 mbp Exp $
 * 
 * Copyright (C) 1999, 2000 by Martin Pool <mbp@humbug.org.au>
 * Copyright (C) 1999 by Andrew Tridgell <tridge@samba.org>
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */


                              /*
                               | To walk on water you've gotta sink 
                               | in the ice.
                               |   -- Shihad, `The General Electric'.
                              */



/*
 * TODO: Rolling checksums, rather than computing the weak checksum
 * from scratch every time -- at least only do this in `painful
 * honesty' mode.
 */

/*
 * XXX: Copy commands are not sent with the right length -- they
 * should be shortened when approaching EOF.
 *
 * XXX: What happens when we're searching a file that is now longer
 * than it was before?  Can we still match the short block from the
 * last one, or do we not care?
 *
 * The answer is that using the current algorithm we cannot 
 */

#include "includes.h"
#include "mapptr.h"
#include "nad_p.h"
#include "search.h"


/*
 * Check whether AVAIL bytes is enough data to do a useful search
 * operation: this means that either it is at least one full search
 * block in length, or we are approaching the end of the file.
 *
 * On returning true, SEARCH_LEN is set to the length of the search
 * block.
 */
static int
_hs_nad_can_search(hs_encode_job_t *job,
                   size_t *search_len)
{
    size_t avail = job->map_len + job->map_off - job->search_cursor;

    if (avail == 0) {
        return 0;
    } else if (avail >= job->search_block_len) {
/*          _hs_trace("plenty of data left; map_len=%d, map_off=%ld, " */
/*                    "search_cursor=%ld, avail=%ld", */
/*                    job->map_len, (long) job->map_off, (long) job->search_cursor, */
/*                    (long) avail); */
        *search_len = job->search_block_len;
        return 1;
    } else if (job->seen_eof) {
/*          _hs_trace("only %d bytes available to search near eof", */
/*                    avail); */
        *search_len = avail;
        return 1;
    } else {
/*          _hs_trace("only %d bytes left but not at eof, need to read again", */
/*                    avail); */
        return 0;
    }
}


/*
 * Try to match at the current search cursor position.  If we find
 * one, then emit an appropriate copy command.  If not, emit a minimal
 * literal command and try again next time.
 */
void
_hs_nad_search_iter(hs_encode_job_t *job)
{
    size_t              this_len; /* length of this match */
    hs_weak_sum_t       this_weak;
    off_t            match_where; /* location of match in old file */
    byte_t const       *base = job->map_p - job->map_off;

    if ((off_t) job->search_cursor >= (off_t) (job->map_len + job->map_off))
        return;

    /* While there's enough data left to do a search: either there's a
     * whole block left, or we're approaching EOF. */
    while (_hs_nad_can_search(job, &this_len)) {
/*          _hs_trace("compare %lu byte block @%lu", (unsigned long) this_len, */
/*                    (unsigned long) job->search_cursor); */

        this_weak = _hs_calc_weak_sum(base + job->search_cursor, this_len);

        if (_hs_search_for_block(this_weak,
                                 base + job->search_cursor, this_len,
                                 job->sums, job->stats,
                                 &match_where)) {
            _hs_trace("found a strong match @%lu+%lu",
                      (unsigned long) match_where,
                      (unsigned long) this_len);
            _hs_nad_got_copy(job, match_where, this_len);
        } else {
            /* If not matched, we just allow searching to proceed.
             * The intermediate literal data will be sent out when we
             * either find another match, need to do more input, or
             * reach EOF. */
            job->search_cursor++;
        }
    }
}