File: rset.h

package info (click to toggle)
idzebra 2.2.8-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 10,572 kB
  • sloc: ansic: 54,389; xml: 27,058; sh: 5,892; makefile: 1,102; perl: 210; tcl: 64
file content (276 lines) | stat: -rw-r--r-- 10,611 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
/* This file is part of the Zebra server.
   Copyright (C) Index Data

Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/

#ifndef RSET_H
#define RSET_H

#include <yaz/yaz-util.h>
/* unfortunately we need the isam includes here, for the arguments for */
/* rsisamX_create */
#include <idzebra/isamb.h>
#include <idzebra/isamc.h>
#include <idzebra/isams.h>

YAZ_BEGIN_CDECL

/* Pointer handle types used throughout the rset API: RSFD points to a
 * struct rsfd and RSET points to a struct rset (both declared in this
 * header). */
typedef struct rsfd *RSFD;
typedef struct rset *RSET;

struct ord_list {
    int ord;
    struct ord_list *next;
};

/*
 * Helpers for struct ord_list. Their implementations are not part of this
 * header, so the notes below go by the signatures only.
 * NOTE(review): the NMEM argument suggests that nodes are allocated from
 * that nibble-memory handle -- confirm in the .c file.
 */
/** creates a list (what an empty list looks like is not visible here) */
struct ord_list *ord_list_create(NMEM nmem);
/** appends ord to list and returns a list pointer; callers should use the
    returned pointer (presumably the head -- confirm) */
struct ord_list *ord_list_append(NMEM nmem, struct ord_list *list, int ord);
/** returns a duplicate of list */
struct ord_list *ord_list_dup(NMEM nmem, struct ord_list *list);
/** prints the list (output destination is decided by the implementation) */
void ord_list_print(struct ord_list *list);

/**
 * rset_term is all we need to know of a term to do ranking etc.
 * As far as the rsets are concerned, it is just a dummy pointer to
 * be passed around.
 */
struct rset_term {
    char *name;    /** the term itself in internal encoding (UTF-8/raw) */
    char *flags;   /** flags for rank method */
    int  type;     /** Term_type from RPN Query. Actually this
                       is Z_Term_general, Z_Term_numeric,
                       Z_Term_characterString, ..
                       This info is used to return the encoded term back
                       for search-result-1.
                   */
    int reg_type;  /** register type */
    RSET rset;     /** the rset corresponding to this term */
    void *rankpriv;/** private stuff for the ranking algorithm */
    zint hits_limit;/** limit for hits if > 0 */
    char *ref_id;  /** reference for this term */
    /** ord list for this term; NOTE(review): presumably the list handed to
        rset_term_create() -- confirm in the implementation */
    struct ord_list *ol;
};

/** TERMID is the pointer handle by which a struct rset_term is passed around */
typedef struct rset_term *TERMID;
/**
 * Creates a term descriptor. The parameters mirror the fields of
 * struct rset_term (name, flags, type, ol, reg_type, hits_limit, ref_id).
 * NOTE(review): length is presumably the number of bytes of name to use,
 * and nmem presumably the allocator for the new term -- the implementation
 * is not visible here, confirm both there.
 */
TERMID rset_term_create (const char *name, int length, const char *flags,
                         int type, NMEM nmem, struct ord_list *ol,
                         int reg_type, zint hits_limit, const char *ref_id);

/** rsfd is a "file descriptor" for reading from a rset */
struct rsfd {  /* the stuff common to all rsfd's. */
    RSET rset;  /* ptr to the rset this FD is opened to */
    void *priv; /* private parameters for this type */
    RSFD next;  /* to keep lists of used/free rsfd's */
    /* NOTE(review): counted_items and counted_buf are not described in this
       header; presumably they support hit counting (compare hits_limit and
       hits_count in struct rset) -- confirm in the implementation */
    zint counted_items;
    char *counted_buf;
};


/**
 * rset_control has function pointers to all the important functions
 * of a rset. Each type of rset will have its own control block, pointing
 * to the functions for that type. They all have their own create function
 * which is not part of the control block, as it takes different args for
 * each type.
 */
struct rset_control
{
    /** text description of set type (for debugging) */
    char *desc;
/* RSET rs_something_create(const struct rset_control *sel, ...); */
    /** type-specific delete hook for the rset */
    void (*f_delete)(RSET ct);

    /** recursively fills the terms array with terms. call with curterm=0 */
    /* always counts them all into curterm, but of course won't touch the */
    /* terms array past maxterms. You can use this to count: set maxterms=0 */
    void (*f_getterms)(RSET ct, TERMID *terms, int maxterms, int *curterm);

    /** opens a rsfd on the rset; wflag is RSETF_READ or RSETF_WRITE */
    RSFD (*f_open)(RSET ct, int wflag);
    /** closes a rsfd obtained from f_open */
    void (*f_close)(RSFD rfd);
    /** forward behaves like a read, but it skips some items first */
    int (*f_forward)(RSFD rfd, void *buf, TERMID *term, const void *untilbuf);
    /** reports the position through *current and *total; */
    /*  gives -1,-1 if the pos function is not implemented for this type */
    void (*f_pos)(RSFD rfd, double *current, double *total);
    /** reads an item into buf; term is an output pointer for its TERMID */
    int (*f_read)(RSFD rfd, void *buf, TERMID *term);
    /** writes the item in buf to the rsfd */
    int (*f_write)(RSFD rfd, const void *buf);
};

/** rset_default_forward implements a generic forward with a read-loop. */
/*  This is also what the rset_forward() macro expands to. */
int rset_default_forward(RSFD rfd, void *buf, TERMID *term,
                     const void *untilbuf);

/** rset_default_read implements a generic read. */
/*  This is also what the rset_read() macro expands to. */
int rset_default_read(RSFD rfd, void *buf, TERMID *term);

/** Same signature as the f_getterms slot of struct rset_control. */
/*  NOTE(review): presumably the f_getterms implementation for rsets that */
/*  carry just their own term -- confirm in the implementation. */
void rset_get_one_term(RSET ct,TERMID *terms,int maxterms,int *curterm);

/**
 * rset_key_control contains all there is to know about the keys stored in
 * an isam, and therefore operated by the rsets. Other than this info,
 * all we assume is that all keys are the same size, and they can be
 * memcpy'd around
 */
struct rset_key_control {
    /* NOTE(review): opaque pointer; its owner and meaning are not visible
       in this header */
    void *context;
    /* size of one key (all keys have the same size, see above);
       presumably in bytes, given that keys are memcpy'd */
    int key_size;
    int scope;  /* default for what level we operate (book/chapter/verse) on*/
                /* usual sysno/seqno is 2 */
    /* compares two keys; the exact meaning of the result is defined by the
       implementation behind the pointer */
    int (*cmp)(const void *p1, const void *p2);
    /* logs key p together with the text txt under the given log mask */
    void (*key_logdump_txt) (int logmask, const void *p, const char *txt);
    /* extracts the sequence number from key p (see FIXME below) */
    zint (*getseq)(const void *p);
    /* extracts the segment from key p */
    zint (*get_segment)(const void *p);
    /* key filter; NOTE(review): data is presumably filter_data and the
       result presumably tells whether the key is accepted -- confirm */
    int (*filter_func)(const void *p, void *data);
    void *filter_data;
    /* NOTE(review): inc/dec presumably adjust a reference count on this
       key control -- confirm in the implementation */
    void (*inc)(struct rset_key_control *kc);
    void (*dec)(struct rset_key_control *kc);
    /* FIXME - Should not need a getseq, it won't make much sense with */
    /* higher-order keys. Use a (generalized) cmp instead, or something */
    /* FIXME - decode and encode, and lots of other stuff */
};

/**
 * A rset is an ordered sequence of keys, either directly from an underlying
 * isam, or from one of the higher-level operator rsets (and, or, ...).
 * Actually, it is a "virtual base class": no pure rsets exist in the system,
 * they all are of some derived type.
 */
typedef struct rset
{
    const struct rset_control *control; /* function table for this rset type */
    struct rset_key_control *keycontrol; /* description of the keys handled */
    int  refcount;   /* reference count */
    void *priv;      /* stuff private to the given type of rset */
    NMEM nmem;       /* nibble memory for various allocs */
    RSFD free_list;  /* all rfd's allocated but not currently in use */
    RSFD use_list;   /* all rfd's in use */
    int scope;       /* On what level do we count hits and compare them? */
    TERMID term;     /* the term thing for ranking etc */
    int no_children; /* number of entries in children */
    RSET *children;  /* child rsets (for operator rsets such as and/or) */
    /* NOTE(review): the hits_* fields are not described in this header.
       hits_limit can be set with rset_set_hits_limit(); the rest presumably
       hold the (possibly approximated) hit count -- confirm in rset.c */
    zint hits_limit;
    zint hits_count;
    zint hits_round;
    int hits_approx;
} rset;
/* rset is a "virtual base class", which will never exist on its own;
 * all instances are rsets of some specific type, like rsisamb, or rsbool.
 * They keep their own stuff behind the priv pointer.  */

/* On the old sysno-seqno type isams, the scope was hard-coded to be 2.
 * This means that we count hits on the sysno level, and when matching an
 * 'and', we consider it a match if both terms occur within the same sysno.
 * In more complex isams we can specify on what level we wish to do the
 * matching and counting of hits. For example, we can have book / chapter /
 * verse, and a seqno. Scope 2 then means "give me all verses that match",
 * 3 would be chapters, 4 books.
 * The resolution tells how much of the occurrences we need to return. If we
 * are doing some sort of proximity, we need to get the seqnos of all
 * occurrences, whereas if we are only counting hits, we do not need anything
 * below the scope. Again 1 is seqnos, 2 sysnos (or verses), 3 books, etc.
 */

/*
 * Base-level helpers shared by the rset types. The implementations are not
 * in this header; notes marked NOTE(review) are assumptions to confirm.
 */
/** returns the common (base) part of a rsfd for rs */
/*  NOTE(review): presumably reuses an entry from rs->free_list when one is */
/*  available -- confirm */
RSFD rfd_create_base(RSET rs);
/** NOTE(review): presumably nonzero when rfd is the only rsfd still in use */
/*  on its rset -- confirm */
int rfd_is_last(RSFD rfd);

/** creates the common (base) part of a rset; the arguments correspond to */
/*  the control, nmem, keycontrol, scope, term, no_children and children */
/*  fields of struct rset */
RSET rset_create_base(const struct rset_control *sel,
                      NMEM nmem,
                      struct rset_key_control *kcontrol,
                      int scope,
                      TERMID term,
                      int no_children, RSET *children);

/** deletes a rset */
/*  NOTE(review): struct rset carries a refcount, so this presumably only */
/*  destroys the rset once the last reference is dropped -- confirm */
void rset_delete(RSET rs);
/** returns another reference to rs */
/*  NOTE(review): presumably increments refcount rather than copying */
RSET rset_dup (RSET rs);
/** closes a rsfd obtained with rset_open() */
void rset_close(RSFD rfd);

/* values for the wflag argument of rset_open() */
#define RSETF_READ       0
#define RSETF_WRITE      1

/*
 * The macros below give a function-like interface on top of the control
 * block. Several of them expand their first argument twice (rset_open,
 * rset_getterms, rset_pos, rset_write), so do not pass expressions with
 * side effects.
 */

/* RSFD rset_open(RSET rs, int wflag); */
#define rset_open(rs, wflag) (*(rs)->control->f_open)((rs), (wflag))

/* int rset_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf); */
/* goes through rset_default_forward(), not directly through f_forward */
#define rset_forward(rfd, buf, term, untilbuf) \
    rset_default_forward((rfd), (buf), (term), (untilbuf))

/* void rset_getterms(RSET ct, TERMID *terms, int maxterms, int *curterm); */
#define rset_getterms(ct, terms, maxterms, curterm) \
    (*(ct)->control->f_getterms)((ct),(terms),(maxterms),(curterm))

/* void rset_pos(RSFD rfd, double *current, double *total); */
#define rset_pos(rfd,cur,tot) \
    (*(rfd)->rset->control->f_pos)((rfd),(cur),(tot))

/* int rset_read(RSFD rfd, void *buf, TERMID *term); */
/* goes through rset_default_read(), not directly through f_read */
#define rset_read(rfd, buf, term) rset_default_read((rfd), (buf), (term))

/* int rset_write(RSFD rfd, const void *buf); */
#define rset_write(rfd, buf) (*(rfd)->rset->control->f_write)((rfd), (buf))

/* char *rset_type(RSET rs); -- the desc string of the rset's control block */
#define rset_type(rs) ((rs)->control->desc)

/** rset_count counts or estimates the keys in the rset */
zint rset_count(RSET rs);

/*
 * Constructors for the individual rset types. Each takes the nibble memory
 * handle, the key control and (except rset_create_null) the scope, plus
 * type-specific arguments. The implementations live elsewhere; anything
 * marked NOTE(review) is an assumption to confirm there.
 */

/** temporary rset; NOTE(review): temp_path is presumably where temporary */
/*  files are placed -- confirm */
RSET rset_create_temp(NMEM nmem, struct rset_key_control *kcontrol,
                      int scope, const char *temp_path, TERMID term);

/** null rset; NOTE(review): presumably a rset that never yields a key */
RSET rset_create_null(NMEM nmem, struct rset_key_control *kcontrol, TERMID term);

/** "not" operator over rset_l and rset_r; NOTE(review): presumably the */
/*  keys of rset_l that are not in rset_r -- confirm */
RSET rset_create_not(NMEM nmem, struct rset_key_control *kcontrol,
                     int scope, RSET rset_l, RSET rset_m1, RSET rset_m2,
                     RSET rset_r);

/** "between" operator; NOTE(review): the roles of rset_l, rset_m1, */
/*  rset_m2, rset_r and rset_attr are not visible here -- see the */
/*  implementation */
RSET rset_create_between(NMEM nmem, struct rset_key_control *kcontrol,
                         int scope, RSET rset_l, RSET rset_m1, RSET rset_m2,
                         RSET rset_r, RSET rset_attr);

/** "or" operator over the no_rsets rsets in the rsets array */
RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol,
                          int scope, TERMID termid, int no_rsets, RSET* rsets);

/** "and" operator over the no_rsets rsets in the rsets array */
RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol,
                     int scope, int no_rsets, RSET* rsets);

/** proximity operator over the rset_no rsets in the rset array; */
/*  NOTE(review): ordered, exclusion, relation and distance presumably */
/*  carry the proximity operator's parameters from the query -- confirm */
RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol,
                      int scope, int rset_no, RSET *rset,
                      int ordered, int exclusion, int relation, int distance);

/** rset reading from an ISAMB at position pos */
RSET rsisamb_create(NMEM nmem, struct rset_key_control *kcontrol,
                    int scope, ISAMB is, ISAM_P pos, TERMID term);

/** rset reading from an ISAMC at position pos */
RSET rsisamc_create(NMEM nmem, struct rset_key_control *kcontrol,
                    int scope, ISAMC is, ISAM_P pos, TERMID term);

/** rset reading from an ISAMS at position pos */
RSET rsisams_create(NMEM nmem, struct rset_key_control *kcontrol,
                    int scope, ISAMS is, ISAM_P pos, TERMID term);

/** visits rset; NOTE(review): presumably walks the tree of children for */
/*  debugging, with level as the current depth -- confirm */
void rset_visit(RSET rset, int level);

/** sets the hits limit of rs to l */
void rset_set_hits_limit(RSET rs, zint l);

/** same signature as the f_write slot of struct rset_control; */
/*  NOTE(review): presumably the f_write for rset types that cannot be */
/*  written to -- confirm */
int rset_no_write(RSFD rfd, const void *buf);

YAZ_END_CDECL

#endif
/*
 * Local variables:
 * c-basic-offset: 4
 * c-file-style: "Stroustrup"
 * indent-tabs-mode: nil
 * End:
 * vim: shiftwidth=4 tabstop=8 expandtab
 */