File: virtualdef.h

package info (click to toggle)
vmatch 2.3.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 17,080 kB
  • sloc: ansic: 70,897; sh: 7,139; perl: 4,152; makefile: 1,169; xml: 642; awk: 563; ruby: 306; haskell: 288; sed: 60
file content (346 lines) | stat: -rw-r--r-- 9,876 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
//\IgnoreLatex{

#ifndef VIRTUALDEF_H
#define VIRTUALDEF_H

#include <limits.h>
#include <stdio.h>
#include "types.h"
#include "alphadef.h"
#include "multidef.h"

#include "pathmax.h"

/*
  This file defines the datatype \texttt{virtualtree} which stores
  a virtual suffix tree. 
*/

/*
  Each of the following bits specifies a table 
  of the virtual suffix tree. Some of these tables refer
  to multiple sequences. Hence this file is also included
  in multiseq-adv.c
*/

typedef enum
{
  TISTABNUM = 0, // transformed input sequence
  OISTABNUM,     // original input sequence
  DESTABNUM,     // the fasta descriptions
  SSPTABNUM,     // the sequence separator positions
  BWTTABNUM,     // the bwt-table
  SUFTABNUM,     // the suf-table
  LCPTABNUM,     // the lcp-table
  BCKTABNUM,     // the bck-table
  STI1TABNUM,    // the small inverse of the suf-table
  SKPTABNUM,     // the skip-table

  // The following tables are mainly for experimental alorithms.

  CLDTABNUM,  // the child table
  CLD1TABNUM, // the small child table
  ISOTABNUM,  // the isomorphic depth table
  STITABNUM,  // the inverse of the suf-table
  CFRTABNUM,  // the cfr table
  CRFTABNUM,  // the crf table
  LSFTABNUM,  // the lsf table
  NUMBEROFVTABS
} Vtablenum;

/*
  The following macro defines a shift of 1 by a given integer.
*/

#define MAKETABBIT(TAB) (UintConst(1) << TAB)

#define TISTAB  UintConst(1)            
#define OISTAB  MAKETABBIT(OISTABNUM)     
#define DESTAB  MAKETABBIT(DESTABNUM)
#define SSPTAB  MAKETABBIT(SSPTABNUM)
#define BWTTAB  MAKETABBIT(BWTTABNUM)    
#define SUFTAB  MAKETABBIT(SUFTABNUM)   
#define LCPTAB  MAKETABBIT(LCPTABNUM)
#define BCKTAB  MAKETABBIT(BCKTABNUM)
#define STI1TAB MAKETABBIT(STI1TABNUM)
#define SKPTAB  MAKETABBIT(SKPTABNUM)

/*
  The following are the ORed flags for the tables storing the
  plain sequences and sequence descriptions.
*/

#define SEQUENCETABS (TISTAB | OISTAB | DESTAB | SSPTAB)

/*
  The following are the ORed flags for the basic tables of the 
  virtual suffix tree.
*/

#define BASICTABS (SEQUENCETABS |\
                   BWTTAB | SUFTAB | LCPTAB | BCKTAB | STI1TAB | SKPTAB)

/*
  Here are some bits for the extra table.
*/

#define CLDTAB  MAKETABBIT(CLDTABNUM)
#define CLD1TAB MAKETABBIT(CLD1TABNUM)
#define ISOTAB  MAKETABBIT(ISOTABNUM)
#define STITAB  MAKETABBIT(STITABNUM)
#define CFRTAB  MAKETABBIT(CFRTABNUM)
#define CRFTAB  MAKETABBIT(CRFTABNUM)
#define LSFTAB  MAKETABBIT(LSFTABNUM)

/*
  These are ORed two.
*/

#define EXTRATABS (CLDTAB | CLD1TAB | ISOTAB |\
                   STITAB | CFRTAB | CRFTAB | LSFTAB)

/*
  The following defines the size of an entry in table bcktab.
*/

#define SIZEOFBCKENTRY (UintConst(2) * (Uint) sizeof(Uint))

/*
  This is the maximal length of an index file of a virtual suffix tree.
*/

#define VIRTUALFILENAMEMAX (PATH_MAX+32)

/*
  The following macro assign to the variable \texttt{VAR}
  the value of \(\mathsf{lcptab}[IND]\) for a
  given index \texttt{IND}. If this
  value equals 255, then we have to resort to the next value of
  the exception table \(\mathsf{llvtab}\) via the variable
  \texttt{EXC}.
*/

#define SEQUENTIALEVALLCPVALUEGENERIC(VAR,LCPTAB,LARGELCPS,IND,EXC)\
        if((VAR = LCPTAB[IND]) == UCHAR_MAX)\
        {\
          VAR = (LARGELCPS)->spacePairUint[(EXC)++].uint1;\
        } 

/*
  The following macro is a special version of the previous macro.
*/

#define SEQUENTIALEVALLCPVALUE(VAR,IND,EXC)\
        SEQUENTIALEVALLCPVALUEGENERIC(VAR,\
                                      virtualtree->lcptab,\
                                      &virtualtree->largelcpvalues,\
                                      IND,\
                                      EXC)

/*
  If suffix \(aw\) is the \(i\)th suffix in the lexicographic order
  of all suffixes, then \texttt{RANKOFNEXTLEAF(virtualtree,i)} delivers
  the rank of suffix \(w\).
*/

#define RANKOFNEXTLEAF(VIRT,LEAFRANK)\
        (VIRT)->stitab[(VIRT)->suftab[LEAFRANK]+1]

/*
  The following macro checks if the number of arguments is exactly
  \texttt{N}. Otherwise, an error message is thrown.
*/

#define VSTREECHECKARGNUM(N,S)\
        if (argc != (N))\
        {\
          fprintf(stderr,"Usage: %s %s\n",argv[0],S);\
          fprintf(stderr,"see Vmatch manual at http:%c%cwww.vmatch.de "\
                         " for more information\n",'/','/');\
          exit(EXIT_FAILURE);\
        }

/*
  The following constants are for special cases in \texttt{cldtab}.
*/

#define UNDEFCHILDVALUE 0          // $cldtab[i].value = undef$
#define LARGECHILDVALUE UCHAR_MAX  // maximal value

/*
  The following stores the information for determining the child
  of an lcp-interval in constant time. The values are relative to
  the index they are stored at.
*/

typedef struct
{
  Uchar up,         // smaller than i: up[i] = i - cldtab[i].up
        down,       // larger than i:  down[i] = i + clddtab[i].up
        nextlIndex; // larger than i:  nexlIndex[i] = i + cldtab[i].nextlIndex
} Childinfo;        // \Typedef{Childinfo}

/*
  The following is the central data structure to represent 
  enhanced suffix arrays.
*/

typedef struct
{
  Uchar *bwttab;      // of length multiseq.totallength + 1
  Uint  *suftab,      // of length multiseq.totallength + 1
        *skiptab,     // of length multiseq.totallength + 1
        *bcktab,      // of length 2 * alphasize^prefixlen
        *stitab,      // of length multiseq.totallength + 1
        *afxtab;      // of length multiseq.totallength
  Uchar *stitab1,     // of length multiseq.totallength + 1
        *lcptab,      // of length multiseq.totallength + 1
        *isodepthtab, // of length multiseq.totallength
        *cldtab1,     // of length multiseq.totallength + 1
        *lsftab;      // of length 2 * (multiseq.totallength + 1)
  Childinfo *cldtab;  // of length multiseq.totallength + 1
  DefinedUint longest; // index of longest suffix in suftab
  Alphabet alpha;    // alpha.mapsize == 0, then no alphabet transformation
  Multiseq multiseq; // input sequence over which index is constructed
  ArrayPairUint largelcpvalues; // the lcp values >= 255
  PairUint *llvcachemin, 
           *llvcachemax; // store address of previously used entry
#ifdef COUNT
  Uint llvcachehit,
       callgetexception;
#endif
  BOOL specialsymbols,  // do the input files contain special symbols?
       rcmindex;        // index is reverse complemented index
  Uint sixframeindex,   // six frame index: either NOTRANSLATIONSCHEME or
                        // number of translation scheme
       numofcodes,      // number of codes = alphasize^prefixlen
       maxbranchdepth,  // the maximal depth of a branching node
       mapped,          // status: which table is mapped
       constructed,     // status: which table is constructed
       prefixlength;    // the prefixlength for the bucket boundaries
} Virtualtree;          // \Typedef{Virtualtree}

/*
  The following structure is used for representing super buckets,
  i.e. a sequence of consecutive buckets.
*/

typedef struct
{
  Uint firstcodenum,    // the code of the first suffix in the superbucket 
       firstlcpvalue,   // the first lcp-value of the superbucket
       firstexceptionindex,  // index of first exception value in this interval
       firstsuftabindex,// the first index of the superbucket
       lastsuftabindex; // the last index of the superbucket
} Superbckinterval;     // \Typedef{Superbckinterval}

/*
  The following defines the type of functions applied to an
  lcp-interval.
*/

typedef Sint (*Processmatchinterval)(void *,PairUint *);

/*
  We add some prototypes for the most important functions related
  to virtual suffix trees. For an explanation of these
  function, see the corresponding modules in 
  \texttt{Mkvtree} and \texttt{Vmatch}.
*/

//\IgnoreLatex{

#ifdef __cplusplus
extern "C" {
#endif

//}

Sint callmkvtree(Argctype argc,
                 const char **argv,
                 BOOL storeonfile,
                 Virtualtree *virtualtree,
                 BOOL storedesc,
                 Showverbose showverbose);

Sint wrapvmatch(Virtualtree *,Virtualtree *,Virtualtree *,Virtualtree *);
Sint completevirtualtree(Virtualtree *,Uint,Showverbose);

//\IgnoreLatex{

#ifdef __cplusplus
}
#endif

//}

/*
  The following macro \texttt{DEFINEEVALLCP} defines the function 
  \texttt{evallcp}. For efficiency reason we recommand to instead 
  use the macro \texttt{EVALLCP}.
*/

#define DEFINEEVALLCP\
        static Uint evallcp(Virtualtree *virtualtree,Uint i)\
        {\
          Uint lcpval = virtualtree->lcptab[i];\
          if(lcpval < UCHAR_MAX)\
          {\
            return lcpval;\
          }\
          return (getexception(virtualtree,i))->uint1;\
        }

#define EVALLCP(VIRT,VAR,IND)\
        {\
          VAR = (VIRT)->lcptab[IND];\
          if(VAR >= UCHAR_MAX)\
          {\
            VAR = (getexception(VIRT,IND))->uint1;\
          }\
        }

/*
  The following macro \texttt{DELIVERHOME} evaluates a unique home
  position in the range \([0,n]\), where \([\texttt{LEFT},\texttt{RIGHT}]\)
  in an lcp-interval. The variables \texttt{LCPLEFT} and \texttt{LCPRIGHT}
  are for temporary use. The resulting unique index is stored in variable
  \texttt{VAR}.
*/

#define DELIVERHOME(VIRT,VAR,LCPLEFT,LCPRIGHT,LEFT,RIGHT)\
        {\
          if((LEFT) > 0)\
          {\
            EVALLCP(VIRT,LCPLEFT,LEFT);\
            EVALLCP(VIRT,LCPRIGHT,RIGHT+1);\
            if(LCPLEFT >= LCPRIGHT)\
            {\
              VAR = LEFT;\
            } else\
            {\
              VAR = RIGHT;\
            }\
          } else\
          {\
            VAR = RIGHT;\
          }\
        }

//\IgnoreLatex{

#define MAXVSTACK 32

#ifdef __cplusplus
extern "C" {
#endif

PairUint *getexception(Virtualtree *virtualtree,Uint key);

#ifdef __cplusplus
}
#endif

#endif

//}