File: zbox.h

package info (click to toggle)
centrifuge 1.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 11,864 kB
  • sloc: cpp: 51,936; perl: 1,919; python: 1,538; makefile: 618; sh: 352
file content (97 lines) | stat: -rw-r--r-- 2,749 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef ZBOX_H_
#define ZBOX_H_

#include "btypes.h"

/**
 * Fill z with Z-box information for s.  String z will not be resized
 * and will only be filled up to its size cap.  This is the linear-time
 * algorithm from Gusfield.  An optional sanity-check uses a naive
 * algorithm to double-check results.
 */
template<typename T>
void calcZ(const T& s,
           TIndexOffU off,
           EList<TIndexOffU>& z,
           bool verbose = false,
           bool sanityCheck = false)
{
	size_t lCur = 0, rCur = 0;
	size_t zlen = z.size();
	size_t slen = s.length();
	assert_gt(zlen, 0);
	assert_eq(z[0], 0);
	//assert_leq(zlen, slen);
	for (size_t k = 1; k < zlen && k+off < slen; k++) {
		assert_lt(lCur, k);
		assert(z[lCur] == 0 || z[lCur] == rCur - lCur + 1);
		if(k > rCur) {
			// compare starting at k with prefix starting at 0
			size_t ki = k;
			while(off+ki < s.length() && s[off+ki] == s[off+ki-k]) ki++;
			z[k] = (TIndexOffU)(ki - k);
			assert_lt(off+z[k], slen);
			if(z[k] > 0) {
				lCur = k;
				rCur = k + z[k] - 1;
			}
		} else {
			// position k is contained in a Z-box
			size_t betaLen = rCur - k + 1;
			size_t kPrime = k - lCur;
			assert_eq(s[off+k], s[off+kPrime]);
			if(z[kPrime] < betaLen) {
				z[k] = z[kPrime];
				assert_lt(off+z[k], slen);
				// lCur, rCur unchanged
			} else if (z[kPrime] > 0) {
				int q = 0;
				while (off+q+rCur+1 < s.length() && s[off+q+rCur+1] == s[off+betaLen+q]) q++;
				z[k] = (TIndexOffU)(betaLen + q);
				assert_lt(off+z[k], slen);
				rCur = rCur + q;
				assert_geq(k, lCur);
				lCur = k;
			} else {
				z[k] = 0;
				assert_lt(off+z[k], slen);
				// lCur, rCur unchanged
			}
		}
	}
#ifndef NDEBUG
	if(sanityCheck) {
		// Recalculate Z-boxes using naive quadratic-time algorithm and
		// compare to linear-time result
		assert_eq(0, z[0]);
		for(size_t i = 1; i < z.size(); i++) {
			size_t j;
			for(j = i; off+j < s.length(); j++) {
				if(s[off+j] != s[off+j-i]) break;
			}
			assert_eq(j-i, z[i]);
		}
	}
#endif
}

#endif /*ZBOX_H_*/