File: suffixtree.hpp

package info (click to toggle)
mothur 1.33.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 11,248 kB
  • ctags: 12,231
  • sloc: cpp: 152,046; fortran: 665; makefile: 74; sh: 34
file content (68 lines) | stat: -rw-r--r-- 2,125 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#ifndef SUFFIXTREE_H
#define SUFFIXTREE_H

/*
 *  suffixtree.h
 *  
 *
 *  Created by Pat Schloss on 12/15/08.
 *  Copyright 2008 Patrick D. Schloss. All rights reserved.
 *
 *	This is my half-assed attempt to implement a suffix tree.  This is a cobbled together algorithm using materials that
 *	I found at http://marknelson.us/1996/08/01/suffix-trees/ and:
 *
 *		Ukkonen E. (1995). On-line construction of suffix trees. Algorithmica 14 (3): 249--260
 *		Gusfield, Dan (1999). Algorithms on Strings, Trees and Sequences: Computer Science and Computational Biology. 
 *			USA: Cambridge University Press
 *
 *	The Ukkonen paper is the seminal paper describing the on-line method of constructing a suffix tree.
 *
 *	I have chosen to store the nodes of the tree as a vector of pointers to SuffixNode objects.  The root is stored at
 *	nodeVector[0].  Each tree also stores the sequence name and the string that corresponds to the actual sequence. 
 *	Finally, this class provides a way of counting the number of suffixes that are needed in one tree to generate a new
 *	sequence (countSuffixes).  This method is used to determine similarity between sequences and was inspired by the
 *	article and Perl source code provided at http://www.ddj.com/web-development/184416093.
 *
 */

#include "mothur.h"

class SuffixNode;

//********************************************************************************************************************

class SuffixTree {
	
public:
	SuffixTree();
	~SuffixTree();

	void loadSequence(Sequence);
	string getSeqName();
	void print();	
	int countSuffixes(string, int&);
	int countSuffixes(string);	

private:	
	void addPrefix(int);
	void canonize();
	int split(int, int);
	void makeSuffixLink(int&, int);
	bool isExplicit(){	return activeStartPosition > activeEndPosition;	}
	
	int activeStartPosition;
	int activeEndPosition;
	
	vector<SuffixNode*> nodeVector;
	int root;
	int activeNode;
	int nodeCounter;
	string seqName;
	string sequence;
	MothurOut* m;
	
};

//********************************************************************************************************************

#endif