File: ErodeAlgorithm.h

package info (click to toggle)
abyss 2.3.10-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,284 kB
  • sloc: cpp: 78,182; ansic: 6,512; makefile: 2,252; perl: 672; sh: 509; haskell: 412; python: 4
file content (117 lines) | stat: -rw-r--r-- 3,030 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#ifndef ASSEMBLY_ERODE_ALGORITHM
#define ASSEMBLY_ERODE_ALGORITHM 1

namespace AssemblyAlgorithms {

/** The number of k-mer that have been eroded. */
extern size_t g_numEroded;

template <typename Graph>
void removeExtensionsToSequence(Graph* seqCollection,
		const typename Graph::value_type& seq, extDirection dir);

/**
 * Remove a k-mer and update the extension records of the k-mer that
 * extend to it.
 */
template <typename Graph>
void removeSequenceAndExtensions(Graph* seqCollection,
		const typename Graph::value_type& seq)
{
	// This removes the reverse complement as well
	seqCollection->remove(seq.first);
	removeExtensionsToSequence(seqCollection, seq, SENSE);
	removeExtensionsToSequence(seqCollection, seq, ANTISENSE);
}

/** Remove all the extensions to this sequence. */
template <typename Graph>
void removeExtensionsToSequence(Graph* seqCollection,
		const typename Graph::value_type& seq, extDirection dir)
{
	typedef typename graph_traits<Graph>::vertex_descriptor V;
	typedef typename Graph::Symbol Symbol;
	typedef typename Graph::SymbolSet SymbolSet;

	SymbolSet extension(seq.second.getExtension(dir));
	V testSeq(seq.first);
	Symbol extBase = testSeq.shift(dir);
	for (unsigned i = 0; i < extension.NUM; ++i) {
		Symbol x(i);
		if (extension.checkBase(x)) {
			testSeq.setLastBase(dir, x);
			seqCollection->removeExtension(testSeq, !dir, extBase);
		}
	}
}

/** Return the number of k-mer that have been eroded. */
static inline
size_t getNumEroded()
{
	size_t numEroded = g_numEroded;
	g_numEroded = 0;
	tempCounter[0] += numEroded;
	logger(0) << "Eroded " << numEroded << " tips.\n";
	return numEroded;
}

/** Consider the specified k-mer for erosion.
 * @return the number of k-mer eroded, zero or one
 */
template <typename Graph>
size_t erode(Graph* c, const typename Graph::value_type& seq)
{
	typedef typename vertex_bundle_type<Graph>::type VP;

	if (seq.second.deleted())
		return 0;
	extDirection dir;
	SeqContiguity contiguity = checkSeqContiguity(seq, dir);
	if (contiguity == SC_CONTIGUOUS)
		return 0;

	const VP& data = seq.second;
	if (data.getMultiplicity() < opt::erode
			|| data.getMultiplicity(SENSE) < opt::erodeStrand
			|| data.getMultiplicity(ANTISENSE) < opt::erodeStrand) {
		removeSequenceAndExtensions(c, seq);
		g_numEroded++;
		return 1;
	} else
		return 0;
}

/** The given sequence has changed. */
static inline
void erosionObserver(SequenceCollectionHash* c,
		const SequenceCollectionHash::value_type& seq)
{
	erode(c, seq);
}

//
// Erode data off the ends of the graph, one by one
//
template <typename Graph>
size_t erodeEnds(Graph* seqCollection)
{
	typedef typename Graph::iterator iterator;

	Timer erodeEndsTimer("Erode");
	assert(g_numEroded == 0);
	seqCollection->attach(erosionObserver);

	for (iterator iter = seqCollection->begin();
			iter != seqCollection->end(); ++iter) {
		erode(seqCollection, *iter);
		seqCollection->pumpNetwork();
	}

	seqCollection->detach(erosionObserver);
	return getNumEroded();
}

} // namespace AssemblyAlgorithms

#endif