1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
#ifndef ASSEMBLY_ASSEMBLYALGORITHMS_H
#define ASSEMBLY_ASSEMBLYALGORITHMS_H 1
#include "Assembly/BranchGroup.h"
#include "Assembly/Options.h"
#include "Common/Log.h"
#include "Common/Timer.h"
#include <vector>
#include <string>
#include "Common/InsOrderedMap.h"
class Histogram;
/** A summary of the in- and out-degree of a vertex. */
enum SeqContiguity
{
SC_ISLAND, // sequence is completely isolated
SC_ENDPOINT, // one end of the sequence is open
SC_CONTIGUOUS // the sequence is closed on both ends
};
/** De Bruijn graph assembly algorithms. */
namespace AssemblyAlgorithms {
extern std::vector<size_t> tempCounter;
extern InsOrderedMap<std::string,int> tempStatMap;
extern void addToDb(const std::string&, const int&);
static inline
bool extendBranch(BranchRecord& branch,
graph_traits<SequenceCollectionHash>::vertex_descriptor& kmer,
SequenceCollectionHash::SymbolSet ext);
static inline bool
processLinearExtensionForBranch(BranchRecord& branch,
graph_traits<SequenceCollectionHash>::vertex_descriptor& currSeq,
SequenceCollectionHash::SymbolSetPair extensions,
int multiplicity,
unsigned maxLength, bool addKmer = true);
static inline void
initiateBranchGroup(BranchGroup& group,
const graph_traits<SequenceCollectionHash>::vertex_descriptor& seq,
const SequenceCollectionHash::SymbolSet& extension);
template <typename Graph>
void removeSequenceAndExtensions(Graph* seqCollection,
const typename Graph::value_type& seq);
/** Return the kmer which are adjacent to this kmer. */
template <typename V, typename SymbolSet>
void generateSequencesFromExtension(
const V& currSeq,
extDirection dir,
SymbolSet extension,
std::vector<V>& outseqs)
{
typedef typename SymbolSet::Symbol Symbol;
std::vector<V> extensions;
V extSeq(currSeq);
extSeq.shift(dir);
// Check for the existance of the 4 possible extensions
for (unsigned i = 0; i < SymbolSet::NUM; ++i) {
// Does this sequence have an extension?
Symbol x(i);
if (extension.checkBase(x)) {
extSeq.setLastBase(dir, x);
outseqs.push_back(extSeq);
}
}
}
/** Return the adjacency of this sequence.
* @param considerMarks when true, treat a marked vertex as having
* no edges
*/
static inline
SeqContiguity checkSeqContiguity(
const SequenceCollectionHash::value_type& seq,
extDirection& outDir, bool considerMarks = false)
{
assert(!seq.second.deleted());
bool child = seq.second.hasExtension(SENSE)
&& !(considerMarks && seq.second.marked(SENSE));
bool parent = seq.second.hasExtension(ANTISENSE)
&& !(considerMarks && seq.second.marked(ANTISENSE));
if(!child && !parent)
{
//this sequence is completely isolated
return SC_ISLAND;
}
else if(!child)
{
outDir = ANTISENSE;
return SC_ENDPOINT;
}
else if(!parent)
{
outDir = SENSE;
return SC_ENDPOINT;
}
else
{
// sequence is contiguous
return SC_CONTIGUOUS;
}
}
/** Remove all marked k-mer.
* @return the number of removed k-mer
*/
template <typename Graph>
size_t removeMarked(Graph* pSC)
{
typedef typename Graph::iterator iterator;
Timer timer(__func__);
size_t count = 0;
for (iterator it = pSC->begin(); it != pSC->end(); ++it) {
if (it->second.deleted())
continue;
if (it->second.marked()) {
removeSequenceAndExtensions(pSC, *it);
count++;
}
pSC->pumpNetwork();
}
if (count > 0)
logger(1) << "Removed " << count << " marked k-mer.\n";
return count;
}
} // namespace AssemblyAlgorithms
#include "AdjacencyAlgorithm.h"
#include "AssembleAlgorithm.h"
#include "BubbleAlgorithm.h"
#include "CoverageAlgorithm.h"
#include "ErodeAlgorithm.h"
#include "LoadAlgorithm.h"
#include "SplitAlgorithm.h"
#include "TrimAlgorithm.h"
#endif
|