///A tutorial about finding maximal unique matches (Mums).
#include <iostream>
#include <seqan/index.h>
using namespace seqan;
int main()
{
///We begin with a @Class.StringSet@ that stores multiple strings.
StringSet<String<char> > mySet;
resize(mySet, 3);
mySet[0] = "SeqAn is a library for sequence analysis.";
mySet[1] = "The String class is the fundamental sequence type in SeqAn.";
mySet[2] = "Subsequences can be handled with SeqAn's Segment class.";
///Then we create an @Class.Index@ of this @Class.StringSet@.
typedef Index<StringSet<String<char> > > TMyIndex;
TMyIndex myIndex(mySet);
///To find maximal unique matches (Mums), we use the @Spec.Mums Iterator@
///and set the minimum MUM length to 3.
Iterator<TMyIndex, Mums>::Type myMUMiterator(myIndex, 3);
String<SAValue<TMyIndex>::Type> occs;
while (!atEnd(myMUMiterator))
{
///A multiple match can be represented by the positions it occurs at in every sequence
///and its length. @Function.getOccurrences@ returns an unordered sequence of pairs
///(seqNo,seqOfs) the match occurs at.
occs = getOccurrences(myMUMiterator);
///To order them ascending according seqNo we use @Function.orderOccurrences@.
orderOccurrences(occs);
for (unsigned i = 0; i < length(occs); ++i)
std::cout << getValueI2(occs[i]) << ", ";
///@Function.repLength@ returns the length of the match.
std::cout << repLength(myMUMiterator) << " ";
///The match string itself can be determined with @Function.representative@.
std::cout << "\t\"" << representative(myMUMiterator) << '\"' << std::endl;
++myMUMiterator;
}
return 0;
}
|