1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
//![include]
#include <iostream>
#include <seqan/seq_io.h>
#include <seqan/journaled_set.h>
using namespace seqan2;
//![include]
//![findPatternInReference]
template <typename TString, typename TPattern>
void findPatternInReference(String<int> & hits,
TString const & reference,
TPattern const & pattern)
{
// [A] Check whether pattern fits into the sequence.
if (length(pattern) > length(reference))
return;
// [B] Iterate over all positions at which the pattern might occur.
for (unsigned pos = 0; pos < length(reference) - length(pattern) + 1; ++pos)
{
bool isHit = true;
// [C] Evaluate all positions of the pattern until you find a mismatch or you have found a hit.
for (unsigned posPattern = 0; posPattern < length(pattern); ++posPattern)
{
if (pattern[posPattern] != reference[posPattern + pos])
{
isHit = false;
break;
}
}
// [D] Report begin position at which pattern matches the sequence.
if (isHit)
appendValue(hits, pos);
}
}
//![findPatternInReference]
//![searchPattern]
template <typename TString, typename TPattern>
void searchPattern(StringSet<String<int> > & hitSet,
StringSet<TString, Owner<JournaledSet> > const & journalSet,
TPattern const & pattern)
{
typedef StringSet<TString, Owner<JournaledSet> > TJournalSet;
typedef typename Host<TJournalSet const>::Type THost;
// Check for valid initial state.
if (empty(host(journalSet)))
{
std::cout << "No reference set. Aborted search!" << std::endl;
return;
}
// Reset the hitSet to avoid phantom hits coming from a previous search.
clear(hitSet);
resize(hitSet, length(journalSet) + 1);
// Access the reference sequence.
THost & globalRef = host(journalSet);
// Search for pattern in the reference sequence.
findPatternInReference(hitSet[0], globalRef, pattern);
// Search for pattern in the journaled sequences.
for (unsigned i = 0; i < length(journalSet); ++i)
{
// findPatternInJournalString(hitSet[i+1], journalSet[i], pattern, hitSet[0]);
}
}
//![searchPattern]
//![loadAndJoin]
template <typename TString, typename TSpec>
inline int
loadAndJoin(StringSet<TString, Owner<JournaledSet> > & journalSet,
SeqFileIn & databaseFile,
JoinConfig<TSpec> const & joinConfig)
{
typedef typename Host<TString>::Type THost;
clear(journalSet);
String<char> seqId;
THost sequence;
// No sequences in the fasta file!
if (atEnd(databaseFile))
{
std::cerr << "Empty FASTA file." << std::endl;
return -1;
}
// First read sequence for reference sequence.
readRecord(seqId, sequence, databaseFile);
// We have to create the global reference sequence otherwise we loose the information after this function terminates.
createHost(journalSet, sequence);
// If there are more
while (!atEnd(databaseFile))
{
readRecord(seqId, sequence, databaseFile);
appendValue(journalSet, TString(sequence));
join(journalSet, length(journalSet) - 1, joinConfig);
}
return 0;
}
//![loadAndJoin]
//![main]
int main()
{
// Definition of the used types.
typedef String<Dna, Alloc<> > TSequence;
typedef String<Dna, Journaled<Alloc<>, SortedArray, Alloc<> > > TJournal;
typedef StringSet<TJournal, Owner<JournaledSet> > TJournaledSet;
// Open the stream to the file containing the sequences.
CharString seqDatabasePath = getAbsolutePath("demos/tutorial/journaled_set/sequences.fasta");
SeqFileIn databaseFile(toCString(seqDatabasePath));
// Reading each sequence and journal them.
TJournaledSet journalSet;
JoinConfig<GlobalAlign<JournaledCompact> > joinConfig;
loadAndJoin(journalSet, databaseFile, joinConfig);
// Define a pattern and start search.
StringSet<String<int> > hitSet;
TSequence pattern = "GTGGT";
std::cout << "Search for: " << pattern << ":\n";
searchPattern(hitSet, journalSet, pattern);
//![main]
//![printResult]
if (empty(hitSet[0]))
{
std::cout << "No hit in reference " << std::endl;
}
else
{
std::cout << "Hit in reference " << " at ";
for (unsigned j = 0; j < length(hitSet[0]); ++j)
std::cout << hitSet[0][j] << ": " << infix(host(journalSet), hitSet[0][j], hitSet[0][j] + length(pattern)) << "\t";
}
std::cout << std::endl;
std::cout << "Done!" << std::endl;
return 0;
}
//![printResult]
|