// Example: computing minimiser hash values of a dna4 sequence with seqan3::views::minimiser_hash.
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/range/views/minimiser_hash.hpp>
using seqan3::operator""_dna4;
using seqan3::operator""_shape;
int main()
{
std::vector<seqan3::dna4> text{"CCACGTCGACGGTT"_dna4};
// Here a consecutive shape with size 4 (so the k-mer size is 4) and a window size of 4 is used. The seed is set
// to 0, so lexicographical ordering is used.
auto example_a = text | seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{4}},
seqan3::window_size{4},
seqan3::seed{0});
// results in: [81, 70, 27, 109, 97, 216, 97, 109, 26, 22, 5]
// representing the k-mers [CCAC, CACG, ACGT, CGTC, cgac, TCGA, CGAC, cgtc, ACGG, accg, aacc]
seqan3::debug_stream << example_a << '\n';
auto example_b = text | seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{4}},
seqan3::window_size{8},
seqan3::seed{0});
// results in: [27, 97, 26, 22, 5] representing the k-mers [ACGT, CGAC, ACGG, accg, aacc]
seqan3::debug_stream << example_b << '\n';
auto example_c = text | seqan3::views::minimiser_hash(0b10101_shape,
seqan3::window_size{8},
seqan3::seed{0});
// results in: [9, 18, 7, 6] representing the k-mers [A.G.C, C.A.G, a.c.t, a.c.g]
seqan3::debug_stream << example_c << '\n';
}
|