// Example: computing minimisers with seqan3::views::minimiser_hash and recovering the underlying k-mer hash values.
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/range/views/minimiser_hash.hpp>
using seqan3::operator""_dna4;
/// Example program: computes the minimisers of a short DNA text with
/// seqan3::views::minimiser_hash, prints them, then strips the seed again to
/// print the plain k-mer hash values behind each minimiser.
int main()
{
    std::vector<seqan3::dna4> text{"CCACGTCGACGGTT"_dna4};

    // Here a consecutive shape with size 4 (so the k-mer size is 4) and a window size of 8 is used.
    auto minimisers = text | seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{4}},
                                                           seqan3::window_size{8});
    // results in: [10322096095657499240, 10322096095657499142, 10322096095657499224]
    // representing the k-mers [GTCG, TCGA, GACG]
    seqan3::debug_stream << minimisers << '\n';

    // The default seed from minimiser_hash. The printed minimiser values are
    // seeded, so XOR-ing with the same seed a second time cancels it out.
    constexpr uint64_t seed{0x8F3F73B5CF1C9ADE};

    // Use XOR on all minimiser values to get the plain hash values.
    // `seed` is a constexpr local that is only read by value, so the lambda
    // can use it without capturing it.
    auto hash_values = minimisers
                     | std::views::transform([](uint64_t const minimiser) { return minimiser ^ seed; });

    // results in: [182, 216, 134]
    // With A=0, C=1, G=2, T=3 and two bits per base (first base most significant):
    // 182 = GTCG, 216 = TCGA, 134 = GACG.
    seqan3::debug_stream << hash_values << '\n';
}