File: seed_example.cpp

package info (click to toggle)
seqan3 3.4.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 17,580 kB
  • sloc: cpp: 145,192; sh: 307; xml: 264; javascript: 95; makefile: 70; perl: 29; php: 15
file content (31 lines) | stat: -rw-r--r-- 1,312 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0

#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/search/views/minimiser_hash.hpp>

using namespace seqan3::literals;

int main()
{
    std::vector<seqan3::dna4> text{"CCACGTCGACGGTT"_dna4};

    // Here a consecutive shape with size 4 (so the k-mer size is 4) and a window size of 8 is used.
    auto minimisers = text | seqan3::views::minimiser_hash(seqan3::shape{seqan3::ungapped{4}}, seqan3::window_size{8});
    // results in: [10322096095657499240, 10322096095657499142, 10322096095657499224]
    // representing the k-mers [GTAC, TCGA, GACG]
    seqan3::debug_stream << minimisers << '\n';

    // Get hash values
    uint64_t seed = 0x8F'3F'73'B5'CF'1C'9A'DE; // The default seed from minimiser_hash
    // Use XOR on all minimiser values
    auto hash_values = minimisers
                     | std::views::transform(
                           [seed](uint64_t i)
                           {
                               return i ^ seed;
                           });
    seqan3::debug_stream << hash_values << '\n'; // results in: [182, 216, 134]
}