File: search_solution5.cpp

package info (click to toggle)
seqan3 3.4.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 17,580 kB
  • sloc: cpp: 145,192; sh: 307; xml: 264; javascript: 95; makefile: 70; perl: 29; php: 15
file content (92 lines) | stat: -rw-r--r-- 3,911 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0

#include <algorithm>
#include <span>

#include <seqan3/alignment/configuration/all.hpp>
#include <seqan3/alignment/pairwise/align_pairwise.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/search/fm_index/fm_index.hpp>
#include <seqan3/search/search.hpp>

using namespace seqan3::literals;

// Pairwise alignment settings shared by both example runs in this file.
//  - method_global: free end gaps are switched on for the leading and trailing end of
//    sequence 1 and switched off for sequence 2. Sequence 1 is the text window, because
//    it is the first element of the pair handed to align_pairwise.
//  - edit_scheme: selects the scoring used for the alignment.
//  - output_alignment / output_score: request the two result members that are printed.
inline constexpr auto align_config =
    seqan3::align_cfg::method_global{
        seqan3::align_cfg::free_end_gaps_sequence1_leading{true},
        seqan3::align_cfg::free_end_gaps_sequence2_leading{false},
        seqan3::align_cfg::free_end_gaps_sequence1_trailing{true},
        seqan3::align_cfg::free_end_gaps_sequence2_trailing{false}}
    | seqan3::align_cfg::edit_scheme
    | seqan3::align_cfg::output_alignment{}
    | seqan3::align_cfg::output_score{};

void run_text_single()
{
    seqan3::dna4_vector text{
        "CGCTGTCTGAAGGATGAGTGTCAGCCAGTGTAACCCGATGAGCTACCCAGTAGTCGAACTGGGCCAGACAACCCGGCGCTAATGCACTCA"_dna4};
    seqan3::dna4_vector query{"GCT"_dna4};
    seqan3::fm_index index{text};

    seqan3::debug_stream << "Searching all best hits allowing for 1 error in a single text\n";

    seqan3::configuration const search_config =
        seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} | seqan3::search_cfg::hit_all_best{};

    auto search_results = search(query, index, search_config);

    seqan3::debug_stream << "-----------------\n";

    for (auto && hit : search_results)
    {
        size_t start = hit.reference_begin_position() ? hit.reference_begin_position() - 1 : 0;
        std::span text_view{std::data(text) + start, query.size() + 1};

        for (auto && res : align_pairwise(std::tie(text_view, query), align_config))
        {
            auto && [aligned_database, aligned_query] = res.alignment();
            seqan3::debug_stream << "score:    " << res.score() << '\n';
            seqan3::debug_stream << "database: " << aligned_database << '\n';
            seqan3::debug_stream << "query:    " << aligned_query << '\n';
            seqan3::debug_stream << "=============\n";
        }
    }
}

// Searches the query "GCT" in a collection of three DNA texts with up to one error (all best
// hits) and, for every hit, aligns the query against a small window of the text the hit was
// found in. Score, aligned text window and aligned query are printed to seqan3::debug_stream.
void run_text_collection()
{
    std::vector<seqan3::dna4_vector> text{"CGCTGTCTGAAGGATGAGTGTCAGCCAGTGTA"_dna4,
                                          "ACCCGATGAGCTACCCAGTAGTCGAACTG"_dna4,
                                          "GGCCAGACAACCCGGCGCTAATGCACTCA"_dna4};
    seqan3::dna4_vector query{"GCT"_dna4};
    seqan3::fm_index index{text};

    seqan3::debug_stream << "Searching all best hits allowing for 1 error in a text collection\n";

    seqan3::configuration const search_config =
        seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} | seqan3::search_cfg::hit_all_best{};

    seqan3::debug_stream << "-----------------\n";

    for (auto && hit : search(query, index, search_config))
    {
        // reference_id() selects the text of the collection in which this hit was found.
        auto & reference = text[hit.reference_id()];

        // The window starts one position before the hit, unless the hit is already at position 0.
        size_t const start = hit.reference_begin_position() ? hit.reference_begin_position() - 1 : 0;
        // The window spans query length + 1 positions, but must never reach past the end of the
        // selected text: a hit close to its end would otherwise produce an out-of-bounds span.
        size_t const window_length = std::min<size_t>(query.size() + 1, reference.size() - start);
        std::span text_view{std::data(reference) + start, window_length};

        for (auto && res : align_pairwise(std::tie(text_view, query), align_config))
        {
            auto && [aligned_database, aligned_query] = res.alignment();
            seqan3::debug_stream << "score:    " << res.score() << '\n';
            seqan3::debug_stream << "database: " << aligned_database << '\n';
            seqan3::debug_stream << "query:    " << aligned_query << '\n';
            seqan3::debug_stream << "=============\n";
        }
    }
}

// Program entry point: runs the single-text example, prints an empty line to
// seqan3::debug_stream as a separator, then runs the text-collection example.
int main()
{
    run_text_single();

    seqan3::debug_stream << '\n';

    run_text_collection();

    return 0;
}