File: search_solution5.cpp

package info (click to toggle)
seqan3 3.0.2%2Bds-9
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 16,052 kB
  • sloc: cpp: 144,641; makefile: 1,288; ansic: 294; sh: 228; xml: 217; javascript: 50; python: 27; php: 25
file content (89 lines) | stat: -rw-r--r-- 3,964 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <seqan3/alignment/configuration/all.hpp>
#include <seqan3/alignment/pairwise/align_pairwise.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/search/search.hpp>
#include <seqan3/search/fm_index/fm_index.hpp>
#include <seqan3/std/span>

using seqan3::operator""_dna4;

// Define the pairwise alignment configuration globally.
// It is shared by run_text_single() and run_text_collection(), which both pass it to align_pairwise
// together with the tuple (text window, query) - i.e. "sequence1" is the text window and "sequence2" the query.
//
// Elements combined here:
//   * method_global with the four free-end-gap flags: leading and trailing end gaps are marked free (true)
//     for sequence1 and not free (false) for sequence2.
//     NOTE(review): presumably this yields a semi-global alignment of the query against the text window -
//     confirm against the SeqAn3 documentation of the free_end_gaps_* elements.
//   * edit_scheme: the scoring scheme element used for the alignment.
//   * output_alignment and output_score: request the two outputs that the functions below read via
//     res.alignment() and res.score().
inline constexpr auto align_config = seqan3::align_cfg::method_global{
                                         seqan3::align_cfg::free_end_gaps_sequence1_leading{true},
                                         seqan3::align_cfg::free_end_gaps_sequence2_leading{false},
                                         seqan3::align_cfg::free_end_gaps_sequence1_trailing{true},
                                         seqan3::align_cfg::free_end_gaps_sequence2_trailing{false}} |
                                     seqan3::align_cfg::edit_scheme |
                                     seqan3::align_cfg::output_alignment{} |
                                     seqan3::align_cfg::output_score{};

void run_text_single()
{
    seqan3::dna4_vector
                text{"CGCTGTCTGAAGGATGAGTGTCAGCCAGTGTAACCCGATGAGCTACCCAGTAGTCGAACTGGGCCAGACAACCCGGCGCTAATGCACTCA"_dna4};
    seqan3::dna4_vector query{"GCT"_dna4};
    seqan3::fm_index index{text};

    seqan3::debug_stream << "Searching all best hits allowing for 1 error in a single text\n";

    seqan3::configuration const search_config = seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} |
                                                seqan3::search_cfg::hit_all_best{};

    auto search_results = search(query, index, search_config);

    seqan3::debug_stream << "-----------------\n";

    for (auto && hit : search_results)
    {
        size_t start = hit.reference_begin_position() ? hit.reference_begin_position() - 1 : 0;
        std::span text_view{std::data(text) + start, query.size() + 1};

        for (auto && res : align_pairwise(std::tie(text_view, query), align_config))
        {
            auto && [aligned_database, aligned_query] = res.alignment();
            seqan3::debug_stream << "score:    " << res.score() << '\n';
            seqan3::debug_stream << "database: " << aligned_database << '\n';
            seqan3::debug_stream << "query:    " << aligned_query << '\n';
            seqan3::debug_stream << "=============\n";
        }
    }
}

// Searches the query "GCT" in a collection of three DNA texts using one FM index over the whole collection,
// allowing at most one error and reporting all best hits. For every hit a small window of the text the hit
// belongs to (selected via hit.reference_id()) is aligned against the query with the global `align_config`,
// and the score plus both aligned sequences are printed to seqan3::debug_stream.
void run_text_collection()
{
    std::vector<seqan3::dna4_vector> text{"CGCTGTCTGAAGGATGAGTGTCAGCCAGTGTA"_dna4,
                                          "ACCCGATGAGCTACCCAGTAGTCGAACTG"_dna4,
                                          "GGCCAGACAACCCGGCGCTAATGCACTCA"_dna4};
    seqan3::dna4_vector query{"GCT"_dna4};
    seqan3::fm_index index{text};

    seqan3::debug_stream << "Searching all best hits allowing for 1 error in a text collection\n";

    seqan3::configuration const search_config = seqan3::search_cfg::max_error_total{seqan3::search_cfg::error_count{1}} |
                                                seqan3::search_cfg::hit_all_best{};

    seqan3::debug_stream << "-----------------\n";

    // Qualified explicitly (instead of relying on argument-dependent lookup) like every other SeqAn3 name here.
    for (auto && hit : seqan3::search(query, index, search_config))
    {
        // The text of the collection in which this hit was found.
        auto const & reference = text[hit.reference_id()];

        // Begin the window one position before the hit (if there is one) so the alignment has left context.
        size_t const start = hit.reference_begin_position() ? hit.reference_begin_position() - 1 : 0;

        // The window spans query.size() + 1 characters, but must never run past the end of this text:
        // a hit close to the text end would otherwise produce a span that reads out of bounds.
        size_t const remaining = reference.size() - start;
        size_t const window_size = (query.size() + 1 < remaining) ? query.size() + 1 : remaining;
        std::span text_view{std::data(reference) + start, window_size};

        for (auto && res : seqan3::align_pairwise(std::tie(text_view, query), align_config))
        {
            auto && [aligned_database, aligned_query] = res.alignment();
            seqan3::debug_stream << "score:    " << res.score() << '\n';
            seqan3::debug_stream << "database: " << aligned_database << '\n';
            seqan3::debug_stream << "query:    " << aligned_query << '\n';
            seqan3::debug_stream << "=============\n";
        }
    }
}

// Program entry point: runs the single-text example first, prints an empty line as a separator,
// and then runs the text-collection example.
int main()
{
    run_text_single();

    // Visually separate the output of the two examples.
    seqan3::debug_stream << '\n';

    run_text_collection();

    return 0;
}