1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <seqan3/test/snippet/create_temporary_snippet_file.hpp>
seqan3::test::create_temporary_snippet_file reference_fasta{"reference.fasta",
R"////![ref_file]"}; // std::filesystem::current_path() / "reference.fasta" will be deleted after the execution
seqan3::test::create_temporary_snippet_file mapping_sam{"mapping.sam",
R"////![sam_file]"}; // std::filesystem::current_path() / "mapping.sam" will be deleted after the execution
//![solution]
#include <algorithm> // std::ranges::count
#include <filesystem>
#include <ranges>
#include <string>
#include <vector>
#include <seqan3/alignment/cigar_conversion/alignment_from_cigar.hpp>
#include <seqan3/alphabet/gap/gap.hpp>
#include <seqan3/alphabet/nucleotide/dna5.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/record.hpp>
#include <seqan3/io/sam_file/input.hpp>
#include <seqan3/io/sequence_file/input.hpp>
int main()
{
std::filesystem::path current_path = std::filesystem::current_path();
// read in reference information
seqan3::sequence_file_input reference_file{current_path / "reference.fasta"};
std::vector<std::string> reference_ids{};
std::vector<seqan3::dna5_vector> reference_sequences{};
for (auto && record : reference_file)
{
reference_ids.push_back(std::move(record.id()));
reference_sequences.push_back(std::move(record.sequence()));
}
// filter out alignments
seqan3::sam_file_input mapping_file{current_path / "mapping.sam", reference_ids, reference_sequences};
auto mapq_filter = std::views::filter(
[](auto & record)
{
return record.mapping_quality() >= 30;
});
for (auto & record : mapping_file | mapq_filter)
{
auto alignment = seqan3::alignment_from_cigar(record.cigar_sequence(),
reference_sequences[record.reference_id().value()],
record.reference_position().value(),
record.sequence());
// as loop
size_t sum_reference{};
for (auto const & char_reference : std::get<0>(alignment))
if (char_reference == seqan3::gap{})
++sum_reference;
// or via std::ranges::count
size_t sum_read = std::ranges::count(std::get<1>(alignment), seqan3::gap{});
// The reference_id is ZERO based and an optional. -1 is represented by std::nullopt (= reference not known).
std::optional reference_id = record.reference_id();
seqan3::debug_stream << record.id() << " mapped against "
<< (reference_id ? std::to_string(reference_id.value()) : "unknown reference") << " with "
<< sum_read << " gaps in the read sequence and " << sum_reference
<< " gaps in the reference sequence.\n";
}
}
//![solution]
|