1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
#include <seqan3/core/platform.hpp>
#if SEQAN3_WITH_CEREAL
//![complete]
#include <fstream>
#include <cereal/archives/binary.hpp>
#include <seqan3/alignment/configuration/all.hpp>
#include <seqan3/alignment/pairwise/align_pairwise.hpp>
#include <seqan3/argument_parser/all.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/sequence_file/input.hpp>
#include <seqan3/search/all.hpp>
#include <seqan3/search/fm_index/bi_fm_index.hpp>
#include <seqan3/std/span>
struct reference_storage_t
{
std::vector<std::string> ids;
std::vector<std::vector<seqan3::dna5>> seqs;
};
void read_reference(std::filesystem::path const & reference_path,
reference_storage_t & storage)
{
seqan3::sequence_file_input reference_in{reference_path};
for (auto & [seq, id, qual] : reference_in)
{
storage.ids.push_back(std::move(id));
storage.seqs.push_back(std::move(seq));
}
}
//! [solution]
void map_reads(std::filesystem::path const & query_path,
std::filesystem::path const & index_path,
std::filesystem::path const & sam_path,
reference_storage_t & storage,
uint8_t const errors)
{
// we need the alphabet and text layout before loading
seqan3::bi_fm_index<seqan3::dna5, seqan3::text_layout::collection> index;
{
std::ifstream is{index_path, std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
iarchive(index);
}
seqan3::sequence_file_input query_file_in{query_path};
seqan3::configuration const search_config = seqan3::search_cfg::max_error_total{
seqan3::search_cfg::error_count{errors}} |
seqan3::search_cfg::hit_all_best{};
//! [alignment_config]
seqan3::configuration const align_config = seqan3::align_cfg::method_global{
seqan3::align_cfg::free_end_gaps_sequence1_leading{true},
seqan3::align_cfg::free_end_gaps_sequence2_leading{false},
seqan3::align_cfg::free_end_gaps_sequence1_trailing{true},
seqan3::align_cfg::free_end_gaps_sequence2_trailing{false}} |
seqan3::align_cfg::edit_scheme |
seqan3::align_cfg::output_alignment{} |
seqan3::align_cfg::output_score{};
//! [alignment_config]
#if SEQAN3_WORKAROUND_GCC_93983
for (auto && record : query_file_in /*| seqan3::views::take(20)*/)
#else // ^^^ workaround / no workaround vvv
for (auto && record : query_file_in | seqan3::views::take(20))
#endif // SEQAN3_WORKAROUND_GCC_93983
{
auto & query = seqan3::get<seqan3::field::seq>(record);
for (auto && result : search(query, index, search_config))
{
size_t start = result.reference_begin_position() ? result.reference_begin_position() - 1 : 0;
std::span text_view{std::data(storage.seqs[result.reference_id()]) + start, query.size() + 1};
for (auto && alignment : seqan3::align_pairwise(std::tie(text_view, query), align_config))
{
auto && [aligned_database, aligned_query] = alignment.alignment();
seqan3::debug_stream << "id: " << seqan3::get<seqan3::field::id>(record) << '\n';
seqan3::debug_stream << "score: " << alignment.score() << '\n';
seqan3::debug_stream << "database: " << aligned_database << '\n';
seqan3::debug_stream << "query: " << aligned_query << '\n';
seqan3::debug_stream << "=============\n";
}
}
}
(void) sam_path; // prevent unused parameter warning
}
//! [solution]
void run_program(std::filesystem::path const & reference_path,
std::filesystem::path const & query_path,
std::filesystem::path const & index_path,
std::filesystem::path const & sam_path,
uint8_t const errors)
{
reference_storage_t storage{};
read_reference(reference_path, storage);
map_reads(query_path, index_path, sam_path, storage, errors);
}
struct cmd_arguments
{
std::filesystem::path reference_path{};
std::filesystem::path query_path{};
std::filesystem::path index_path{};
std::filesystem::path sam_path{"out.sam"};
uint8_t errors{0};
};
void initialise_argument_parser(seqan3::argument_parser & parser, cmd_arguments & args)
{
parser.info.author = "E. coli";
parser.info.short_description = "Map reads against a reference.";
parser.info.version = "1.0.0";
parser.add_option(args.reference_path, 'r', "reference", "The path to the reference.",
seqan3::option_spec::REQUIRED,
seqan3::input_file_validator{{"fa","fasta"}});
parser.add_option(args.query_path, 'q', "query", "The path to the query.",
seqan3::option_spec::REQUIRED,
seqan3::input_file_validator{{"fq","fastq"}});
parser.add_option(args.index_path, 'i', "index", "The path to the index.",
seqan3::option_spec::REQUIRED,
seqan3::input_file_validator{{"index"}});
parser.add_option(args.sam_path, 'o', "output", "The output SAM file path.",
seqan3::option_spec::DEFAULT,
seqan3::output_file_validator{{"sam"}});
parser.add_option(args.errors, 'e', "error", "Maximum allowed errors.",
seqan3::option_spec::DEFAULT,
seqan3::arithmetic_range_validator{0, 4});
}
int main(int argc, char const ** argv)
{
seqan3::argument_parser parser("Mapper", argc, argv);
cmd_arguments args{};
initialise_argument_parser(parser, args);
try
{
parser.parse();
}
catch (seqan3::argument_parser_error const & ext)
{
std::cerr << "[PARSER ERROR] " << ext.what() << '\n';
return -1;
}
run_program(args.reference_path, args.query_path, args.index_path, args.sam_path, args.errors);
return 0;
}
//![complete]
#endif //SEQAN3_WITH_CEREAL
|