File: read_mapper_indexer_step3.cpp

package info (click to toggle)
seqan3 3.4.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 17,580 kB
  • sloc: cpp: 145,192; sh: 307; xml: 264; javascript: 95; makefile: 70; perl: 29; php: 15
file content (100 lines) | stat: -rw-r--r-- 2,926 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0

#include <seqan3/core/platform.hpp>
#if SEQAN3_HAS_CEREAL
//![complete]
#    include <fstream>

#    include <seqan3/argument_parser/all.hpp>
#    include <seqan3/io/sequence_file/input.hpp>
#    include <seqan3/search/fm_index/bi_fm_index.hpp>

#    include <cereal/archives/binary.hpp>

struct reference_storage_t
{
    std::vector<std::string> ids;
    std::vector<std::vector<seqan3::dna5>> seqs;
};

//! [solution]
void read_reference(std::filesystem::path const & reference_path, reference_storage_t & storage)
{
    seqan3::sequence_file_input reference_in{reference_path};
    for (auto && record : reference_in)
    {
        storage.ids.push_back(record.id());
        storage.seqs.push_back(record.sequence());
    }
}

//! [create_index]
void create_index(std::filesystem::path const & index_path, reference_storage_t & storage)
//! [create_index]
{
    seqan3::bi_fm_index index{storage.seqs};
    {
        std::ofstream os{index_path, std::ios::binary};
        cereal::BinaryOutputArchive oarchive{os};
        oarchive(index);
    }
}

void run_program(std::filesystem::path const & reference_path, std::filesystem::path const & index_path)
{
    reference_storage_t storage{};
    read_reference(reference_path, storage);
    create_index(index_path, storage);
}
//! [solution]

struct cmd_arguments
{
    std::filesystem::path reference_path{};
    std::filesystem::path index_path{"out.index"};
};

void initialise_argument_parser(seqan3::argument_parser & parser, cmd_arguments & args)
{
    parser.info.author = "E. coli";
    parser.info.short_description = "Creates an index over a reference.";
    parser.info.version = "1.0.0";
    parser.add_option(args.reference_path,
                      'r',
                      "reference",
                      "The path to the reference.",
                      seqan3::option_spec::required,
                      seqan3::input_file_validator{{"fa", "fasta"}});
    parser.add_option(args.index_path,
                      'o',
                      "output",
                      "The output index file path.",
                      seqan3::option_spec::standard,
                      seqan3::output_file_validator{seqan3::output_file_open_options::create_new, {"index"}});
}

int main(int argc, char const ** argv)
{
    seqan3::argument_parser parser("Indexer", argc, argv);
    cmd_arguments args{};

    initialise_argument_parser(parser, args);

    try
    {
        parser.parse();
    }
    catch (seqan3::argument_parser_error const & ext)
    {
        std::cerr << "[PARSER ERROR] " << ext.what() << '\n';
        return -1;
    }

    run_program(args.reference_path, args.index_path);

    return 0;
}
//![complete]
#endif //SEQAN3_HAS_CEREAL