File: sequence_file_solution3.cpp

package info (click to toggle)
seqan3 3.0.2%2Bds-9
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 16,052 kB
  • sloc: cpp: 144,641; makefile: 1,288; ansic: 294; sh: 228; xml: 217; javascript: 50; python: 27; php: 25
file content (92 lines) | stat: -rw-r--r-- 2,512 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include <fstream>

#include <seqan3/core/debug_stream.hpp>
#include <seqan3/std/filesystem>

// Helper that puts the tutorial's example FASTQ file on disk for the lifetime of the object:
// the constructor writes the file, the destructor removes it again.
struct write_file_dummy_struct
{
    // Full path of the example file: "my.fastq" inside the system's temporary directory.
    std::filesystem::path const file_path{std::filesystem::temp_directory_path() / "my.fastq"};

    // Creates (or overwrites) the file at `file_path` with five FASTQ records.
    write_file_dummy_struct()
    {

auto file_raw = R"//![fastq_file](
@seq1
CGATCGATC
+
IIIIIIIII
@seq2
AGCG
+
IIII
@seq3
AGCTAGCAGCGATCG
+
IIIIIHIIJJIIIII
@seq4
AGC
+
III
@seq5
AGCTAGCAGCGATCG
+
IIIIIHIIJJIIIII
)//![fastq_file]";

        std::ofstream out{file_path};
        std::string contents{file_raw};
        // The raw literal begins with a line break (right after the opening delimiter);
        // drop it so that the file starts directly with the first record header.
        contents.erase(0, 1);
        out << contents;
    }

    // Deletes the file again. Failure to delete is only reported, never thrown.
    ~write_file_dummy_struct()
    {
        std::error_code remove_status{};
        std::filesystem::remove(file_path, remove_status); // error_code overload: reports via remove_status

        if (!remove_status)
            return;

        seqan3::debug_stream << "[WARNING] Could not delete " << file_path << ". " << remove_status.message() << '\n';
    }
};

// Namespace-scope instance of the helper: its constructor writes the example FASTQ file
// before main() reads it, and its destructor deletes the file again at program shutdown.
write_file_dummy_struct go{};

//![solution]
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/sequence_file/all.hpp>
#include <seqan3/range/views/get.hpp>
#include <seqan3/range/views/move.hpp>
#include <seqan3/std/filesystem>
#include <seqan3/std/ranges>

int main()
{
#if !SEQAN3_WORKAROUND_GCC_96070
    std::filesystem::path tmp_dir = std::filesystem::temp_directory_path(); // get the temp directory

    seqan3::sequence_file_input fin{tmp_dir/"my.fastq"};

    auto length_filter = std::views::filter([] (auto const & rec)
    {
        return std::ranges::size(seqan3::get<seqan3::field::seq>(rec)) >= 5;
    });

    // you can use a for loop

    // for (auto & rec : fin | length_filter | std::views::take(2))
    // {
    //     seqan3::debug_stream << "ID: " << seqan3::get<seqan3::field::id>(rec) << '\n';
    // }

    // But you can also do this to retrieve all IDs into a vector:
    std::vector<std::string> ids = fin
                                 | length_filter                                    // apply length filter
                                 | std::views::take(2)                              // take first two records
                                 | seqan3::views::get<seqan3::field::id>            // select only ID from record
                                 | seqan3::views::move                              // mark ID to be moved out of record
                                 | seqan3::views::to<std::vector<std::string>>;     // convert to container
    // Note that you need to know the type of id (std::string)

    seqan3::debug_stream << ids << '\n';
#endif // !SEQAN3_WORKAROUND_GCC_96070
}
//![solution]