1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
|
#include <fstream>
#include <numeric> // std::accumulate
//![include_ranges_chunk]
#include <range/v3/view/chunk.hpp>
//![include_ranges_chunk]
#include <seqan3/core/char_operations/predicate.hpp>
//![include]
#include <seqan3/io/sequence_file/all.hpp>
//![include]
//![include_debug_stream]
#include <seqan3/core/debug_stream.hpp>
//![include_debug_stream]
#include <seqan3/range/detail/misc.hpp>
//![include_ranges]
#include <seqan3/std/ranges>
//![include_ranges]
struct write_file_dummy_struct
{
std::filesystem::path const tmp_path = std::filesystem::temp_directory_path();
write_file_dummy_struct()
{
auto file_raw = R"////![fastq_file]";
std::ofstream file{tmp_path/"my.fastq"};
std::string str{file_raw};
file << str.substr(1); // skip first newline
std::ofstream file2{tmp_path/"my.qq"};
file2 << str.substr(1); // skip first newline
std::ofstream file3{tmp_path/"my.fasta"};
file3 << ">seq1\nAVAV\n>seq2\nAVAVA\n";
}
~write_file_dummy_struct()
{
std::error_code ec{};
std::filesystem::path file_path{};
file_path = tmp_path/"my.fastq";
std::filesystem::remove(file_path, ec);
if (ec)
seqan3::debug_stream << "[WARNING] Could not delete " << file_path << ". " << ec.message() << '\n';
file_path = tmp_path/"my.qq";
std::filesystem::remove(file_path, ec);
if (ec)
seqan3::debug_stream << "[WARNING] Could not delete " << file_path << ". " << ec.message() << '\n';
file_path = tmp_path/"my.fasta";
std::filesystem::remove(file_path, ec);
if (ec)
seqan3::debug_stream << "[WARNING] Could not delete " << file_path << ". " << ec.message() << '\n';
}
};
write_file_dummy_struct go{}; // write file
int main()
{
{
//![file_extensions]
seqan3::debug_stream << seqan3::format_fastq::file_extensions << '\n'; // prints [fastq,fq]
//![file_extensions]
//![modify_file_extensions]
seqan3::format_fastq::file_extensions.push_back("qq");
seqan3::sequence_file_input fin{std::filesystem::temp_directory_path()/"my.qq"}; // detects FASTQ format
//![modify_file_extensions]
}
{
/*
//![construct_from_cin]
seqan3::sequence_file_input fin{std::cin, format_fasta{}};
//![construct_from_cin]
*/
}
{
//![amino_acid_type_trait]
seqan3::sequence_file_input<seqan3::sequence_file_input_default_traits_aa> fin{std::filesystem::temp_directory_path()/"my.fasta"};
//![amino_acid_type_trait]
}
{
//![record_type]
seqan3::sequence_file_input fin{std::filesystem::temp_directory_path()/"my.fastq"};
using record_type = typename decltype(fin)::record_type;
// Because `fin` is a range, we can access the first element by dereferencing fin.begin()
record_type rec = *fin.begin();
//![record_type]
}
{
seqan3::sequence_file_input fin{std::filesystem::temp_directory_path()/"my.fastq"};
using record_type = typename decltype(fin)::record_type;
//![record_type2]
record_type rec = std::move(*fin.begin()); // avoid copying
//![record_type2]
}
{
//![paired_reads]
// for simplicity we take the same file
seqan3::sequence_file_input fin1{std::filesystem::temp_directory_path()/"my.fastq"};
seqan3::sequence_file_input fin2{std::filesystem::temp_directory_path()/"my.fastq"};
for (auto && [rec1, rec2] : seqan3::views::zip(fin1, fin2)) // && is important!
{ // because seqan3::views::zip returns temporaries
if (seqan3::get<seqan3::field::id>(rec1) != seqan3::get<seqan3::field::id>(rec2))
throw std::runtime_error("Oh oh your pairs don't match.");
}
//![paired_reads]
}
{
//![read_in_batches]
seqan3::sequence_file_input fin{std::filesystem::temp_directory_path()/"my.fastq"};
// `&&` is important because seqan3::views::chunk returns temporaries!
for (auto && records : fin | ranges::views::chunk(10))
{
// `records` contains 10 elements (or less at the end)
seqan3::debug_stream << "Taking the next 10 sequences:\n";
seqan3::debug_stream << "ID: " << seqan3::get<seqan3::field::id>(*records.begin()) << '\n';
} // prints first ID in batch
//![read_in_batches]
}
{
//![quality_filter]
seqan3::sequence_file_input fin{std::filesystem::temp_directory_path()/"my.fastq"};
// std::views::filter takes a function object (a lambda in this case) as input that returns a boolean
auto minimum_quality_filter = std::views::filter([] (auto const & rec)
{
auto qual = seqan3::get<seqan3::field::qual>(rec) | std::views::transform([] (auto q) { return q.to_phred(); });
double sum = std::accumulate(qual.begin(), qual.end(), 0);
return sum / std::ranges::size(qual) >= 40; // minimum average quality >= 40
});
for (auto & rec : fin | minimum_quality_filter)
{
seqan3::debug_stream << "ID: " << seqan3::get<seqan3::field::id>(rec) << '\n';
}
//![quality_filter]
}
{
//![piping_in_out]
auto tmp_dir = std::filesystem::temp_directory_path();
seqan3::sequence_file_input fin{tmp_dir/"my.fastq"};
seqan3::sequence_file_output fout{tmp_dir/"output.fastq"};
// the following are equivalent:
fin | fout;
fout = fin;
seqan3::sequence_file_output{tmp_dir/"output.fastq"} = seqan3::sequence_file_input{tmp_dir/"my.fastq"};
//![piping_in_out]
}
{
//![file_conversion]
auto tmp_dir = std::filesystem::temp_directory_path();
seqan3::sequence_file_output{tmp_dir/"output.fasta"} = seqan3::sequence_file_input{tmp_dir/"my.fastq"};
//![file_conversion]
}
std::filesystem::remove(std::filesystem::temp_directory_path()/"output.fasta");
std::filesystem::remove(std::filesystem::temp_directory_path()/"output.fastq");
}
|