1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause
#pragma once
#include <robin_hood.h>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/io/sequence_file/input.hpp>
#include <seqan3/search/dream_index/interleaved_bloom_filter.hpp>
#include <seqan3/search/kmer_index/shape.hpp>
#include <seqan3/search/views/minimiser_hash.hpp>
/*!\brief Truncates a 64-bit seed to the number of bits covered by a k-mer.
 * \param kmer_size Size of the k-mer; two bits are accounted for per position
 *                  (presumably because a 4-letter nucleotide alphabet is hashed - confirm with callers).
 * \param seed      The seed to adjust.
 * \returns `seed >> (64 - 2 * kmer_size)` for `kmer_size` in [1, 32].
 *
 * \details Shifting a 64-bit value by 64 or more bits is undefined behaviour, so the two boundary cases
 *          are handled explicitly:
 *          - `kmer_size == 0`: no bits remain, the result is 0 (the mathematical result of shifting by 64).
 *          - `kmer_size >= 32`: all 64 bits are covered, the seed is returned unchanged.
 */
static inline constexpr uint64_t adjust_seed(uint8_t const kmer_size,
                                             uint64_t const seed = 0x8F'3F'73'B5'CF'1C'9A'DEULL) noexcept
{
    constexpr unsigned seed_bits = 64u;
    unsigned const kmer_bits = 2u * kmer_size;

    // A shift by the full width would be undefined; nothing of the seed is kept.
    if (kmer_bits == 0u)
        return 0u;

    // The k-mer covers at least all 64 bits; the unguarded subtraction below would wrap around.
    if (kmer_bits >= seed_bits)
        return seed;

    return seed >> (seed_bits - kmer_bits);
}
//!\brief Arguments that every tool accepts.
struct all_arguments
{
    //!\brief Output path; defaults to "./".
    std::filesystem::path path_out = "./";
    //!\brief Thread count; defaults to 1.
    uint8_t threads = 1;
};
//!\brief Arguments shared by estimate, ibf and minimiser.
struct min_arguments : all_arguments
{
    //!\brief k-mer size; defaults to 20 and determines the size of the default ungapped shape below.
    uint8_t k = 20;

    //!\brief Seed, stored as a seqan3::seed.
    seqan3::seed s{0x8F'3F'73'B5'CF'1C'9A'DEULL};

    //!\brief Shape; initialised once from `k` as an ungapped shape. Changing `k` afterwards does not update it.
    seqan3::shape shape = seqan3::ungapped{k};

    //!\brief Window size, stored as a seqan3::window_size; defaults to 60.
    seqan3::window_size w_size{60};
};
//!\brief Arguments shared by estimate, ibf and ibfmin.
struct estimate_ibf_arguments : min_arguments
{
    //!\brief Flag named `compressed`; defaults to false.
    bool compressed{false};

    //!\brief Expression levels which should be created.
    std::vector<uint16_t> expression_thresholds{};

    //!\brief If set, the expression levels are determined by the program.
    uint8_t number_expression_thresholds = 0;

    //!\brief Flag named `samplewise`; defaults to false.
    bool samplewise = false;

    /*! \brief Writes the arguments to an archive.
     * \param archive The archive to write to.
     * \details The member order must stay in sync with load().
     */
    template <class Archive>
    void save(Archive & archive) const
    {
        archive(k,
                w_size.get(),
                s.get(),
                shape,
                compressed,
                number_expression_thresholds,
                expression_thresholds,
                samplewise);
    }

    /*! \brief Reads the arguments from an archive.
     * \param archive The archive to read from.
     * \details Members are read in exactly the order in which save() writes them.
     */
    template <class Archive>
    void load(Archive & archive)
    {
        archive(k,
                w_size.get(),
                s.get(),
                shape,
                compressed,
                number_expression_thresholds,
                expression_thresholds,
                samplewise);
    }
};
/*! \brief Reads arguments from a binary file.
 * \param args  Object that receives the arguments read from the file.
 * \param ipath Path of the file to read the arguments from.
 */
[[maybe_unused]] static void load_args(estimate_ibf_arguments & args, std::filesystem::path ipath)
{
    std::ifstream input_stream(ipath, std::ios::binary);
    cereal::BinaryInputArchive archive(input_stream);
    archive(args);
}
/*! \brief Writes arguments to a binary file.
 * \param args  The arguments to write.
 * \param opath Path of the file the arguments are written to.
 */
[[maybe_unused]] static void store_args(estimate_ibf_arguments const & args, std::filesystem::path opath)
{
    std::ofstream output_stream(opath, std::ios::binary);
    cereal::BinaryOutputArchive archive(output_stream);
    archive(args);
}
/*!\brief Sequence file input traits that use dna4 instead of the default dna5.
 * \details Derives from seqan3::sequence_file_input_default_traits_dna and only replaces the
 *          `sequence_alphabet` alias; every other trait is taken from the base unchanged.
 */
struct my_traits : seqan3::sequence_file_input_default_traits_dna
{
using sequence_alphabet = seqan3::dna4;
// TODO: Should a bitcompressed_vector be used as the sequence container to save memory,
//       with the disadvantage of losing speed? The override would look like this:
//template <typename alph>
//using sequence_container = seqan3::bitcompressed_vector<alph>;
};
/*! \brief Reads an IBF (compressed or uncompressed) from a binary file.
 * \tparam IBFType Type of the IBF to read.
 * \param ibf   Object that receives the IBF read from the file.
 * \param ipath Path of the file to read the IBF from.
 */
template <class IBFType>
void load_ibf(IBFType & ibf, std::filesystem::path ipath)
{
    std::ifstream input_stream(ipath, std::ios::binary);
    cereal::BinaryInputArchive archive(input_stream);
    archive(ibf);
}
/*! \brief Writes an IBF (compressed or uncompressed) to a binary file.
 * \tparam IBFType Type of the IBF to write.
 * \param ibf   The IBF to write.
 * \param opath Path of the file the IBF is written to.
 */
template <class IBFType>
void store_ibf(IBFType const & ibf, std::filesystem::path opath)
{
    std::ofstream output_stream(opath, std::ios::binary);
    cereal::BinaryOutputArchive archive(output_stream);
    archive(ibf);
}
|