File: shared.hpp

package info (click to toggle)
seqan-needle 1.0.3%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 78,084 kB
  • sloc: cpp: 18,697; sh: 478; perl: 100; python: 99; makefile: 28; ruby: 7
file content (125 lines) | stat: -rw-r--r-- 3,920 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <robin_hood.h>

#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/io/sequence_file/input.hpp>
#include <seqan3/search/dream_index/interleaved_bloom_filter.hpp>
#include <seqan3/search/kmer_index/shape.hpp>
#include <seqan3/search/views/minimiser_hash.hpp>

/*! \brief Shrinks a 64-bit seed to the number of bits implied by a k-mer size.
 *  \param kmer_size Size of the k-mer; the shift arithmetic budgets 2 bits per position.
 *  \param seed      The 64-bit seed to adjust.
 *  \returns The upper `2 * kmer_size` bits of `seed`, moved down to the least significant end.
 *  \details A `kmer_size` of 0 yields 0 and a `kmer_size` of 32 or more yields `seed` unchanged. Both cases are
 *           handled explicitly because shifting a 64-bit value by 64 or more bits is undefined behaviour.
 */
static inline constexpr uint64_t adjust_seed(uint8_t const kmer_size,
                                             uint64_t const seed = 0x8F'3F'73'B5'CF'1C'9A'DEULL) noexcept
{
    constexpr unsigned seed_bits = 64u;
    unsigned const kmer_bits = 2u * kmer_size;

    // No bits requested: a shift by the full width would be undefined, the intended result is 0.
    if (kmer_bits == 0u)
        return 0u;

    // All (or more than all) bits requested: `seed_bits - kmer_bits` would be 0 or wrap around.
    if (kmer_bits >= seed_bits)
        return seed;

    return seed >> (seed_bits - kmer_bits);
}

/*! \brief Arguments used for all tools.
 */
struct all_arguments
{
    //!\brief Output path; defaults to "./".
    std::filesystem::path path_out = "./";

    //!\brief Value of the threads setting (presumably the number of threads to use); defaults to 1.
    uint8_t threads = 1;
};

/*! \brief Arguments used for estimate, ibf, minimiser.
 *  \details Extends all_arguments with the settings `k`, `s`, `shape` and `w_size`.
 */
struct min_arguments : all_arguments
{
    //!\brief Defaults to 20; presumably the k-mer size, as it determines the length of the default `shape`.
    uint8_t k{20};
    //!\brief Seed; the default is the same literal that adjust_seed() uses as its default seed.
    seqan3::seed s{0x8F'3F'73'B5'CF'1C'9A'DEULL};
    //!\brief Defaults to an ungapped shape built from `k`.
    // NOTE: The initialiser reads `k` once when the object is created; assigning to `k` afterwards does not
    //       update `shape`.
    seqan3::shape shape = seqan3::ungapped{k};
    //!\brief Window size; defaults to 60.
    seqan3::window_size w_size{60};
};

/*! \brief Arguments used for estimate, ibf, ibfmin.
 *  \details Extends min_arguments and provides a split save()/load() pair. Both functions hand the same fields
 *           to the archive in the same order, so whatever save() writes can be read back by load().
 */
struct estimate_ibf_arguments : min_arguments
{
    bool compressed{false};
    //!\brief Expression levels which should be created.
    std::vector<uint16_t> expression_thresholds{};
    //!\brief If set, the expression levels are determined by the program.
    uint8_t number_expression_thresholds{};
    bool samplewise{false};

    /*! \brief Passes the stored fields to an archive.
     *  \tparam Archive Type of the archive.
     *  \param archive  The archive that receives the fields.
     */
    template <class Archive>
    void save(Archive & archive) const
    {
        // The archive handles its arguments from left to right; load() must use the very same order.
        archive(k,
                w_size.get(),
                s.get(),
                shape,
                compressed,
                number_expression_thresholds,
                expression_thresholds,
                samplewise);
    }

    /*! \brief Fills the fields from an archive.
     *  \tparam Archive Type of the archive.
     *  \param archive  The archive that provides the fields.
     */
    template <class Archive>
    void load(Archive & archive)
    {
        // Mirrors save(): same fields, same order.
        archive(k,
                w_size.get(),
                s.get(),
                shape,
                compressed,
                number_expression_thresholds,
                expression_thresholds,
                samplewise);
    }
};

/*! \brief Reads arguments from a binary file.
 *  \param args  Object that receives the loaded arguments.
 *  \param ipath Path of the file that contains the arguments.
 */
[[maybe_unused]] static void load_args(estimate_ibf_arguments & args, std::filesystem::path ipath)
{
    std::ifstream input_stream(ipath, std::ios::binary);
    cereal::BinaryInputArchive reader(input_stream);
    reader(args);
}

/*! \brief Writes arguments to a binary file.
 *  \param args  The arguments to store.
 *  \param opath Path of the file the arguments are written to.
 */
[[maybe_unused]] static void store_args(estimate_ibf_arguments const & args, std::filesystem::path opath)
{
    std::ofstream output_stream(opath, std::ios::binary);
    cereal::BinaryOutputArchive writer(output_stream);
    writer(args);
}

/*! \brief Sequence file input traits that use dna4 instead of the default dna5.
 *  \details Only `sequence_alphabet` is overridden; everything else is inherited from
 *           seqan3::sequence_file_input_default_traits_dna.
 */
struct my_traits : seqan3::sequence_file_input_default_traits_dna
{
    //!\brief Alphabet used for the sequences.
    using sequence_alphabet = seqan3::dna4;
    // TODO: Decide whether a bitcompressed_vector should be used as sequence container to save memory, at the
    //       cost of losing speed. The candidate override is kept here for reference:
    //template <typename alph>
    //using sequence_container = seqan3::bitcompressed_vector<alph>;
};

/*! \brief Reads an IBF from a binary file; usable for compressed and uncompressed IBFs alike.
 *  \tparam IBFType Type of the IBF.
 *  \param ibf      Object that receives the loaded IBF.
 *  \param ipath    Path of the file that contains the IBF.
 */
template <typename IBFType>
void load_ibf(IBFType & ibf, std::filesystem::path ipath)
{
    std::ifstream input_stream(ipath, std::ios::binary);
    cereal::BinaryInputArchive reader(input_stream);
    reader(ibf);
}

/*! \brief Writes an IBF to a binary file; usable for compressed and uncompressed IBFs alike.
 *  \tparam IBFType Type of the IBF.
 *  \param ibf      The IBF to store.
 *  \param opath    Path of the file the IBF is written to.
 */
template <typename IBFType>
void store_ibf(IBFType const & ibf, std::filesystem::path opath)
{
    std::ofstream output_stream(opath, std::ios::binary);
    cereal::BinaryOutputArchive writer(output_stream);
    writer(ibf);
}