File: view_minimiser_hash_test.cpp

package info (click to toggle)
seqan3 3.0.2%2Bds-9
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 16,052 kB
  • sloc: cpp: 144,641; makefile: 1,288; ansic: 294; sh: 228; xml: 217; javascript: 50; python: 27; php: 25
file content (125 lines) | stat: -rw-r--r-- 6,009 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// -----------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
// -----------------------------------------------------------------------------------------------------

#include <forward_list>
#include <list>

#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/range/container/bitcompressed_vector.hpp>
#include <seqan3/range/views/minimiser_hash.hpp>
#include <seqan3/range/views/take_until.hpp>
#include <seqan3/test/expect_range_eq.hpp>

#include <gtest/gtest.h>

#include "../iterator_test_template.hpp"

using seqan3::operator""_dna4;
using seqan3::operator""_shape;
// Container type used for all expected hash values in this file.
using result_t = std::vector<size_t>;
// Iterator type of the view produced by piping a mutable seqan3::dna4_vector through
// views::minimiser_hash(seqan3::ungapped{4}, seqan3::window_size{8}, seqan3::seed{0}).
// It is the template argument of the iterator_fixture specialisation further down.
using iterator_type = std::ranges::iterator_t<decltype(std::declval<seqan3::dna4_vector&>()
                                                       | seqan3::views::minimiser_hash(seqan3::ungapped{4},
                                                                                       seqan3::window_size{8},
                                                                                       seqan3::seed{0}))>;

// Shapes shared by the tests: an ungapped shape built from seqan3::ungapped{4} and a gapped shape
// given by the bit pattern 1001.
static constexpr seqan3::shape ungapped_shape = seqan3::ungapped{4};
static constexpr seqan3::shape gapped_shape = 0b1001_shape;
// Pre-built adaptors for both shapes, each with window size 8 and an explicit seed of 0.
// (The `seed` test below covers calls that omit the seed argument.)
static constexpr auto ungapped_view = seqan3::views::minimiser_hash(ungapped_shape,
                                                                    seqan3::window_size{8},
                                                                    seqan3::seed{0});
static constexpr auto gapped_view = seqan3::views::minimiser_hash(gapped_shape,
                                                                  seqan3::window_size{8},
                                                                  seqan3::seed{0});

// Fixture specialisation for the minimiser_hash iterator type.
// NOTE(review): the primary iterator_fixture template is presumably declared in
// ../iterator_test_template.hpp, whose typed tests read the members below - confirm there.
template <>
struct iterator_fixture<iterator_type> : public ::testing::Test
{
    // The iterator is tested as a forward iterator.
    using iterator_tag = std::forward_iterator_tag;
    // The view is declared as not const-iterable for the purposes of the iterator tests.
    static constexpr bool const_iterable = false;

    // Input text and the values the ungapped view is expected to yield for it
    // (same text and values as text3 / ungapped3 in minimiser_hash_test).
    seqan3::dna4_vector text{"ACGGCGACGTTTAG"_dna4};
    result_t expected_range{26, 97, 27, 6, 1};

    // The range under test: `text` piped through the ungapped adaptor.
    using test_range_t = decltype(text | ungapped_view);
    test_range_t test_range = text | ungapped_view;
};

// Instantiate the typed iterator test suite with the minimiser_hash iterator as its only type.
using test_type = ::testing::Types<iterator_type>;
// NOTE(review): the final macro argument is intentionally left empty; presumably this avoids a
// compiler warning about an empty variadic macro argument list - confirm against the GoogleTest docs.
INSTANTIATE_TYPED_TEST_SUITE_P(iterator_fixture, iterator_fixture, test_type, );

// Stateless typed-test fixture; the template parameter is the underlying range type under test.
template <typename underlying_range_t>
struct minimiser_hash_properties_test : ::testing::Test
{};

// Underlying range types the typed tests are run with: std::vector, seqan3::bitcompressed_vector and
// std::list over seqan3::dna4, each once as a mutable and once as a const type.
using underlying_range_types = ::testing::Types<std::vector<seqan3::dna4>,
                                                std::vector<seqan3::dna4> const,
                                                seqan3::bitcompressed_vector<seqan3::dna4>,
                                                seqan3::bitcompressed_vector<seqan3::dna4> const,
                                                std::list<seqan3::dna4>,
                                                std::list<seqan3::dna4> const>;

// Register the typed suite; the trailing macro argument is left empty on purpose.
TYPED_TEST_SUITE(minimiser_hash_properties_test, underlying_range_types, );
// Fixture holding the input texts and the expected minimiser values used by the TEST_F cases below.
class minimiser_hash_test : public ::testing::Test
{
protected:
    // Text consisting only of 'A'.
    std::vector<seqan3::dna4> text1{"AAAAAAAAAAAAAAAAAAA"_dna4};
    // NOTE(review): text1_short is not referenced by any test in this file.
    std::vector<seqan3::dna4> text1_short{"AAAAAA"_dna4};
    result_t result1{0, 0, 0}; // Same for ungapped and gapped
    // Expected values for text1 when the seed argument is omitted (see the `seed` test).
    result_t ungapped_default_seed{0x8F3F73B5CF1C9A21, 0x8F3F73B5CF1C9A21, 0x8F3F73B5CF1C9A21};
    result_t gapped_default_seed{0x8F3F73B5CF1C9AD1, 0x8F3F73B5CF1C9AD1, 0x8F3F73B5CF1C9AD1};

    // Two-symbol text for which no values are expected.
    std::vector<seqan3::dna4> text2{"AC"_dna4};
    result_t result2{};

    // Mixed text with its expected values; the *_stop_at_t3 vectors are the expectations when the
    // text is first passed through a take_until adaptor whose predicate matches 'T'.
    // NOTE(review): lower-case k-mers in the comments presumably come from the reverse complement
    // strand - TODO confirm.
    std::vector<seqan3::dna4> text3{"ACGGCGACGTTTAG"_dna4};
    result_t ungapped3{26, 97, 27, 6, 1}; // ACGG, CGAC, ACGT, aacg, aaac
    result_t ungapped_stop_at_t3{26, 97}; // ACGG, CGAC
    result_t gapped3{2, 5, 3, 2, 1};      // A--G, C--C, A--T, a--g, a--c "-" for gap
    result_t gapped_stop_at_t3{2, 5};     // A--G, C--C "-" for gap
};

// Runs both adaptors over the same sequence stored in each of the underlying range types.
TYPED_TEST(minimiser_hash_properties_test, different_input_ranges)
{
    // Expected values for the sequence below.
    result_t expected_ungapped{27, 97, 27, 6, 1}; // ACGT, CGAC, ACGT, aacg, aaac
    result_t expected_gapped{3, 5, 3, 2, 1};      // A--T, C--C, A--T, a--g, a--c ("-" marks a gap)

    // ACGTCGACGTTTAG, given symbol by symbol.
    TypeParam sequence{'A'_dna4, 'C'_dna4, 'G'_dna4, 'T'_dna4, 'C'_dna4, 'G'_dna4, 'A'_dna4,
                       'C'_dna4, 'G'_dna4, 'T'_dna4, 'T'_dna4, 'T'_dna4, 'A'_dna4, 'G'_dna4};

    EXPECT_RANGE_EQ(expected_ungapped, sequence | ungapped_view);
    EXPECT_RANGE_EQ(expected_gapped, sequence | gapped_view);
}

// Ungapped shape: checks the three fixture texts and a text truncated by an upstream take_until.
TEST_F(minimiser_hash_test, ungapped)
{
    // Complete texts piped straight into the adaptor.
    EXPECT_RANGE_EQ(result1, text1 | ungapped_view);
    EXPECT_RANGE_EQ(result2, text2 | ungapped_view);
    EXPECT_RANGE_EQ(ungapped3, text3 | ungapped_view);

    // text3 restricted by a take_until adaptor whose predicate matches 'T'.
    auto is_thymine = [] (seqan3::dna4 const symbol)
    {
        return symbol == 'T'_dna4;
    };
    auto until_first_t = seqan3::views::take_until(is_thymine);
    EXPECT_RANGE_EQ(ungapped_stop_at_t3, text3 | until_first_t | ungapped_view);
}

// Gapped shape: checks the three fixture texts and a text truncated by an upstream take_until.
TEST_F(minimiser_hash_test, gapped)
{
    // Complete texts piped straight into the adaptor.
    EXPECT_RANGE_EQ(result1, text1 | gapped_view);
    EXPECT_RANGE_EQ(result2, text2 | gapped_view);
    EXPECT_RANGE_EQ(gapped3, text3 | gapped_view);

    // text3 restricted by a take_until adaptor whose predicate matches 'T'.
    auto is_thymine = [] (seqan3::dna4 const symbol)
    {
        return symbol == 'T'_dna4;
    };
    auto until_first_t = seqan3::views::take_until(is_thymine);
    EXPECT_RANGE_EQ(gapped_stop_at_t3, text3 | until_first_t | gapped_view);
}

// Builds the adaptors without a seed argument and compares against the *_default_seed expectations.
TEST_F(minimiser_hash_test, seed)
{
    auto ungapped_without_seed = seqan3::views::minimiser_hash(ungapped_shape, seqan3::window_size{8});
    auto gapped_without_seed = seqan3::views::minimiser_hash(gapped_shape, seqan3::window_size{8});

    EXPECT_RANGE_EQ(ungapped_default_seed, text1 | ungapped_without_seed);
    EXPECT_RANGE_EQ(gapped_default_seed, text1 | gapped_without_seed);
}

// Applying either shape with a window size of 3 is expected to raise std::invalid_argument.
TEST_F(minimiser_hash_test, shape_bigger_than_window)
{
    seqan3::window_size const too_small_window{3};

    // The complete pipe expression stays inside EXPECT_THROW so that the exception is caught
    // wherever it is raised.
    EXPECT_THROW(text1 | seqan3::views::minimiser_hash(ungapped_shape, too_small_window), std::invalid_argument);
    EXPECT_THROW(text1 | seqan3::views::minimiser_hash(gapped_shape, too_small_window), std::invalid_argument);
}