1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
|
#include "btllib/bloom_filter.hpp"
#include "helpers.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
namespace {

/// Takes the last element off `pending` and stores it in `next`.
///
/// The emptiness check and the pop happen inside one unnamed OpenMP critical
/// section, so threads of a parallel region can share `pending` as a work
/// queue. Without OpenMP the pragma is ignored and this is a plain pop.
///
/// @param pending Shared queue of sequences; shrinks by one on success.
/// @param next    Receives the popped sequence; left untouched on failure.
/// @return true if a sequence was taken, false if `pending` was empty.
bool
take_next_seq(std::vector<std::string>& pending, std::string& next)
{
  bool taken = false;
#pragma omp critical
  {
    if (!pending.empty()) {
      // Move rather than copy: the element is discarded right afterwards,
      // and this keeps the critical section short.
      next = std::move(pending.back());
      pending.pop_back();
      taken = true;
    }
  }
  return taken;
}

/// Reports whether `seed_idx` is listed among the hits of the first entry
/// of `hits`.
///
/// @param hits     Result of SeedBloomFilter::contains(); must be non-empty
///                 (callers assert this before calling).
/// @param seed_idx Index of the seed to look for.
/// @return true if `seed_idx` occurs in `hits[0]`.
template<typename Hits>
bool
first_pos_has_seed(const Hits& hits, unsigned seed_idx)
{
  return std::find(hits[0].begin(), hits[0].end(), seed_idx) != hits[0].end();
}

} // namespace

/// Exercises BloomFilter, KmerBloomFilter and SeedBloomFilter, including a
/// save/load round trip and concurrent insert/query through OpenMP.
///
/// @return 0 when the end of the test is reached.
int
main()
{
  std::cerr << "Testing BloomFilter" << std::endl;

  btllib::BloomFilter bf(1024 * 1024, 3, "ntHash");
  bf.insert({ 1, 10, 100 });
  bf.insert({ 100, 200, 300 });

  TEST_ASSERT(bf.contains({ 1, 10, 100 }));
  TEST_ASSERT(bf.contains({ 100, 200, 300 }));
  TEST_ASSERT(!bf.contains({ 1, 20, 100 }));

  // Round trip through a file: the reloaded filter must answer the same
  // queries and report the same hash function name.
  auto filename = get_random_name(64);
  bf.save(filename);
  TEST_ASSERT(btllib::BloomFilter::is_bloom_file(filename));

  btllib::BloomFilter bf2(filename);
  TEST_ASSERT_EQ(bf2.get_hash_fn(), "ntHash");

  TEST_ASSERT(bf2.contains({ 1, 10, 100 }));
  TEST_ASSERT(bf2.contains({ 100, 200, 300 }));
  TEST_ASSERT(!bf2.contains({ 1, 20, 100 }));

  // First call inserts and reports "was absent"; second reports "present".
  TEST_ASSERT(!bf2.contains_insert({ 9, 99, 999 }));
  TEST_ASSERT(bf2.contains_insert({ 9, 99, 999 }));

  std::remove(filename.c_str());

  std::string seq = "CACTATCGACGATCATTCGAGCATCAGCGACTG";
  std::string seq2 = "GTAGTACGATCAGCGACTATCGAGCTACGAGCA";
  TEST_ASSERT_EQ(seq.size(), seq2.size());

  std::cerr << "Testing KmerBloomFilter" << std::endl;

  btllib::KmerBloomFilter kmer_bf(1024 * 1024, 4, seq.size() / 2);
  kmer_bf.insert(seq);
  // With k = seq.size() / 2 the sequence has size - k + 1 k-mers, and every
  // one of them is expected to be found.
  TEST_ASSERT_EQ(kmer_bf.contains(seq), (seq.size() - seq.size() / 2 + 1));
  TEST_ASSERT_LE(kmer_bf.contains(seq2), 1);

  std::cerr << "Testing SeedBloomFilter" << std::endl;

  // NOTE(review): presumably '0' marks positions the seed ignores - confirm
  // against the SeedBloomFilter documentation.
  std::string seed1 = "000001111111111111111111111111111";
  std::string seed2 = "111111111111111111111111111100000";
  // snp_seq1 differs from seq in its first base, snp_seq2 in its last base.
  std::string snp_seq1 = "AACTATCGACGATCATTCGAGCATCAGCGACTG";
  std::string snp_seq2 = "CACTATCGACGATCATTCGAGCATCAGCGACTA";
  TEST_ASSERT_EQ(seed1.size(), seed2.size());

  btllib::SeedBloomFilter seed_bf(1024 * 1024, seq.size(), { seed1, seed2 }, 4);
  seed_bf.insert(seq);

  // The exact sequence must hit with both seeds.
  auto hit_seeds = seed_bf.contains(seq);
  // Guard the hit_seeds[0] accesses below: an empty result should be a test
  // failure, not undefined behaviour.
  TEST_ASSERT(!hit_seeds.empty());
  TEST_ASSERT(first_pos_has_seed(hit_seeds, 0));
  TEST_ASSERT(first_pos_has_seed(hit_seeds, 1));

  // Mismatch in the first base: only seed 0 is expected to hit.
  hit_seeds = seed_bf.contains(snp_seq1);
  TEST_ASSERT(!hit_seeds.empty());
  TEST_ASSERT(first_pos_has_seed(hit_seeds, 0));
  TEST_ASSERT(!first_pos_has_seed(hit_seeds, 1));

  // Mismatch in the last base: only seed 1 is expected to hit.
  hit_seeds = seed_bf.contains(snp_seq2);
  TEST_ASSERT(!hit_seeds.empty());
  TEST_ASSERT(!first_pos_has_seed(hit_seeds, 0));
  TEST_ASSERT(first_pos_has_seed(hit_seeds, 1));

  std::cerr << "Testing KmerBloomFilter with multiple threads" << std::endl;

  const size_t num_seqs = 1000;
  std::vector<std::string> present_seqs;
  std::vector<std::string> absent_seqs;
  present_seqs.reserve(num_seqs);
  absent_seqs.reserve(num_seqs);
  for (size_t i = 0; i < num_seqs; i++) {
    present_seqs.push_back(get_random_seq(get_random(100, 200)));
    absent_seqs.push_back(get_random_seq(get_random(100, 200)));
  }
  // The insert phase drains present_seqs, so keep a copy to query afterwards.
  std::vector<std::string> present_seqs2 = present_seqs;

  btllib::KmerBloomFilter kmer_bf2(50 * 1024 * 1024, 4, 100);

  // Phase 1: all threads insert concurrently, pulling work from present_seqs.
#pragma omp parallel shared(present_seqs, present_seqs2, absent_seqs, kmer_bf2)
  {
    std::string work_item;
    while (take_next_seq(present_seqs, work_item)) {
      kmer_bf2.insert(work_item);
    }
  }

  // Phase 2: query sequences that were never inserted; every reported k-mer
  // hit counts as a false positive. Per-thread counts are summed by the
  // reduction clause.
  unsigned false_positives = 0;
#pragma omp parallel shared(present_seqs,                                      \
                            present_seqs2,                                     \
                            absent_seqs,                                       \
                            kmer_bf2) reduction(+ : false_positives)
  {
    std::string work_item;
    while (take_next_seq(absent_seqs, work_item)) {
      false_positives += kmer_bf2.contains(work_item);
    }
  }
  std::cerr << "False positives = " << false_positives << std::endl;
  TEST_ASSERT_LT(false_positives, 10);

  // Phase 3: every inserted sequence must be reported as present.
#pragma omp parallel shared(present_seqs, present_seqs2, absent_seqs, kmer_bf2)
  {
    std::string work_item;
    while (take_next_seq(present_seqs2, work_item)) {
      TEST_ASSERT(kmer_bf2.contains(work_item));
    }
  }

  return 0;
}
|