//***************************************************************************
//* Copyright (c) 2016 Saint Petersburg State University
//* All Rights Reserved
//* See file LICENSE for details.
//***************************************************************************
#ifndef __KMER_MAP_HPP__
#define __KMER_MAP_HPP__
#include "sequence/rtseq.hpp"
#include <hat-trie/hat-trie.h>
#include <boost/iterator/iterator_facade.hpp>
namespace debruijn_graph {
class KMerMap {
typedef RtSeq Kmer;
typedef RtSeq Seq;
typedef typename Seq::DataType RawSeqData;
value_t* internal_tryget(const Kmer &key) const {
return hattrie_tryget(mapping_, (const char *)key.data(), rawcnt_ * sizeof(RawSeqData));
}
value_t* internal_get(const Kmer &key) const {
return hattrie_get(mapping_, (const char *)key.data(), rawcnt_ * sizeof(RawSeqData));
}
int internal_erase(const Kmer &key) {
return hattrie_del(mapping_, (const char *)key.data(), rawcnt_ * sizeof(RawSeqData));
}
class iterator : public boost::iterator_facade<iterator,
const std::pair<Kmer, Seq>,
std::forward_iterator_tag,
const std::pair<Kmer, Seq>> {
public:
iterator(unsigned k, hattrie_iter_t *start = nullptr)
: k_(k), iter_(start, [](hattrie_iter_t *p) { hattrie_iter_free(p); }) {}
private:
friend class boost::iterator_core_access;
void increment() {
hattrie_iter_next(iter_.get());
}
bool equal(const iterator &other) const {
// Special case: NULL and finished are equal
if (iter_.get() == nullptr || hattrie_iter_finished(iter_.get()))
return other.iter_.get() == nullptr || hattrie_iter_finished(other.iter_.get());
if (other.iter_.get() == nullptr)
return false;
return hattrie_iter_equal(iter_.get(), other.iter_.get());
}
const std::pair<Kmer, Seq> dereference() const {
size_t len;
Kmer k(k_, (const RawSeqData*)hattrie_iter_key(iter_.get(), &len));
Seq s(k_, (const RawSeqData*)(*hattrie_iter_val(iter_.get())));
return std::make_pair(k, s);
}
unsigned k_;
std::shared_ptr<hattrie_iter_t> iter_;
};
public:
KMerMap(unsigned k)
: k_(k), mapping_(hattrie_create()) {
rawcnt_ = (unsigned)Seq::GetDataSize(k_);
}
~KMerMap() {
clear();
hattrie_free(mapping_);
}
void erase(const Kmer &key) {
value_t *vp = internal_tryget(key);
if (vp == nullptr)
return;
RawSeqData *value = reinterpret_cast<RawSeqData*>(*vp);
delete[] value;
int res = internal_erase(key);
VERIFY_MSG(res == 0, "Failed to delete from kmer mapper");
}
void set(const Kmer &key, const Seq &value) {
value_t *vp = internal_tryget(key);
RawSeqData *rawvalue = nullptr;
if (vp == nullptr) {
vp = internal_get(key);
rawvalue = new RawSeqData[rawcnt_];
*vp = reinterpret_cast<uintptr_t>(rawvalue);
} else {
rawvalue = reinterpret_cast<RawSeqData*>(*vp);
}
memcpy(rawvalue, value.data(), rawcnt_ * sizeof(RawSeqData));
}
bool count(const Kmer &key) const {
return internal_tryget(key) != nullptr;
}
const RawSeqData *find(const Kmer &key) const {
value_t *vp = internal_tryget(key);
if (vp == nullptr)
return nullptr;
return reinterpret_cast<const RawSeqData*>(*vp);
}
void clear() {
// Delete all the values
auto *iter = hattrie_iter_begin(mapping_, false);
while (!hattrie_iter_finished(iter)) {
RawSeqData *value = (RawSeqData*)(*hattrie_iter_val(iter));
delete[] value;
hattrie_iter_next(iter);
}
hattrie_iter_free(iter);
// Delete the mapping and all the keys
hattrie_clear(mapping_);
}
size_t size() const {
return hattrie_size(mapping_);
}
iterator begin() const {
return iterator(k_, hattrie_iter_begin(mapping_, false));
}
iterator end() const {
return iterator(k_);
}
private:
unsigned k_;
unsigned rawcnt_;
hattrie_t *mapping_;
};
}
#endif // __KMER_MAP_HPP__