File: KmerIntervalMap.hpp

package info (click to toggle)
salmon 0.7.2%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 4,352 kB
  • ctags: 5,243
  • sloc: cpp: 42,341; ansic: 6,252; python: 228; makefile: 207; sh: 190
file content (131 lines) | stat: -rw-r--r-- 3,064 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#ifndef __KMER_INTERVAL_MAP_HPP__
#define __KMER_INTERVAL_MAP_HPP__

extern "C" {
#include "bwt.h"
}

#include <fstream>
#include <unordered_map>

#include <jellyfish/mer_dna.hpp>

#include "cereal/archives/binary.hpp"
#include "cereal/types/unordered_map.hpp"

#include "xxhash.h"

using JFMer = jellyfish::mer_dna_ns::mer_base_static<uint64_t, 1>;

// What will be the keys in our k-mer has map
struct KmerKey {
    KmerKey() {
        mer_.polyT();
    }

    KmerKey(uint8_t* seq, uint32_t len) : mer_(len) {
        mer_.polyT();
        for (size_t i = 0; i < len; ++i) {
            mer_.shift_left(seq[i]);
        }
    }

    bool operator==(const KmerKey& ok) const {
        return mer_ == ok.mer_;
    }

    // Is there a smarter way to do save / load here?
    template <typename Archive>
    void save(Archive& archive) const {
        auto key = mer_.get_bits(0, 2*mer_.k());
        archive(key);
    }

    template <typename Archive>
    void load(Archive& archive) {
        mer_.polyT();
        uint64_t bits;
        archive(bits);
        mer_.set_bits(0, 2*mer_.k(), bits);
    }

    JFMer mer_;
};

template <typename Archive>
void load(Archive& archive, bwtintv_t& interval) {
    archive( interval.x[0], interval.x[1], interval.x[2], interval.info );
}

template <typename Archive>
void save(Archive& archive, const bwtintv_t& interval) {
    archive( interval.x[0], interval.x[1], interval.x[2], interval.info );
}

/**
 *  This class provides an efficent hash-map from 
 *  k-mers to BWT intervals.
 */
class KmerIntervalMap {
    public:
    // How we hash the keys
    struct KmerHasher {
        std::size_t operator()(const KmerKey& k) const {
            void* data = static_cast<void*>(const_cast<KmerKey&>(k).mer_.data__());
           return XXH64(data, sizeof(uint64_t), 0);
        }
    };
 
    private:
        std::unordered_map<KmerKey, bwtintv_t, KmerHasher> map_;

    public:
    void setK(unsigned int k) { JFMer::k(k); }
    uint32_t k() { return JFMer::k(); }

    bool hasKmer(KmerKey& k) {
        return map_.find(k) != map_.end();
    }

    decltype(map_)::iterator find(const KmerKey& k) {
        return map_.find(k);
    }
    decltype(map_)::iterator find(KmerKey&& k) {
        return map_.find(k);
    }

    decltype(map_)::iterator end() {
        return map_.end();
    }


    bwtintv_t& operator[](const KmerKey& k) {
        return map_[k];
    }
    bwtintv_t& operator[](KmerKey&& k) {
        return map_[k];
    }
    
    decltype(map_)::size_type size() { return map_.size(); }

    void save(boost::filesystem::path indexPath) {
        std::ofstream ofs(indexPath.string(), std::ios::binary);
        {
            cereal::BinaryOutputArchive oa(ofs);
            oa(map_);
        }
        ofs.close();
    }

    void load(boost::filesystem::path indexPath) {
        std::ifstream ifs(indexPath.string(), std::ios::binary);
        {
            cereal::BinaryInputArchive ia(ifs);
            ia(map_);
        }
        ifs.close();
    }

};

#endif // __KMER_INTERVAL_MAP_HPP__