1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
#include <iostream>
#include <vector>
#include <tr1/unordered_map>
#include <fstream>
#include <iostream>
#include <string>
#include <set>
#include <utility>
#include <algorithm>
#include <tr1/memory>
#include "util.hpp"
#include "DNASeq.hpp"
#include "MMAPReads.hpp"
#include "KmerHashMap.hpp"
using namespace std;
int main(int argc, char** argv) {
// Initialize constants.
Options opt(argc, argv);
// Input reads with reverse complements.
KmerHashMap KmerBin(opt.ihash_st, opt.ihash_ed, opt.nhash);
// Read data.
MMAPReads readfile(opt.readFName);
// Construct hash bin.
for(unsigned int i=opt.read_st; i<opt.read_ed; i++) {
bool orig = readfile.isOrig(i);
tr1::unordered_map<Kmer, KmerOccurrence, Kmer_hash, Kmer_cmp> myKmer;
tr1::shared_ptr<DNASeq> curRead(new DNASeq(readfile[i]));
for(int pos=0; pos<=curRead->size()-opt.K; pos++) {
Kmer kmer(curRead, pos, opt.K);
if(myKmer.find(kmer)==myKmer.end()) {
// new kmer -> insert
myKmer[kmer]=KmerOccurrence(i, pos);
} else {
// existing kmer -> keep the one that is closest to the beginning of the read
if(!orig) myKmer[kmer] = KmerOccurrence(i, pos);
}
}
// Insert kmer back to KmerBin
for(tr1::unordered_map<Kmer, KmerOccurrence, Kmer_hash, Kmer_cmp>::iterator mykmer = myKmer.begin();
mykmer != myKmer.end(); mykmer++) {
KmerBin[mykmer->first].push_back(mykmer->second);
}
}
// fpre is tmpDIR.
// fsuf is the read start ID for the hash file.
KmerBin.dump_binary(opt.fpre, opt.fsuf);
return 0;
}
|