File: NeighborJoinParam.cpp

package info (click to toggle)
uc-echo 1.12-15
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,864 kB
  • sloc: cpp: 1,405; python: 660; makefile: 48; sh: 11
file content (140 lines) | stat: -rw-r--r-- 5,734 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#include <iostream>
#include <stdexcept>
#include <vector>
#include <tr1/unordered_map>
#include <map>
#include <fstream>
#include <iostream>
#include <string>
#include <set>
#include <utility>
#include <algorithm>
#include <memory>

#include "util.hpp"
#include "DNASeq.hpp"
#include "MMAPReads.hpp"
#include "KmerHashMap.hpp"
#include "NeighborSet.hpp"

using namespace std;

class removeNeighborPredicate {
    public:
        removeNeighborPredicate(NeighborSet &Neighbors) : Neighbors(Neighbors) {};

        bool operator()(KmerOccurrence const &occ) {
            std::map<unsigned int, std::map<unsigned int, NeighborInfo> >::iterator iter_neighbor = Neighbors.getData().find(occ.readID);
            if(iter_neighbor==Neighbors.getData().end()) {
                return false;
            } else {
                return iter_neighbor->second.size()>NeighborSet::MaxNNeighbors;
            }
        }

        NeighborSet &Neighbors;
};

int main(int argc, char** argv) {
    // Initialize constants.
    Options opt(argc, argv);

    unsigned int read_st = opt.read_st;
    unsigned int read_ed = opt.read_ed;

    unsigned int ihash_st = opt.ihash_st;
    unsigned int ihash_ed = opt.ihash_ed;

    // Read data.
    // Check for files first.
    FILE *tmpfile = fopen(opt.inputFNames[0], "rb");
    if(tmpfile == NULL) {
        throw std::runtime_error("Could not open file 1.");
    } else {
        fclose(tmpfile);
    }

    tmpfile = fopen(opt.inputFNames[1], "rb");
    if(tmpfile == NULL) {
        throw std::runtime_error("Could not open file 2.");
    } else {
        fclose(tmpfile);
    }

    // Input files are hash files.
    HashMMAP kmer_mmap(opt.inputFNames[0], opt.inputFNames[1], ihash_st, ihash_ed);

    // Initialize new Neighbor Set (adjacency list).
    NeighborSet Neighbors(opt.read_st, opt.read_ed, opt.read_st, opt.read_ed);
    MMAPReads readfile(opt.readFName);

    // Construct Neighbor Sets
    for(HashMMAP::ConstIterator kmer_iter = kmer_mmap.begin(); kmer_iter!=kmer_mmap.end(); ++kmer_iter) {
        tr1::unordered_map<unsigned int, DNASeq> Reads;

        // Initialize neighbors.
        vector<KmerOccurrence> kmer_occurrences;
        vector<KmerOccurrence> kmer_occurrences2;

        for(HashMMAP::ConstReadIterator read=kmer_iter.read_begin(); read!=kmer_iter.read_end(); ++read) {
            // Add current read to hash from readIDs to read strings
            if(read.getReadID() >= read_st && read.getReadID() < read_ed) {
                Reads[read.getReadID()] = readfile[read.getReadID()];
                kmer_occurrences.push_back(KmerOccurrence(read.getReadID(), read.getPos()));
            }
            Reads[read.getReadID()] = readfile[read.getReadID()];
            kmer_occurrences2.push_back(KmerOccurrence(read.getReadID(), read.getPos()));
        }

        // Remove every read that has more than NeighborSet::MaxNNeighbors
        //kmer_occurrences.erase(remove_if(kmer_occurrences.begin(), kmer_occurrences.end(), [&Neighbors](const KmerOccurrence& occ) {
        //            auto iter_neighbor = Neighbors.getData().find(occ.readID);
        //            if(iter_neighbor==Neighbors.getData().end()) {
        //                return false;
        //            } else {
        //                return iter_neighbor->second.size()>NeighborSet::MaxNNeighbors;
        //            }
        //        }), kmer_occurrences.end());
        kmer_occurrences.erase(remove_if(kmer_occurrences.begin(), kmer_occurrences.end(), removeNeighborPredicate(Neighbors)), kmer_occurrences.end());

        // Compute neighbor set
        for(vector<KmerOccurrence>::iterator read1=kmer_occurrences.begin(); read1!=kmer_occurrences.end(); ++read1) {
            // Add neighbors from read1 to neighbor vertices
            const unsigned int readid1 = read1->readID;
            for(vector<KmerOccurrence>::iterator read2=kmer_occurrences2.begin(); read2!=kmer_occurrences2.end(); ++read2) {
                const unsigned int readid2 = read2->readID;
                if(readid2<readid1) continue;

                map<unsigned int, map<unsigned int, NeighborInfo> >::iterator iter_neighbor1 = Neighbors.getData().find(readid1);
                if(iter_neighbor1 != Neighbors.getData().end()) {
                    if(iter_neighbor1->second.size()>NeighborSet::MaxNNeighbors) break;
                }

                NeighborInfo old_ninfo, new_ninfo;
                new_ninfo.set_offset(read1->pos, read2->pos);

                map<unsigned int, NeighborInfo>::iterator iter_old_ninfo = Neighbors.getData()[readid1].find(readid2);
                bool seen = (iter_old_ninfo!=Neighbors.getData()[readid1].end());
                if( seen ) old_ninfo = iter_old_ninfo->second;
                if( seen && new_ninfo==old_ninfo) continue;

                const DNASeq& seq1 = Reads[readid1];
                const DNASeq& seq2 = Reads[readid2];

                bool is_neighbor = new_ninfo.isNeighbor(seq1, seq2, opt.K, opt.h, opt.e);
                bool is_better = seen ? is_neighbor&&new_ninfo.isBetter(old_ninfo, seq1.size(), seq2.size()) : true;

                if((!seen && is_neighbor) || (seen && is_better)) {
                    Neighbors.getData()[readid1][readid2] = NeighborInfo(read1->pos, read2->pos, new_ninfo.get_nerr());
                    if(readid2 >= read_st && readid2 < read_ed) {
                        Neighbors.getData()[readid2][readid1] = NeighborInfo(read2->pos, read1->pos, new_ninfo.get_nerr());
                    }
                }
                // In case the neighbor already exists, choose the one with minimum mismatch.
            }
        }
    }
    Neighbors.dump(opt.fpre, opt.fsuf);

    return 0;
}