File: KmerHistTests.cpp

package info (click to toggle)
salmon 0.7.2%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 4,352 kB
  • ctags: 5,243
  • sloc: cpp: 42,341; ansic: 6,252; python: 228; makefile: 207; sh: 190
file content (137 lines) | stat: -rw-r--r-- 3,969 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include "UtilityFunctions.hpp"

// from http://stackoverflow.com/questions/2380962/generate-all-combinations-of-arbitrary-alphabet-up-to-arbitrary-length
std::vector<std::string> getAllWords(int length) {

    int N_LETTERS = 4;
    char alphabet[] = {'A', 'C', 'G', 'T'};
  std::vector<int> index(length, 0);
  std::vector<std::string> words;

  while(true)
  {
    std::string word(length, ' ');
    for (int i = 0; i < length; ++i)
      word[i] = alphabet[index[i]];
    words.push_back(word);

    for (int i = length-1; ; --i)
    {
      if (i < 0) return words;
      index[i]++;
      if (index[i] == N_LETTERS)
        index[i] = 0;
      else
        break;
    }
  }
}

#include <atomic>

SCENARIO("Kmers encode and decode correctly") {
    using salmon::utils::Direction;
    GIVEN("All 6-mers") {
        std::vector<std::string> kmers = getAllWords(6);
        //KmerDist<6, std::atomic<uint32_t>> kh;
        for (auto& k : kmers) {
            auto i = indexForKmer(k.c_str(), 6, Direction::FORWARD);
            auto kp = kmerForIndex(i, 6);
            WHEN("kmer is [" + k + "]") {
                THEN("decodes as [" + kp + "]") {
                    REQUIRE(k == kp);
                }
            }
        }
    }
}

std::string rc(const std::string& s) {
    std::string rc;
    for (int32_t i = s.size() - 1; i >= 0; --i) {
        switch(s[i]) {
        case 'A': rc += 'T'; break;
        case 'C': rc += 'G'; break;
        case 'G': rc += 'C'; break;
        case 'T': rc += 'A'; break;
        }
    }
    return rc;
};

SCENARIO("Kmers encode and decode correctly (reverse complement)") {
    using salmon::utils::Direction;
    GIVEN("All 6-mers") {
        std::vector<std::string> kmers = getAllWords(6);
        //KmerDist<6, std::atomic<uint32_t>> kh;
        for (auto& k : kmers) {
            auto i = indexForKmer(k.c_str(), 6, Direction::REVERSE_COMPLEMENT);
            auto kp = kmerForIndex(i, 6);
            auto krc = rc(k);
            WHEN("kmer is [" + k + "]") {
                THEN("decodes as [" + kp + "]") {
                    REQUIRE(krc == kp);
                }
            }
        }
    }
}


SCENARIO("The next k-mer index function works correctly") {
    using salmon::utils::Direction;
    const uint32_t K = 6;
    std::string s = "ATTCTCCACATAGTTGTCATCGAACCAGTACCCCGTAAGCGCCAACATAT";

    GIVEN("The string " + s) {
        auto idx = indexForKmer(s.c_str(), 6, Direction::FORWARD);
        std::string k = s.substr(0, 6);
        WHEN("kmer is [" + k + "]") {
            auto kp = kmerForIndex(idx, 6);
            THEN("decodes as [" + kp + "]") {
                REQUIRE(k == kp);
            }
        }
        for (size_t i = 0; i < s.size() - K; ++i) {
            idx = nextKmerIndex(idx, s[i+K], 6, Direction::FORWARD);
            k = s.substr(i+1, 6);
            WHEN("kmer is [" + k + "]") {
                auto kp = kmerForIndex(idx, 6);
                THEN("decodes as [" + kp + "]") {
                    REQUIRE(k == kp);
                }
            }
        }
    }

    //auto rcs = rc(s);

    GIVEN("The string " + s + " in the reverse complement direction") {
        auto idx = indexForKmer(s.c_str(), 6, Direction::REVERSE_COMPLEMENT);
        std::string k = rc(s.substr(0, 6));
        WHEN("kmer is [" + k + "]") {
            auto kp = kmerForIndex(idx, 6);
            THEN("decodes as [" + kp + "]") {
                REQUIRE(k == kp);
            }
        }
        const char* seq = s.c_str();
	for (size_t i = 0; i < s.size() - K; ++i) {
	  idx = nextKmerIndex(idx, s[i+K], 6, Direction::REVERSE_COMPLEMENT);
	  auto idx2 = indexForKmer(seq+i+1, 6, Direction::REVERSE_COMPLEMENT);
	  k = rc(s.substr(i+1, 6));
	  WHEN("kmer is [" + k + "]") {
	    auto kp = kmerForIndex(idx, 6);
	    THEN("decodes as [" + kp + "]") {
	      REQUIRE(k == kp);
	    }
        THEN("incremental decoding works") {
            REQUIRE(idx == idx2);
        }
	  }
	}
    }

}