File: Repeats.cpp

package info (click to toggle)
libsmithwaterman 0.0%2Bgit20160702.2610e25-12
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid, trixie
  • size: 292 kB
  • sloc: cpp: 2,115; makefile: 59; sh: 16
file content (69 lines) | stat: -rw-r--r-- 1,893 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#include "Repeats.h"

map<string, int> repeatCounts(long int position, const string& sequence, int maxsize) {
    map<string, int> counts;
    for (int i = 1; i <= maxsize; ++i) {
        // subseq here i bases
        string seq = sequence.substr(position, i);
        // go left.

        int j = position - i;
        int leftsteps = 0;
        while (j >= 0 && seq == sequence.substr(j, i)) {
            j -= i;
            ++leftsteps;
        }

        // go right.
        j = position;

        int rightsteps = 0;
        while (j + i <= sequence.size() && seq == sequence.substr(j, i)) {
            j += i;
            ++rightsteps;
        }
        // if we went left and right a non-zero number of times, 
        if (leftsteps + rightsteps > 1) {
            counts[seq] = leftsteps + rightsteps;
        }
    }

    // filter out redundant repeat information
    if (counts.size() > 1) {
        map<string, int> filteredcounts;
        map<string, int>::iterator c = counts.begin();
        string prev = c->first;
        filteredcounts[prev] = c->second;  // shortest sequence
        ++c;
        for (; c != counts.end(); ++c) {
            int i = 0;
            string seq = c->first;
            while (i + prev.length() <= seq.length() && seq.substr(i, prev.length()) == prev) {
                i += prev.length();
            }
            if (i < seq.length()) {
                filteredcounts[seq] = c->second;
                prev = seq;
            }
        }
        return filteredcounts;
    } else {
        return counts;
    }
}

bool isRepeatUnit(const string& seq, const string& unit) {

    if (seq.size() % unit.size() != 0) {
	return false;
    } else {
	int maxrepeats = seq.size() / unit.size();
	for (int i = 0; i < maxrepeats; ++i) {
	    if (seq.substr(i * unit.size(), unit.size()) != unit) {
		return false;
	    }
	}
	return true;
    }

}