1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
module bio.std.hts.bam.snpcallers.simple;
import bio.std.hts.bam.pileup;
import bio.core.utils.algo;
import std.algorithm;
struct SimpleCallerSettings {
int minimum_coverage = 5;
int minimum_witnesses = 2;
}
SimpleCallerSettings defaultSettings;
bool isSNP(C)(C column, ref SimpleCallerSettings settings)
{
if (column.coverage < settings.minimum_coverage) {
return false;
}
int[char] bases_count;
foreach (read; column.reads) {
if (read.current_base != '-') {
bases_count[read.current_base] += 1;
}
}
if (bases_count.length == 0) {
// e.g. all overlapping reads have deletions at this location
return false;
}
auto consensus = argmax!(base => bases_count[base])(bases_count.byKey());
if (bases_count[consensus] < settings.minimum_witnesses) {
return false;
}
return consensus != column.reference_base;
}
auto findSNPs(R)(R reads, ref SimpleCallerSettings settings=defaultSettings) {
auto columns = pileupWithReferenceBases(reads);
return filter!(column => isSNP(column, settings))(filter!"a.coverage > 0"(columns));
}
|