File: simple.d

package info (click to toggle)
sambamba 1.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 3,528 kB
  • sloc: sh: 220; python: 166; ruby: 147; makefile: 103
file content (47 lines) | stat: -rw-r--r-- 1,151 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
module bio.std.hts.bam.snpcallers.simple;

import bio.std.hts.bam.pileup;
import bio.core.utils.algo;

import std.algorithm;

struct SimpleCallerSettings {
    int minimum_coverage = 5;
    int minimum_witnesses = 2;
}

SimpleCallerSettings defaultSettings;

bool isSNP(C)(C column, ref SimpleCallerSettings settings)
{
    if (column.coverage < settings.minimum_coverage) {
        return false;
    }

    int[char] bases_count;

    foreach (read; column.reads) {
        if (read.current_base != '-') {
            bases_count[read.current_base] += 1;
        }
    }

    if (bases_count.length == 0) {
        // e.g. all overlapping reads have deletions at this location
        return false;
    }

    auto consensus = argmax!(base => bases_count[base])(bases_count.byKey());

    if (bases_count[consensus] < settings.minimum_witnesses) {
        return false;
    }

    return consensus != column.reference_base;
}

auto findSNPs(R)(R reads, ref SimpleCallerSettings settings=defaultSettings) {
    auto columns = pileupWithReferenceBases(reads);

    return filter!(column => isSNP(column, settings))(filter!"a.coverage > 0"(columns));
}