File: errmod.c

package info (click to toggle)
samtools-legacy 0.1.19-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, stretch
  • size: 1,976 kB
  • ctags: 3,083
  • sloc: ansic: 22,454; perl: 3,037; makefile: 211; java: 158; python: 141
file content (130 lines) | stat: -rw-r--r-- 3,485 bytes parent folder | download | duplicates (18)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#include <math.h>
#include "errmod.h"
#include "ksort.h"
KSORT_INIT_GENERIC(uint16_t)

typedef struct __errmod_coef_t {
	double *fk, *beta, *lhet;
} errmod_coef_t;

typedef struct {
	double fsum[16], bsum[16];
	uint32_t c[16];
} call_aux_t;

static errmod_coef_t *cal_coef(double depcorr, double eta)
{
	int k, n, q;
	long double sum, sum1;
	double *lC;
	errmod_coef_t *ec;

	ec = calloc(1, sizeof(errmod_coef_t));
	// initialize ->fk
	ec->fk = (double*)calloc(256, sizeof(double));
	ec->fk[0] = 1.0;
	for (n = 1; n != 256; ++n)
		ec->fk[n] = pow(1. - depcorr, n) * (1.0 - eta) + eta;
	// initialize ->coef
	ec->beta = (double*)calloc(256 * 256 * 64, sizeof(double));
	lC = (double*)calloc(256 * 256, sizeof(double));
	for (n = 1; n != 256; ++n) {
		double lgn = lgamma(n+1);
		for (k = 1; k <= n; ++k)
			lC[n<<8|k] = lgn - lgamma(k+1) - lgamma(n-k+1);
	}
	for (q = 1; q != 64; ++q) {
		double e = pow(10.0, -q/10.0);
		double le = log(e);
		double le1 = log(1.0 - e);
		for (n = 1; n <= 255; ++n) {
			double *beta = ec->beta + (q<<16|n<<8);
			sum1 = sum = 0.0;
			for (k = n; k >= 0; --k, sum1 = sum) {
				sum = sum1 + expl(lC[n<<8|k] + k*le + (n-k)*le1);
				beta[k] = -10. / M_LN10 * logl(sum1 / sum);
			}
		}
	}
	// initialize ->lhet
	ec->lhet = (double*)calloc(256 * 256, sizeof(double));
	for (n = 0; n < 256; ++n)
		for (k = 0; k < 256; ++k)
			ec->lhet[n<<8|k] = lC[n<<8|k] - M_LN2 * n;
	free(lC);
	return ec;
}

errmod_t *errmod_init(float depcorr)
{
	errmod_t *em;
	em = (errmod_t*)calloc(1, sizeof(errmod_t));
	em->depcorr = depcorr;
	em->coef = cal_coef(depcorr, 0.03);
	return em;
}

void errmod_destroy(errmod_t *em)
{
	if (em == 0) return;
	free(em->coef->lhet); free(em->coef->fk); free(em->coef->beta);
	free(em->coef); free(em);
}
// qual:6, strand:1, base:4
int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q)
{
	call_aux_t aux;
	int i, j, k, w[32];

	if (m > m) return -1;
	memset(q, 0, m * m * sizeof(float));
	if (n == 0) return 0;
	// calculate aux.esum and aux.fsum
	if (n > 255) { // then sample 255 bases
		ks_shuffle(uint16_t, n, bases);
		n = 255;
	}
	ks_introsort(uint16_t, n, bases);
	memset(w, 0, 32 * sizeof(int));
	memset(&aux, 0, sizeof(call_aux_t));
	for (j = n - 1; j >= 0; --j) { // calculate esum and fsum
		uint16_t b = bases[j];
		int q = b>>5 < 4? 4 : b>>5;
		if (q > 63) q = 63;
		k = b&0x1f;
		aux.fsum[k&0xf] += em->coef->fk[w[k]];
		aux.bsum[k&0xf] += em->coef->fk[w[k]] * em->coef->beta[q<<16|n<<8|aux.c[k&0xf]];
		++aux.c[k&0xf];
		++w[k];
	}
	// generate likelihood
	for (j = 0; j != m; ++j) {
		float tmp1, tmp3;
		int tmp2, bar_e;
		// homozygous
		for (k = 0, tmp1 = tmp3 = 0.0, tmp2 = 0; k != m; ++k) {
			if (k == j) continue;
			tmp1 += aux.bsum[k]; tmp2 += aux.c[k]; tmp3 += aux.fsum[k];
		}
		if (tmp2) {
			bar_e = (int)(tmp1 / tmp3 + 0.499);
			if (bar_e > 63) bar_e = 63;
			q[j*m+j] = tmp1;
		}
		// heterozygous
		for (k = j + 1; k < m; ++k) {
			int cjk = aux.c[j] + aux.c[k];
			for (i = 0, tmp2 = 0, tmp1 = tmp3 = 0.0; i < m; ++i) {
				if (i == j || i == k) continue;
				tmp1 += aux.bsum[i]; tmp2 += aux.c[i]; tmp3 += aux.fsum[i];
			}
			if (tmp2) {
				bar_e = (int)(tmp1 / tmp3 + 0.499);
				if (bar_e > 63) bar_e = 63;
				q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]] + tmp1;
			} else q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]]; // all the bases are either j or k
		}
		for (k = 0; k != m; ++k) if (q[j*m+k] < 0.0) q[j*m+k] = 0.0;
	}
	return 0;
}