File: rbcc.cc

package info (click to toggle)
maq 0.7.1-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,500 kB
  • sloc: cpp: 5,025; ansic: 3,333; sh: 3,303; perl: 2,547; makefile: 31
file content (117 lines) | stat: -rw-r--r-- 3,894 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#include <unistd.h>
#include <zlib.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>
#include <string.h>
#include <stdio.h>
#include "main.h"
#include "const.h"
#include "stdhash.hh"

hash_map_char<int> *cns_load_dbSNP(FILE *fp_dbSNP)
{
	hash_map_char<int> *hash;
	char name[256], str1[256], str2[256];
	float het_rate;
	int qual, pos;
	hash = new hash_map_char<int>;
	hash->resize(100000);
	while (fscanf(fp_dbSNP, "%s%d%s%s%d%f", name, &pos, str1, str2, &qual, &het_rate) == 6) {
		sprintf(str1, "%s.%d", name, pos);
		hash->insert(str1, qual);
	}
	return hash;
}
static inline bit32_t rbcc_core(bit32_t high, bit32_t hrate)
{
	bit32_t b[3], q[2], tmp;
	b[0] = high>>24&0xf; b[1] = high>>12&0xf; b[2] = high>>8&0xf;
	q[0] = high>>16&0xff; q[1] = high&0xff;
	if (nst_nt16_count_table[b[0]] == 2) { // the best call is a het
		q[0] += hrate;
	} else if (nst_nt16_count_table[b[1]] == 2) { // the second best call is a het
		if (q[0] - hrate < 0) { // then the het should be the best call
			tmp = b[0]; b[0] = b[1]; b[1] = tmp; // swap b[0] and b[1]
			q[1] += q[0]; q[0] = hrate - q[0];
		} else { // just adjust qualities
			q[0] -= hrate; q[1] += hrate;
		}
	} else if (nst_nt16_count_table[b[2]] == 2) { // the third is a het
		if (q[0] + q[1] < hrate) { // then the het should be the best call
			tmp = b[0]; b[0] = b[2]; b[2] = b[1]; b[1] = tmp; // swap
			tmp = q[0]; q[0] = hrate - q[0] - q[1]; q[1] = tmp;
		} else if (q[1] < hrate) { // then the het should be the second best call
			tmp = b[1]; b[1] = b[2]; b[2] = tmp;
			tmp = q[0] + q[1] - hrate; q[1] = hrate - q[1]; q[0] = tmp;
		} else q[1] -= hrate; // just adjust qualities
	}
	if (q[0] > 255) q[0] = 255;
	if (q[1] > 255) q[1] = 255;
	return (high&0xf0000000u) | b[0]<<24 | b[1]<<12 | b[2]<<8 | q[0]<<16 | q[1];
}
void cns_rbcc(gzFile fpout, gzFile fp, FILE *fp_dbSNP, float r_ori, float r_good, float r_bad)
{
	int i, len;
	char key[256];
	bit32_t q_good, q_bad;
	assert(r_ori < 1.0 && r_good < 1.0 && r_bad < 1.0);
	q_good = int(4.343 * log(r_good / (1.0-r_good) / (r_ori / (1.0-r_ori))) + 0.5);
	q_bad  = int(4.343 * log(r_bad  / (1.0-r_bad ) / (r_ori / (1.0-r_ori))) + 0.5);
	hash_map_char<int> *hash = cns_load_dbSNP(fp_dbSNP);
	fprintf(stderr, "[cns_rbcc] %d SNPs are loaded.\n", hash->size());
	while (gzread(fp, &len, sizeof(int))) {
		gzwrite(fpout, &len, sizeof(int));
		char *name = (char*)malloc(len);
		gzread(fp, name, len);
		gzwrite(fpout, name, len);
		gzread(fp, &len, sizeof(int));
		gzwrite(fpout, &len, sizeof(int));
		for (i = 0; i != len; ++i) {
			bit32_t low, high;
			int qual;
			gzread(fp, &low, sizeof(bit32_t));
			gzwrite(fpout, &low, sizeof(bit32_t));
			gzread(fp, &high, sizeof(bit32_t));
			sprintf(key, "%s.%d", name, i+1);
			if (hash->find(key, &qual))
				high = rbcc_core(high, qual? q_good : q_bad);
			gzwrite(fpout, &high, sizeof(bit32_t));
		}
		free(name);
	}
	delete hash;
}

int ma_rbcc(int argc, char *argv[])
{
	gzFile fp, fpout;
	FILE *fp_dbSNP;
	int c;
	float r_ori, r_good, r_bad;
	r_ori = 0.001; r_good = 0.1; r_bad = 0.01;
	while ((c = getopt(argc, argv, "r:g:b:")) >= 0) {
		switch (c) {
		case 'r': r_ori = atof(optarg); break;
		case 'g': r_good = atof(optarg); break;
		case 'b': r_bad = atof(optarg); break;
		}
	}
	if(argc - optind < 3) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   maq rbcc [options] <out.cns> <in.cns> <chr_rpt.snp>\n\n");
		fprintf(stderr, "Options: -r FLOAT    original prior [0.001]\n");
		fprintf(stderr, "         -g FLOAT    prior for validated SNPs [0.1]\n");
		fprintf(stderr, "         -b FLOAT    prior for the rest of SNPs [0.01]\n\n");
		return 1;
	}
	fpout = gzopen(argv[optind], "w");
	fp = gzopen(argv[optind+1], "r");
	fp_dbSNP = fopen(argv[optind+2], "r");
	assert(fp && fpout && fp_dbSNP);
	cns_rbcc(fpout, fp, fp_dbSNP, r_ori, r_good, r_bad);
	gzclose(fp);
	gzclose(fpout);
	fclose(fp_dbSNP);
	return 0;
}