1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
#include <unistd.h>
#include <zlib.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>
#include <string.h>
#include <stdio.h>
#include "main.h"
#include "const.h"
#include "stdhash.hh"
hash_map_char<int> *cns_load_dbSNP(FILE *fp_dbSNP)
{
hash_map_char<int> *hash;
char name[256], str1[256], str2[256];
float het_rate;
int qual, pos;
hash = new hash_map_char<int>;
hash->resize(100000);
while (fscanf(fp_dbSNP, "%s%d%s%s%d%f", name, &pos, str1, str2, &qual, &het_rate) == 6) {
sprintf(str1, "%s.%d", name, pos);
hash->insert(str1, qual);
}
return hash;
}
static inline bit32_t rbcc_core(bit32_t high, bit32_t hrate)
{
bit32_t b[3], q[2], tmp;
b[0] = high>>24&0xf; b[1] = high>>12&0xf; b[2] = high>>8&0xf;
q[0] = high>>16&0xff; q[1] = high&0xff;
if (nst_nt16_count_table[b[0]] == 2) { // the best call is a het
q[0] += hrate;
} else if (nst_nt16_count_table[b[1]] == 2) { // the second best call is a het
if (q[0] - hrate < 0) { // then the het should be the best call
tmp = b[0]; b[0] = b[1]; b[1] = tmp; // swap b[0] and b[1]
q[1] += q[0]; q[0] = hrate - q[0];
} else { // just adjust qualities
q[0] -= hrate; q[1] += hrate;
}
} else if (nst_nt16_count_table[b[2]] == 2) { // the third is a het
if (q[0] + q[1] < hrate) { // then the het should be the best call
tmp = b[0]; b[0] = b[2]; b[2] = b[1]; b[1] = tmp; // swap
tmp = q[0]; q[0] = hrate - q[0] - q[1]; q[1] = tmp;
} else if (q[1] < hrate) { // then the het should be the second best call
tmp = b[1]; b[1] = b[2]; b[2] = tmp;
tmp = q[0] + q[1] - hrate; q[1] = hrate - q[1]; q[0] = tmp;
} else q[1] -= hrate; // just adjust qualities
}
if (q[0] > 255) q[0] = 255;
if (q[1] > 255) q[1] = 255;
return (high&0xf0000000u) | b[0]<<24 | b[1]<<12 | b[2]<<8 | q[0]<<16 | q[1];
}
void cns_rbcc(gzFile fpout, gzFile fp, FILE *fp_dbSNP, float r_ori, float r_good, float r_bad)
{
int i, len;
char key[256];
bit32_t q_good, q_bad;
assert(r_ori < 1.0 && r_good < 1.0 && r_bad < 1.0);
q_good = int(4.343 * log(r_good / (1.0-r_good) / (r_ori / (1.0-r_ori))) + 0.5);
q_bad = int(4.343 * log(r_bad / (1.0-r_bad ) / (r_ori / (1.0-r_ori))) + 0.5);
hash_map_char<int> *hash = cns_load_dbSNP(fp_dbSNP);
fprintf(stderr, "[cns_rbcc] %d SNPs are loaded.\n", hash->size());
while (gzread(fp, &len, sizeof(int))) {
gzwrite(fpout, &len, sizeof(int));
char *name = (char*)malloc(len);
gzread(fp, name, len);
gzwrite(fpout, name, len);
gzread(fp, &len, sizeof(int));
gzwrite(fpout, &len, sizeof(int));
for (i = 0; i != len; ++i) {
bit32_t low, high;
int qual;
gzread(fp, &low, sizeof(bit32_t));
gzwrite(fpout, &low, sizeof(bit32_t));
gzread(fp, &high, sizeof(bit32_t));
sprintf(key, "%s.%d", name, i+1);
if (hash->find(key, &qual))
high = rbcc_core(high, qual? q_good : q_bad);
gzwrite(fpout, &high, sizeof(bit32_t));
}
free(name);
}
delete hash;
}
int ma_rbcc(int argc, char *argv[])
{
gzFile fp, fpout;
FILE *fp_dbSNP;
int c;
float r_ori, r_good, r_bad;
r_ori = 0.001; r_good = 0.1; r_bad = 0.01;
while ((c = getopt(argc, argv, "r:g:b:")) >= 0) {
switch (c) {
case 'r': r_ori = atof(optarg); break;
case 'g': r_good = atof(optarg); break;
case 'b': r_bad = atof(optarg); break;
}
}
if(argc - optind < 3) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: maq rbcc [options] <out.cns> <in.cns> <chr_rpt.snp>\n\n");
fprintf(stderr, "Options: -r FLOAT original prior [0.001]\n");
fprintf(stderr, " -g FLOAT prior for validated SNPs [0.1]\n");
fprintf(stderr, " -b FLOAT prior for the rest of SNPs [0.01]\n\n");
return 1;
}
fpout = gzopen(argv[optind], "w");
fp = gzopen(argv[optind+1], "r");
fp_dbSNP = fopen(argv[optind+2], "r");
assert(fp && fpout && fp_dbSNP);
cns_rbcc(fpout, fp, fp_dbSNP, r_ori, r_good, r_bad);
gzclose(fp);
gzclose(fpout);
fclose(fp_dbSNP);
return 0;
}
|