File: rank_sum.cc

package info (click to toggle)
soapsnp 1.03-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 300 kB
  • sloc: cpp: 1,684; makefile: 42
file content (128 lines) | stat: -rw-r--r-- 4,817 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include "soap_snp.h"

int Prob_matrix::rank_table_gen() {
	// When N <= 63, (so that n1<=31), use this table to test
	ubit64_t i, n1, N, T1;
	rate_t p_left, p_right;

	// Calculate the factorials
	double * fact = new double [64];
	fact[0]=(double)1.0;
	for(i=1;i!=64;i++) {
		fact[i] = fact[i-1]*i;
	}

	ubit64_t * rank_sum= new ubit64_t [64*64*2048]; // 6bit: N; 5bit: n1; 11bit; T1
	memset(rank_sum, 0, sizeof(ubit64_t)*64*64*2048);
	rank_sum[0]=1;
	for(N=1;N!=64;N++) {
		//cerr<<N<<endl;
		for(n1=0;n1<=N;n1++) {
			//cerr<<n1<<endl;
			for(T1=(1+n1)*n1/2;T1<=(N+N-n1+1)*n1/2;T1++) {
				//cerr<<T1<<endl;
				// Dynamic programming to generate the table
				rank_sum[N<<17|n1<<11|T1] = rank_sum[((N-1)<<17)|(n1<<11)|T1] + ((T1>=N && n1>0) ? rank_sum[((N-1)<<17)|((n1-1)<<11)|(T1-N)]:0);
				// Here, the p_rank is not cumulative
				p_rank[(N<<17)|(n1<<11)|T1] = rank_sum[N<<17|n1<<11|T1] / (fact[N]/(fact[n1]*fact[N-n1]));
				//cerr<<p_rank[(N<<17)|(n1<<11)|T1]<<endl;
			}
			p_left = 0.0, p_right =1.0;
			for(T1=(1+n1)*n1/2;T1<=(N+N-n1+1)*n1/2;T1++) {
				p_right = 1.0 - p_left;
				p_left += p_rank[(N<<17)|(n1<<11)|T1];
				p_rank[N<<17|n1<<11|T1] = (p_left<p_right?p_left:p_right);
				//std::cerr<<N<<"\t"<<n1<<"\t"<<T1<<"\t"<<p_rank[(N<<17)|(n1<<11)|T1]<<endl;
			}
		}
	}
	delete [] rank_sum;
	delete [] fact;
	return 1;
}

double Call_win::normal_test(int n1, int n2, double T1, double T2) {
	double u1, u2;
	u1 = (T1 - n1*(n1+n2+1)/2) / sqrt(n1*n2*(n1+n2+1)/(double)12);
	u2 = (T2 - n2*(n1+n2+1)/2) / sqrt(n1*n2*(n1+n2+1)/(double)12);
	return normal_value(fabs(u1)>fabs(u2)?u1:u2);
}

double Call_win::table_test(double *p_rank, int n1, int n2, double T1, double T2) {
	if(n1<=n2) {
		return p_rank[(n1+n2)<<17|n1<<11|(int)(T1)]+(T1-(int)T1)*(p_rank[(n1+n2)<<16|n1<<11|(int)(T1+1)]-p_rank[(n1+n2)<<17|n1<<11|(int)(T1)]);
	}
	else {
		return p_rank[(n1+n2)<<17|n2<<11|(int)(T2)]+(T2-(int)T2)*(p_rank[(n1+n2)<<16|n2<<11|(int)(T2+1)]-p_rank[(n1+n2)<<17|n2<<11|(int)(T2)]);
	}
}

double Call_win::rank_test(Pos_info & info, char best_type, double * p_rank, Parameter * para) {
	if( (best_type&3) == ((best_type>>2)&3) ) {
		// HOM
		return 1.0;
	}
	if( info.count_uni[best_type&3]==0 || info.count_uni[(best_type>>2)&3]==0) {
		// HET with one allele...
		return 0.0;
	}
	//cerr<<"RankSum:"<<info.pos<<endl;
	//int * same_qual_count = new int [para->q_max-para->q_min+1];
	//memset(same_qual_count, 0, sizeof(int)*(para->q_max-para->q_min+1));
	//double * rank_array= new double [para->q_max-para->q_min+1];
	//memset(rank_array, 0, sizeof(double)*(para->q_max-para->q_min+1));
	int *same_qual_count = new int [64];
	double *rank_array = new double [64];
	memset(same_qual_count,0,sizeof(int)*64);
	memset(rank_array,0,sizeof(double)*64);

	int rank(0);
	double T[4]={0.0, 0.0, 0.0, 0.0};
	bool is_need[4] ={false,false,false,false};
	is_need[(best_type&3)]=true; is_need[((best_type>>2)&3)]=true;
	std::string::size_type o_base, strand;
	int  q_score, coord;
	for(o_base=0;o_base!=4;o_base++) {
		if(info.count_uni[o_base]==0 || !is_need[o_base]) continue;
		for(q_score=para->q_max-para->q_min;q_score>=0;q_score--) {
			for(coord=para->read_length-1;coord>=0;coord--) {
				for(strand=0;strand<2;strand++) {
					same_qual_count[q_score] += info.base_info[o_base<<15|strand<<14|q_score<<8|coord];
					//if(info.pos==1256 && info.base_info[o_base<<13|strand<<12|q_score<<6|coord]!=0) {
					//	cerr<<info.pos<<"\t"<<q_score<<"\t"<<same_qual_count[q_score]<<"\t"<<int(info.base_info[o_base<<13|strand<<12|q_score<<6|coord])<<endl;
					//}
				}
			}
		}
	}
	rank = 0;
	for(q_score=0;q_score<=(ubit64_t)(para->q_max-para->q_min+1);q_score++) {
		rank_array[q_score]= rank+(1+same_qual_count[q_score])/2.0;
		rank += same_qual_count[q_score];
	}
	for(o_base=0;o_base!=4;o_base++) {
		if(info.count_uni[o_base]==0 || !is_need[o_base]) continue;
		for(q_score=para->q_max-para->q_min;q_score>=0;q_score--) {
			for(coord=para->read_length-1;coord>=0;coord--) {
				for(strand=0;strand<2;strand++) {
					//cerr<<"ACTG"[o_base]<<endl;
					T[o_base] += (rank_array[q_score] * info.base_info[o_base<<15|strand<<14|q_score<<8|coord]);
					//cerr<<T[o_base]<<endl;
				}
			}
		}
	}
	delete [] same_qual_count;
	delete [] rank_array;
	//for(size_t a=0;a!=4;a++) {
	//	fprintf(stderr, "%lf\t", T[a]);
	//}
	//fprintf(stderr, "\n");
	//std::cerr<<"%d\t%d\t%lf\t%lf", info.count_uni[best_type&3],  info.count_uni[(best_type>>2)&3], T[best_type&3], T[(best_type>>2)&3]);
	if (info.count_uni[best_type&3]+info.count_uni[(best_type>>2)&3]<64) {
		return table_test(p_rank, info.count_uni[best_type&3], info.count_uni[(best_type>>2)&3], T[best_type&3], T[(best_type>>2)&3]);
	}
	else {
		return normal_test(info.count_uni[best_type&3], info.count_uni[(best_type>>2)&3],T[best_type&3], T[(best_type>>2)&3]);
	}
}