File: purge.c

package info (click to toggle)
glam2 1064-9
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 956 kB
  • sloc: ansic: 6,925; xml: 757; asm: 74; makefile: 54; sh: 11
file content (127 lines) | stat: -rw-r--r-- 3,565 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/* purge.c - homology purge program */
#include "purge.h"

Boolean	RmHomologs(long cutoff, char method, long minimum, Boolean query, 
	ss_type P)
/** Use block bit array for edges to save space **/
{
	long maxleng,i,k,entry,*lengs,*list;
	long item,item1,*hits,N;
	b_type	*edges;
	FILE	*fptr;
	dh_type	H;
	gb_typ	B = NULL;
	keytyp	key;
	e_type	E,E1,E2;
	char	str[100];
	Boolean	made_file=FALSE,related;

	N = NSeqsSeqSet(P);
	H = dheap(N+2,3);
	NEW(edges,N+2,b_type); 
	NEW(list,N+2,long); NEW(lengs,N+2,long); NEW(hits,N+2,long);
	for(maxleng=0,entry =1 ;entry <= N;entry++) {
		E=SeqSetE(entry,P);
		lengs[entry]=LenSeq(E);
		edges[entry] = Block(N+1);
		maxleng = tMAX(long,maxleng,LenSeq(E));
	}
	maxleng *= 2;
	entry = 1;
	if(cutoff < 99999) {
	  for(item =1 ;item <= N;item++) {
	    E1=SeqSetE(item,P);
	    if(method == 'b') B=MakeGBlast(11, E1, SeqSetA(P));
	    for(item1 =item+1 ;item1 <= N;item1++) {
	        E2=SeqSetE(item1,P);
		if(method == 'h'){	/** fasta heuristic method **/
		     related = RelateSeqFastp(E1,E2,SeqSetA(P),cutoff);
		} else if(method == 'b'){ /** gblast heuristic method **/
			related = FastMatcherGBlast(E2, B, cutoff);
/*****
			if(item==3 && item1==40) MatcherGBlast(stderr,E2, B);
*****/
		} else related = RelatedSeqs(cutoff, E1, E2, SeqSetA(P));
		if(related){
			AddBlock(item,edges[item1]);
			AddBlock(item1,edges[item]);
			hits[item]++; hits[item1]++;
		}
	    }
	    if(method == 'b') NilGBlast(B);
	    fprintf(stderr,"\r%ld",entry++); /**/
	  }
	}
	fprintf(stderr,"\n%ld items compared\n", entry-1); 
 if(!query){
	for(entry=1; entry <= N; entry++){
		key = -(keytyp) hits[entry];
		key += (keytyp)lengs[entry]/(keytyp)maxleng;
		/*if(hits[entry] > 0)
		     fprintf(stdout,"#%d: %d hits; key = %f\n",
			entry,hits[entry],key);*/
		insrtHeap(entry,key,H);
	}
	while(TRUE){
	    if(emptyHeap(H)) print_error("error in RmHomologs( )");
	    else if(minkeyHeap(H) >= 0) break;
	    else {
	 	item=delminHeap(H);
		for(i=1;i <= N;i++){
		  if(MemberBlock(i,edges[item]) && memHeap(i,H)){
			hits[i]--;
			key = - (keytyp) hits[i];
			key += (keytyp)lengs[i]/(keytyp)maxleng;
			chkeyHeap(i,key,H);
		  }
		}
	    }
	}
  } else {	/** don't purge #1 from set; i.e., don't put in heap. **/
	for(entry=2; entry <= N; entry++){
		key = -(keytyp) hits[entry];
		key += (keytyp)lengs[entry]/(keytyp)maxleng;
		/*if(hits[entry] > 0)
		     fprintf(stdout,"#%d: %d hits; key = %f\n",
			entry,hits[entry],key);*/
		insrtHeap(entry,key,H);
	}
	while(TRUE){
	    if(emptyHeap(H) || minkeyHeap(H) >= 0) break;
	    else {
	 	item=delminHeap(H);
		for(i=2;i <= N;i++){
		  if(MemberBlock(i,edges[item]) && memHeap(i,H)){
			hits[i]--;
			key = - (keytyp) hits[i];
			key += (keytyp)lengs[i]/(keytyp)maxleng;
			chkeyHeap(i,key,H);
		  }
		}
	    }
	}
	insrtHeap(1,-1e99,H);
  }
	if(ItemsInHeap(H) < N+1){		/* TLB; allow all seqs thru */
	 if(ItemsInHeap(H) >= minimum){
	  sprintf(str,"%s.%c%ld",NameSeqSet(P),method,cutoff);
	  fptr = open_file(str,"","w");
	  for(entry=1,k=0; entry <= N; entry++){
	    E=SeqSetE(entry,P); 
	    if(memHeap(entry,H)) { PutSeqSetE(fptr,entry,P); k++; }
	  }
	  fclose(fptr);
	  fprintf(stderr,"%ld sequences put into file: '%s'\n",k,str);
	  made_file = TRUE;
	 } else fprintf(stderr,
		"less than %ld sequences left (%ld); file not created\n",
		minimum,ItemsInHeap(H));
	} else fprintf(stderr,"no sequences removed; file not created\n");
	for(entry=1; entry <= N; entry++) {
		NilBlock(edges[entry]);
	}
	free(lengs); free(hits); free(list); free(edges);
	Nildheap(H);
	return made_file;
}