File: extratools.c

package info (click to toggle)
soapaligner 2.20-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 768 kB
  • sloc: ansic: 10,051; makefile: 236
file content (234 lines) | stat: -rw-r--r-- 7,669 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#include "extratools.h"

//This file includes the implementations of all the extra tools adding to all steps
//e.g. Look Up Table
//     Hash Table
//     All things like those

BWT * occBwt;
HASHTABLE * occHashtable;
unsigned int * occCollector;
unsigned int occCollected;

FILE * textPositionFile;

void LoadLookupTable(LOOKUPTABLE * lookupTable, const char * fileName, const int tableSize)  {
    (*lookupTable).tableSize = tableSize;
	unsigned long long NR_TOP = 1 << (tableSize * 2);
	(*lookupTable).table = malloc(sizeof(unsigned) * NR_TOP);
	int fin = open(fileName, O_RDONLY);
	unsigned step = 1048576;
	unsigned int i;
	for (i = 0; i < NR_TOP; i += step) {
		read(fin, (*lookupTable).table + i, step * sizeof(*(*lookupTable).table));
	}
	close(fin);
}

unsigned int LookupSafe(LOOKUPTABLE lookupTable, BWT * bwt,
                        unsigned long long lKey, unsigned long long rKey,
                        unsigned int *l, unsigned int *r) {

	*l = lKey ? lookupTable.table[lKey-1]+1 : 1;
	*r = lookupTable.table[rKey];

	if (*l == bwt->inverseSa0) {
		(*l)++;
	}

	return *r-*l+1;
}

unsigned int retrieveSA=0,retrieveHASH=0;
double textPositionTime, textPositionTimeTotal = 0;
unsigned int writeQIndex;

double getTextPositionTime() {return textPositionTimeTotal;}
unsigned int getSARetrieved() {return retrieveSA;}
unsigned int getHASHRetrieved() {return retrieveHASH;}

void FreeLookupTable(LOOKUPTABLE * lookupTable) {
     free((*lookupTable).table);
}

void LoadHashTable(HASHTABLE * hashTable, const char * fileName) {
unsigned int ttlOccurrence=0;
unsigned int ttlItem=0;

    FILE *inFile;
    if(!(inFile = fopen(fileName, "r"))) return;
    fread((unsigned int *)&((*hashTable).tableSize),sizeof(unsigned int),1,inFile);
    fread((unsigned int *)&((*hashTable).a),sizeof(unsigned int),1,inFile);
    fread((unsigned int *)&((*hashTable).b),sizeof(unsigned int),1,inFile);
    fread((unsigned int *)&((*hashTable).prime),sizeof(unsigned int),1,inFile);
    fread((unsigned int *)&ttlItem,sizeof(unsigned int),1,inFile);
    fread((unsigned int *)&ttlOccurrence,sizeof(unsigned int),1,inFile);

    //printf("Initializing the hash table..(n=%u)\n",(*hashTable).tableSize);
    (*hashTable).table = (HASHCELL*) malloc(sizeof(HASHCELL)*((*hashTable).tableSize));
    (*hashTable).itemList = (HASHITEM*) malloc(sizeof(HASHITEM)*ttlItem);
    (*hashTable).occList = (OCC*) malloc(sizeof(OCC)*ttlOccurrence);
    //printf("Initialized the hash table..\n");
    unsigned int i;
    for (i=0;i<((*hashTable).tableSize);i++) {
        char mk;
        fread((char *) &mk,1,1,inFile);
        if (mk==0) {
            //Empty cell
            (*hashTable).table[i].index=0;
            (*hashTable).table[i].count=0;
        } else {
            fread((unsigned int *)&((*hashTable).table[i].index),sizeof(unsigned int),1,inFile);
            fread((unsigned int *)&((*hashTable).table[i].count),sizeof(unsigned int),1,inFile);
        }
    }
    for (i=0;i<ttlItem;i++) {
        fread((unsigned int *)&((*hashTable).itemList[i].l),sizeof(unsigned int),1,inFile);
        fread((unsigned int *)&((*hashTable).itemList[i].r),sizeof(unsigned int),1,inFile);
        fread((unsigned int *)&((*hashTable).itemList[i].occIndex),sizeof(unsigned int),1,inFile);
    }
    for (i=0;i<ttlOccurrence;i++) {
        fread((unsigned int *)&((*hashTable).occList[i]),sizeof(unsigned int),1,inFile);
    }
    fclose(inFile);

}

void FreeHashTable(HASHTABLE * hashTable) {
     free((*hashTable).table);
     free((*hashTable).itemList);
     free((*hashTable).occList);
}

unsigned int Hash(HASHTABLE * hashTable, unsigned int key) {
    //g(x)=(ax+b) mod p
    unsigned long long multipleA=(key * (*hashTable).a)% (*hashTable).prime;
    unsigned int g=(unsigned int) (( multipleA + (*hashTable).b ) % (*hashTable).prime);

    //f(x)=g(x) mod 2n
    unsigned int f=g % ((*hashTable).tableSize);

    return f;
}

HASHITEM * HashFind(HASHTABLE * hashTable, unsigned int l,unsigned int r) {
    unsigned int hashedIndex=Hash(hashTable,l);
    unsigned int index=(*hashTable).table[hashedIndex].index;
    unsigned int count=(*hashTable).table[hashedIndex].count;

    if (hashedIndex>=0 && hashedIndex<(*hashTable).tableSize) {
        unsigned int i=0;
        while (i<count && ((*hashTable).itemList[index+i].l!=l || (*hashTable).itemList[index+i].r!=r)) {
            i++;
        }
        if (i>=count) return NULL;
        return &((*hashTable).itemList[index+i]);
    }
    return NULL;
}

void RegisterDecoder(BWT * bwt,HASHTABLE * hashTable) {
     occBwt=bwt;
     occHashtable=hashTable;
     occCollected=0;
     retrieveSA=0;
     retrieveHASH=0;
     textPositionTimeTotal = 0;
     writeQIndex = 0;
     //occCollector = malloc(sizeof (unsigned int) * 1024*1024);
}

unsigned int allOne = 0;
unsigned int OCCSection=0;

inline int CalMismatch(const char *seq, const unsigned int *ref, const unsigned int occPosCord, const unsigned int seqLen, const unsigned int dnaLength){
	unsigned int i, l;
	int match = 0;
//	fprintf(stderr, "%u\t%u\t%u\n", occPosCord, seqLen, dnaLength);
//	fprintf(stderr, "%u\n", ref[0]);
	for(i =0, l=occPosCord; i < seqLen && l < dnaLength; ++i, ++l){
//		fprintf(stderr, "%d,%u ", i, l);
		if(!(((*(seq+i))&0x3) ^ ((((*(ref+(l>>4)))>>(((~l)&0xf)<<1)))&0x3)))
			++match;
	}
//	fprintf(stderr, "match %d\n", match);
	return (seqLen-match);
}

#include <assert.h>

int OCCProcess(const unsigned int l,const unsigned int r, const BWTOPT *bo, const unsigned int info, HITTABLE *hits) {
#if TRUE
//	fprintf(stderr, "OCC Process, n occ %d\n", r-l+1);
#endif
	const unsigned int cutoff = bo->cutoff;
	if(hits->n >= bo->cutoff) return 0;
	unsigned int n = hits->n;
	HITITEM *hit = hits->itemList+n;
	ChrBlock *blockList   = bo->blockList;
	const unsigned int nblock   = bo->nblock;
	const unsigned int seqLen   = bo->seqLen;
	const unsigned int alnLen  = bo->alnLen;
	const unsigned int extLen = seqLen-alnLen;
	const unsigned int max_mm   = bo->max_mm+(info>>25 & 0x7);
	const unsigned int *pacRef  = bo->pacRef;
	const unsigned int dnaLength = bo->dnaLen;
	const unsigned int strain = (info>>24)&1;
	char *seq    =  strain?bo->rc:bo->fw;
//	for(i=0; i<extLen; ++i) fprintf(stderr,"%c","ACGT"[*(seq+i)]);
//	fprintf(stderr, "%d\n", extLen);
	unsigned int occ_pos  = 0;
	if (r-l+1 < 4) {
			//SA
		unsigned int k;
		for (k=l;k<=r && n < cutoff;k++) {
			occ_pos = BWTSaValue(occBwt,k);
			HitInc(n);
		}
		int inc = n - hits->n;
		hits->n = n;
		return inc;
	} else {
			//Hash
		HASHITEM *item = HashFind(occHashtable,l,r);
		if (item != NULL) {
			unsigned int k;
			for (k=0;k<item->r-item->l+1 && n < cutoff;k++) {
				occ_pos = occHashtable->occList[item->occIndex+k];
				HitInc(n);
			}
			int inc = n - hits->n;
			hits->n = n;
			return inc;
		} else {
			unsigned int k;
			for (k=l; k<=r && n < cutoff; k++) {
				occ_pos = BWTSaValue(occBwt,k);
				HitInc(n);
			}
			int inc = n - hits->n;
			hits->n = n;
			return inc;
		}
	}
}

void registerTPFile(FILE * filePtr,unsigned int searchMode) {
    textPositionFile=filePtr;
    fwrite(&searchMode,sizeof(unsigned int),1,textPositionFile);
	allOne=(1U<<31)-1;
	allOne<<=1;
	allOne+=1;
}

void registerQIndex(unsigned int index) {
    writeQIndex=index;
    OCCSection=0;
}
void registerQSection() {
    if (writeQIndex==0) {
        fwrite(&allOne,sizeof(unsigned int),1,textPositionFile);
    } else {
        OCCSection++;
    }
}