1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
/****************************************************************************
*
* Copyright (c) 2003, The Institute for Genomic Research (TIGR), Rockville,
* Maryland, U.S.A. All rights reserved.
*
****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "utils.h"
extern void giveup(char *);
static int Dlist[]={ 0, 2, 3}, Hlist[]={ 0, 1, 3},
rev_code[]={ 3, 2, 1, 0, 0, 6, 5, 10, 8, 9, 7, 12, 11, 14, 13, 15};
int tag_size;
static int tag_len;
static unsigned tag_mask;
static unsigned *tag_well;
int tag_code(c)
int c;
{
register int tmp;
switch (c) {
case 0: tmp=0; break;
case 1: tmp=1; break;
case 2: tmp=2; break;
case 3:
case 4: tmp=3; break;
case 5: tmp=random()%2 ? 2 : 0; break;
case 6: tmp=random()%2 ? 3 : 1; break;
case 7: tmp=random()%2 ; break;
case 8: tmp=random()%2 ? 3 : 0; break;
case 9: tmp=random()%2 ? 2 : 1; break;
case 10: tmp=random()%2 ? 3 : 2; break;
case 11: tmp=Dlist[random()%3]; break;
case 12: tmp=Hlist[random()%3]; break;
case 13: tmp=random()%3; break;
case 14: tmp=random()%3+1; break;
case 15: tmp=random()%4; break;
default:
giveup("how can other cases happen in tag_code?");
}
return tmp;
}
void tag_sort(l, r)
unsigned *l, *r;
{
register unsigned v, *i, *j, tmp;
if (r>l) {
v = *r; i = l-1; j = r;
while (1) {
while (*(++i) < v) ;
while (j>l && *(--j) > v) ;
if (i>=j) break;
tmp = *i; *i = *j; *j = tmp;
}
tmp = *i; *i = *r; *r = tmp;
tag_sort(l, i-1);
tag_sort(i+1, r);
}
}
void construct_vector_tags(seq, len)
char *seq;
int len;
{
register int i, j, k;
register unsigned accf, accr;
/* Note: this breaks the vector into non-overlapping tags of size tag_size */
tag_len=len/tag_size+(len%tag_size ? 1 : 0);
tag_well=(unsigned*)malloc(sizeof(unsigned)*tag_len*2);
if (tag_well==NULL)
giveup("Memory allocation failure.");
for (i=k=0; i<tag_len; i++) {
accf=accr=0;
for (j=0; j<tag_size; j++, k++) {
accf<<=2;
accf|=tag_code(seq[k%len]);
accr<<=2;
accr|=tag_code(rev_code[seq[len-k%len-1]]);
}
tag_well[i]=accf;
tag_well[i+tag_len]=accr;
}
tag_len*=2;
tag_sort(tag_well, &tag_well[tag_len-1]);
for (i=j=0; j<tag_len; j++)
if (tag_well[i]!=tag_well[j])
tag_well[++i]=tag_well[j];
tag_len=i+1;
for (accf=i=0; i<tag_size; i++) {
accf<<=2;
accf|=3;
}
tag_mask=accf;
}
int match_vector_tags(seq, len)
char *seq;
int len;
{
register unsigned acc;
register int i, l, r, m, hits, last_hit;
acc=0;
for (i=0; i<tag_size-1; i++, acc<<=2)
acc|=tag_code(seq[i]);
for (hits=0, last_hit=-1; i<len; i++, acc<<=2) {
acc&=tag_mask;
acc|=tag_code(seq[i]);
for (l=0, r=tag_len-1; l<=r; ) {
m=(l+r)/2;
if (acc<tag_well[m])
r=m-1;
else if (acc>tag_well[m])
l=m+1;
else {
/* this tag matches a vector tag -- count all bases in the tag as matching, */
/* without double-counting any bases from the previous hit */
if (i-last_hit>=tag_size)
hits+=tag_size;
else
hits+=i-last_hit;
last_hit=i;
break;
}
}
}
/* return the total number of matching bases */
return hits;
}
void destroy_vector_tags()
{
free(tag_well);
}
|