1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
|
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "os.h"
#include "align.h"
#include "align_lib_old.h"
FastInt W128[128][128] = {{0},{0}};
char base_val[128] = {0};
static FastInt (*align_arr[])() = {align_ss,
align_sv,
/*balign_sv*/NULL,
align_ss2};
static void (*display_arr[])() = {display_ss,
display_sv,
display_sv,
display_ss2};
static void (*expand_arr[])() = {expand,
expand_6,
expand_6,
expand};
/*
* Initialise 128x128 weight matrix from our score matrix
*/
void init_align_mat(int **matrix,
char *order, int unknown,
FastInt W128[][128]) {
int i, j, i_end;
unsigned char ci, cj;
for (i = 0; i < 128; i++) {
for (j = 0; j < 128; j++) {
W128[i][j] = unknown;
}
}
i_end = strlen(order);
for (i = 0; i < i_end; i++) {
ci = order[i];
for (j = 0; j < i_end; j++) {
cj = order[j];
W128[ ci ][ cj ] = matrix[i][j];
W128[tolower(ci)][ cj ] = matrix[i][j];
W128[ ci ][tolower(cj)] = matrix[i][j];
W128[tolower(ci)][tolower(cj)] = matrix[i][j];
}
}
for (i=0; i<128; i++)
base_val[i] = 5;
base_val['A'] = 0;
base_val['a'] = 0;
base_val['C'] = 1;
base_val['c'] = 1;
base_val['G'] = 2;
base_val['g'] = 2;
base_val['T'] = 3;
base_val['t'] = 3;
base_val['U'] = 3;
base_val['u'] = 3;
base_val['*'] = 4;
/* base_val['-'] = 5; the default */
}
void init_W128(int **matrix,
char *order, int unknown) {
init_align_mat(matrix, order, unknown, W128);
}
/*
* Generic alignment interface:
*
* Job 0 = Sequence against sequence global aligment
* Job 1 = Sequence against vector global aligment
* Job 2 = vector against vector global aligment (unimplemented)
* Job 3 = Job 0 except with end gaps no penalised (to replace 0 eventually)
*
* Passing low_band == 0 && high_band == 0 implies using the full, rather than
* band, algorithm.
* Currently the band algorithm does not work so well always use the full.
*
* Passing rseq1 and rseq2 as non NULL implies storing the new aligned
* sequences in rseq1 and rseq2.
*
* Protein alignment only works for sequence against sequence.
*
* Return values:
* -1 for error
* >0 for success (value returned is score)
*/
int calignm(void *seq1, void *seq2, int len1, int len2,
void *rseq1, void *rseq2, int *rlen1, int *rlen2,
int low_band, int high_band, int gap_open, int gap_extend,
int job, int is_protein, align_int *res, FastInt W[][128]) {
/* int full = low_band == 0 && high_band == 0; */
int retval = -1;
FastInt *results;
int ajob = job & ALIGN_J_MASK;
if (ajob < 0 || ajob > 3) {
verror(ERR_FATAL, "align", "unknown job %d", ajob);
return -1;
}
if (res == NULL) {
if (NULL == (results = (FastInt *)malloc((len1+len2) *
sizeof(FastInt)))) {
verror(ERR_FATAL, "align", "not enough memory");
return -1;
}
} else {
results = (FastInt *)res;
}
retval = align_arr[ajob](seq1, seq2, (FastInt)len1, (FastInt)len2,
(FastInt)low_band, (FastInt)high_band, W,
(FastInt)gap_open, (FastInt)gap_extend,
(FastInt *)results,
/* End gaps */
(job & ALIGN_GAP_S1) ? 1 : 0,
(job & ALIGN_GAP_S2) ? 1 : 0,
(job & ALIGN_GAP_E1) ? 1 : 0,
(job & ALIGN_GAP_E2) ? 1 : 0
);
/* non vectors */
if (rseq1 && rseq2) {
if (retval != -1) {
expand_arr[ajob](seq1, seq2, len1, len2,
rseq1, rseq2, rlen1, rlen2,
results, job & ALIGN_J_PADS);
}
}
if (res == NULL)
free (results);
return retval;
}
/*
* As above, but hardcoded to use W128
*/
int calign(void *seq1, void *seq2, int len1, int len2,
void *rseq1, void *rseq2, int *rlen1, int *rlen2,
int low_band, int high_band, int gap_open, int gap_extend,
int job, int is_protein, align_int *res) {
return calignm(seq1, seq2, len1, len2, rseq1, rseq2, rlen1, rlen2,
low_band, high_band, gap_open, gap_extend,
job, is_protein, res, W128);
}
/*
* Generic alignment display interface
*
* Job 0 = Sequence against sequence display
* Job 1 = Sequence against vector display
* Job 2 = vector against vector display (unimplemented)
* Job 3 = Job 0 except with end gaps no penalised (to replace 0 eventually)
*
*/
void cdisplay(void *seq1, void *seq2, int len1, int len2, int job,
align_int *results, int pos1, int pos2) {
display_arr[job & ALIGN_J_MASK](seq1, seq2, len1, len2,
results, pos1, pos2);
}
/*
* Generic alignment expansion interface
*
* Job 0 = Sequence against sequence expand
* Job 1 = Sequence against vector expand
* Job 2 = vector against vector expand (unimplemented)
*
* seq1 and seq2 are the original sequences. Expanded sequences will be
* returned in rseq1 and rseq2 (it is expected that the caller allocate
* memory for these buffers) and the lengths in rlen1 and rlen2.
*
*/
void cexpand(void *seq1, void *seq2, int len1, int len2,
void *rseq1, void *rseq2, int *rlen1, int *rlen2,
int job, align_int *results) {
expand_arr[job & ALIGN_J_MASK](seq1, seq2, len1, len2,
rseq1, rseq2, rlen1, rlen2,
results, job & ALIGN_J_PADS);
}
/*
* FORTRAN interfaces
*/
int_f align_(void *seq1, int_f *len1, void *seq2, int_f *len2,
void *rseq1, int_f *rlen1, void *rseq2, int_f *rlen2,
int_f *low_band, int_f *high_band,
int_f *gap_open, int_f *gap_extend,
int_f *job, int_f *is_protein,
int_fl seq1_l, int_fl seq2_l, int_fl rseq1_l, int_fl rseq2_l) {
return calign(seq1, seq2, *len1, *len2, rseq1, rseq2, rlen1, rlen2,
*low_band, *high_band, *gap_open, *gap_extend,
*job, *is_protein, NULL);
}
|