File: align.c

package info (click to toggle)
staden 2.0.0%2Bb11-5
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 21,568 kB
  • sloc: ansic: 240,605; tcl: 65,360; cpp: 12,854; makefile: 11,201; sh: 2,952; fortran: 2,033; perl: 63; awk: 46
file content (206 lines) | stat: -rw-r--r-- 5,801 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#include <stdio.h>
#include <ctype.h>
#include <string.h>

#include "os.h"
#include "align.h"
#include "align_lib_old.h"

FastInt W128[128][128] = {{0},{0}};
char base_val[128] = {0};

static FastInt (*align_arr[])() = {align_ss,  
				   align_sv,
				   /*balign_sv*/NULL,
				   align_ss2};
static void (*display_arr[])()  = {display_ss,
				   display_sv,
				   display_sv,
				   display_ss2};
static void (*expand_arr[])()   = {expand,
				   expand_6,
				   expand_6,
				   expand};

/*
 * Initialise 128x128 weight matrix from our score matrix
 */
void init_align_mat(int **matrix,
		    char *order, int unknown,
		    FastInt W128[][128]) {
    int i, j, i_end;
    unsigned char ci, cj;

    for (i = 0; i < 128; i++) {
        for (j = 0; j < 128; j++) {
	    W128[i][j] = unknown;
        }
    }

    i_end = strlen(order);
    for (i = 0; i < i_end; i++) {
	ci = order[i];
	for (j = 0; j < i_end; j++) {
	    cj = order[j];
	    W128[        ci ][        cj ] = matrix[i][j];
	    W128[tolower(ci)][        cj ] = matrix[i][j];
	    W128[        ci ][tolower(cj)] = matrix[i][j];
	    W128[tolower(ci)][tolower(cj)] = matrix[i][j];
	}
    }

    for (i=0; i<128; i++)
	base_val[i] = 5;
    
    base_val['A'] = 0;
    base_val['a'] = 0;
    base_val['C'] = 1;
    base_val['c'] = 1;
    base_val['G'] = 2;
    base_val['g'] = 2;
    base_val['T'] = 3;
    base_val['t'] = 3;
    base_val['U'] = 3;
    base_val['u'] = 3;
    base_val['*'] = 4;
    /* base_val['-'] = 5; the default */
}

void init_W128(int **matrix,
	       char *order, int unknown) {
    init_align_mat(matrix, order, unknown, W128);
}

/*
 * Generic alignment interface:
 *
 * Job 0 = Sequence against sequence global aligment
 * Job 1 = Sequence against vector   global aligment
 * Job 2 = vector   against vector   global aligment (unimplemented)
 * Job 3 = Job 0 except with end gaps no penalised (to replace 0 eventually)
 *
 * Passing low_band == 0 && high_band == 0 implies using the full, rather than
 * band, algorithm.
 * Currently the band algorithm does not work so well always use the full.
 *
 * Passing rseq1 and rseq2 as non NULL implies storing the new aligned
 * sequences in rseq1 and rseq2.
 *
 * Protein alignment only works for sequence against sequence.
 *
 * Return values:
 *   -1 for error
 *   >0 for success (value returned is score)
 */
int calignm(void *seq1, void *seq2, int len1, int len2,
	    void *rseq1, void *rseq2, int *rlen1, int *rlen2,
	    int low_band, int high_band, int gap_open, int gap_extend,
	    int job, int is_protein, align_int *res, FastInt W[][128]) {

    /* int full = low_band == 0 && high_band == 0; */
    int retval = -1;
    FastInt *results;
    int ajob = job & ALIGN_J_MASK;

    if (ajob < 0 || ajob > 3) {
	verror(ERR_FATAL, "align", "unknown job %d", ajob);
	return -1;
    }

    if (res == NULL) {
	if (NULL == (results = (FastInt *)malloc((len1+len2) *
						 sizeof(FastInt)))) {
	    verror(ERR_FATAL, "align", "not enough memory");
	    return -1;
	}
    } else {
	results = (FastInt *)res;
    }

    retval = align_arr[ajob](seq1, seq2, (FastInt)len1, (FastInt)len2,
			     (FastInt)low_band, (FastInt)high_band, W,
			     (FastInt)gap_open, (FastInt)gap_extend,
			     (FastInt *)results,
			     /* End gaps */
			     (job & ALIGN_GAP_S1) ? 1 : 0,
			     (job & ALIGN_GAP_S2) ? 1 : 0,
			     (job & ALIGN_GAP_E1) ? 1 : 0,
			     (job & ALIGN_GAP_E2) ? 1 : 0
			     );

    /* non vectors */
    if (rseq1 && rseq2) {
	if (retval != -1) {
	    expand_arr[ajob](seq1, seq2, len1, len2,
			     rseq1, rseq2, rlen1, rlen2,
			     results, job & ALIGN_J_PADS);
	}
    }

    if (res == NULL)
	free (results);

    return retval;
}

/*
 * As above, but hardcoded to use W128
 */
int calign(void *seq1, void *seq2, int len1, int len2,
	   void *rseq1, void *rseq2, int *rlen1, int *rlen2,
	   int low_band, int high_band, int gap_open, int gap_extend,
	   int job, int is_protein, align_int *res) {
    return calignm(seq1, seq2, len1, len2, rseq1, rseq2, rlen1, rlen2,
		   low_band, high_band, gap_open, gap_extend,
		   job, is_protein, res, W128);
}

/*
 * Generic alignment display interface
 *
 * Job 0 = Sequence against sequence display
 * Job 1 = Sequence against vector   display
 * Job 2 = vector   against vector   display (unimplemented)
 * Job 3 = Job 0 except with end gaps no penalised (to replace 0 eventually)
 *
 */
void cdisplay(void *seq1, void *seq2, int len1, int len2, int job,
	      align_int *results, int pos1, int pos2) {
    display_arr[job & ALIGN_J_MASK](seq1, seq2, len1, len2,
				    results, pos1, pos2);
}

/*
 * Generic alignment expansion interface
 *
 * Job 0 = Sequence against sequence expand
 * Job 1 = Sequence against vector   expand
 * Job 2 = vector   against vector   expand (unimplemented)
 *
 * seq1 and seq2 are the original sequences. Expanded sequences will be
 * returned in rseq1 and rseq2 (it is expected that the caller allocate
 * memory for these buffers) and the lengths in rlen1 and rlen2.
 *
 */
void cexpand(void *seq1, void *seq2, int len1, int len2,
	     void *rseq1, void *rseq2, int *rlen1, int *rlen2,
	     int job, align_int *results) {
    expand_arr[job & ALIGN_J_MASK](seq1, seq2, len1, len2,
		    rseq1, rseq2, rlen1, rlen2,
		    results, job & ALIGN_J_PADS);
}


/*
 * FORTRAN interfaces
 */
int_f align_(void *seq1, int_f *len1, void *seq2, int_f *len2,
	    void *rseq1, int_f *rlen1, void *rseq2, int_f *rlen2,
	    int_f *low_band, int_f *high_band,
	    int_f *gap_open, int_f *gap_extend,
	    int_f *job, int_f *is_protein,
	    int_fl seq1_l, int_fl seq2_l, int_fl rseq1_l, int_fl rseq2_l) {
    return calign(seq1, seq2, *len1, *len2, rseq1, rseq2, rlen1, rlen2,
		  *low_band, *high_band, *gap_open, *gap_extend,
		  *job, *is_protein, NULL);
}