1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
|
/*
* Routines to read the blast matrix file formats.
*
* This is a lash-up designed to work specifically with the DNA matrices.
*/
#include <staden_config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
extern "C" {
#include "misc.h"
}
#include "read_matrix.h"
/*
* Creates a score matrix by loading the blast format matrix from file 'fn'.
* The axis of the matrix are the characters in base_order, with the matrix
* size being strlen(base_order) in each direction.
* Elements listed in base_order that do not appear in the file are entered
* in the matrix as zero. (Should compute average and use this?)
*
* The matrix is allocated as an array of strlen(base_order) pointers to
* arrays of strlen(base_order) integers.
*
* Returns the matrix for success, or NULL for failure.
*/
int **create_matrix(char *fn, char *base_order) {
int **matrix;
int len = strlen(base_order);
int i, j, ncols = 0;
FILE *fp;
signed char lookup[256];
int first_line = 1;
char line[1024], *linep;
signed char cols[256];
/* Open the file */
if (NULL == (fp = fopen(fn, "r")))
return NULL;
/* Allocate matrix */
if (NULL == (matrix = (int **)xmalloc(len * sizeof(int *))))
return NULL;
for (i = 0; i < len; i++) {
if (NULL == (matrix[i] = (int *)xcalloc(len, sizeof(int))))
return NULL;
}
/* Initialise our character value to base_order index translation. */
memset(lookup, -1, 256);
for (i = 0; i < len; i++) {
lookup[toupper(base_order[i])] = i;
lookup[tolower(base_order[i])] = i;
}
/*
* Read the file itself.
* Lines starting with '#' are comments.
* After that, the first line indicates the order of elements in the cols.
* Then we have the rows with data values.
*/
while(fgets(line, 1024, fp)) {
/* Skip comments */
if (line[0] == '#')
continue;
if (first_line) {
/*
* For the first line, we initialise a translation from
* column characters in file to indices into 'matrix'.
*/
first_line = 0;
for (j = i = 0, linep = line; *linep; linep++) {
if (isspace(*linep))
continue;
cols[j++] = lookup[*linep];
}
ncols = j;
} else {
/*
* Otherwise, we read the first character, and then fill in
* 'matrix' column by column.
*/
int row_index, element;
for (linep = line; *linep && isspace(*linep); linep++)
;
row_index = lookup[*linep++];
if (row_index != -1) {
for (j = 0; j < ncols; j++) {
element = strtol(linep, &linep, 10);
if (cols[j] != -1)
matrix[row_index][cols[j]] = element;
}
}
}
}
/* Tidy up */
fclose(fp);
#if 0
/* Debug - print out the matrix */
putchar(' ');
for (j = 0; j < len; j++) {
printf(" %c", base_order[j]);
}
putchar('\n');
for (j = 0; j < len; j++) {
printf("%c ", base_order[j]);
for (i = 0; i < len; i++) {
printf("%3d ", matrix[i][j]);
}
putchar('\n');
}
#endif
return matrix;
}
/*
* Free a matrix which has been allocated by create_matrix().
* We still need to pass in base_order as this is how we indicate the size of
* the matrix.
*/
void free_matrix(int **matrix, char *base_order) {
int i, len = strlen(base_order);
if (!matrix)
return;
for (i = 0; i < len; i++) {
if (matrix[i]) xfree(matrix[i]);
}
xfree(matrix);
return;
}
|