File: read_matrix.cpp

package info (click to toggle)
staden 2.0.0%2Bb11-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 21,584 kB
  • sloc: ansic: 240,605; tcl: 65,360; cpp: 12,854; makefile: 11,203; sh: 3,023; fortran: 2,033; perl: 63; awk: 46
file content (145 lines) | stat: -rw-r--r-- 3,558 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
 * Routines to read the blast matrix file formats.
 *
 * This is a lash-up designed to work specifically with the DNA matrices.
 */

#include <staden_config.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
extern "C" {
#include "misc.h"
}
#include "read_matrix.h"

/*
 * Creates a score matrix by loading the blast format matrix from file 'fn'.
 * The axis of the matrix are the characters in base_order, with the matrix
 * size being strlen(base_order) in each direction.
 * Elements listed in base_order that do not appear in the file are entered
 * in the matrix as zero. (Should compute average and use this?)
 *
 * The matrix is allocated as an array of strlen(base_order) pointers to
 * arrays of strlen(base_order) integers.
 *
 * Returns the matrix for success, or NULL for failure.
 */
int **create_matrix(char *fn, char *base_order) {
    int **matrix;
    int len = strlen(base_order);
    int i, j, ncols = 0;
    FILE *fp;
    signed char lookup[256];
    int first_line = 1;
    char line[1024], *linep;
    signed char cols[256];

    /* Open the file */
    if (NULL == (fp = fopen(fn, "r")))
   return NULL;

    /* Allocate matrix */
    if (NULL == (matrix = (int **)xmalloc(len * sizeof(int *))))
   return NULL;

    for (i = 0; i < len; i++) {
   if (NULL == (matrix[i] = (int *)xcalloc(len, sizeof(int))))
       return NULL;
    }

    /* Initialise our character value to base_order index translation. */
    memset(lookup, -1, 256);
    for (i = 0; i < len; i++) {
   lookup[toupper(base_order[i])] = i;
   lookup[tolower(base_order[i])] = i;
    }

    /*
     * Read the file itself.
     * Lines starting with '#' are comments.
     * After that, the first line indicates the order of elements in the cols.
     * Then we have the rows with data values.
     */

    while(fgets(line, 1024, fp)) {
   /* Skip comments */
   if (line[0] == '#')
       continue;
       
   if (first_line) {
       /*
        * For the first line, we initialise a translation from
        * column characters in file to indices into 'matrix'.
        */
       first_line = 0;
       for (j = i = 0, linep = line; *linep; linep++) {
      if (isspace(*linep))
          continue;
      cols[j++] = lookup[*linep];
       }
       ncols = j;
   } else {
       /*
        * Otherwise, we read the first character, and then fill in
        * 'matrix' column by column.
        */
       int row_index, element;

       for (linep = line; *linep && isspace(*linep); linep++)
      ;
       row_index = lookup[*linep++];
       if (row_index != -1) {
      for (j = 0; j < ncols; j++) {
          element = strtol(linep, &linep, 10);
          if (cols[j] != -1)
         matrix[row_index][cols[j]] = element;
      }
       }
   }
    }

    /* Tidy up */
    fclose(fp);

#if 0
    /* Debug - print out the matrix */
    putchar(' ');
    for (j = 0; j < len; j++) {
   printf("   %c", base_order[j]);
    }
    putchar('\n');
    for (j = 0; j < len; j++) {
   printf("%c ", base_order[j]);
   for (i = 0; i < len; i++) {
       printf("%3d ", matrix[i][j]);
   }
   putchar('\n');
    }
#endif
    
    return matrix;
}


/*
 * Free a matrix which has been allocated by create_matrix().
 * We still need to pass in base_order as this is how we indicate the size of
 * the matrix.
 */
void free_matrix(int **matrix, char *base_order) {
    int i, len = strlen(base_order);

    if (!matrix)
   return;

    for (i = 0; i < len; i++) {
   if (matrix[i]) xfree(matrix[i]);
    }
    xfree(matrix);

    return;
}