File: rnamat.h

package info (click to toggle)
infernal 1.1.2-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 62,408 kB
  • sloc: ansic: 209,575; perl: 12,119; sh: 5,676; makefile: 2,823
file content (150 lines) | stat: -rw-r--r-- 3,553 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/*
 * rnamat.h
 * 
 * Header file for API for RNA matrix routines.  Used in parsing alignment
 * into matrix and later reading in matrix.
 *
 * Robert J. Klein
 * February 25, 2002
 */

#ifndef _RNAMAT_H
#define _RNAMAT_H

#include "esl_config.h"
#include "config.h"

#include "easel.h"
#include "esl_msa.h"

#include "structs.h"

/*
 * The 16 ordered nucleotide pairs, stored as two parallel strings:
 * character i of RNAPAIR_ALPHABET is the first nucleotide of pair i and
 * character i of RNAPAIR_ALPHABET2 is the second, giving
 * AA, AC, AG, AU, CA, ..., UG, UU for i = 0..15.
 * NOTE(review): presumably the same ordering as numbered_basepair() --
 * confirm against the implementation.
 */
#define RNAPAIR_ALPHABET "AAAACCCCGGGGUUUU"
#define RNAPAIR_ALPHABET2 "ACGUACGUACGUACGU"

/*
 * Matrix type
 *
 * Contains array in one dimension (to be indexed later), matrix size,
 * H, and E.
 *
 * The size examples below (edge 4 -> 10 elements) match storing one
 * triangle of the matrix including the diagonal, i.e.
 * full_size == edge_size * (edge_size + 1) / 2.
 */
typedef struct _matrix_t {
  double *matrix;        /* The values, as a one-dimensional array */
  int edge_size;         /* Size of one edge, e.g. 4 for 4x4 matrix */
  int full_size;         /* Num of elements, e.g. 10 for 4x4 matrix */
  double H;              /* NOTE(review): presumably the matrix's relative
			  * entropy -- confirm against rnamat.c */
  double E;              /* NOTE(review): presumably the matrix's expected
			  * score -- confirm against rnamat.c */
} matrix_t;

/*
 * Full matrix definition: an unpaired matrix and a paired matrix bundled
 * with their alphabet, a name, and the g background freq vector (added by
 * EPN).
 */
typedef struct _fullmat_t {
  const ESL_ALPHABET *abc;/* alphabet, we enforce it's eslRNA */
  matrix_t *unpaired;    /* NOTE(review): presumably the single-nucleotide
			  * matrix (cf. unpairedmat_size) -- confirm */
  matrix_t *paired;      /* NOTE(review): presumably the base-pair matrix
			  * (cf. pairedmat_size) -- confirm */
  char     *name;        /* name of the matrix */
  float    *g;           /* EPN: the background distro, g vector in RSEARCH paper
			  * this now appears in the RIBOSUM matrix files */
  int       scores_flag; /* TRUE if matrix values are log odds scores, FALSE if 
			  * they're target probs, or unfilled */
  int       probs_flag;  /* TRUE if matrix values are target probs, FALSE if 
			  * they're log odds scores, or unfilled */
} fullmat_t;

/*
 * Returns true if pos. C of seq B of msa A is a gap.
 *
 * A is a pointer to the msa; the residue is read from A->aseq[B][C] and
 * tested against the alphabet A->abc.  Every parameter is parenthesized
 * so that expression arguments (e.g. &msa, msa_list + i) expand
 * correctly.  A is evaluated twice, so do not pass an expression with
 * side effects.
 */
#define is_rna_gap(A, B, C) (esl_abc_CIsGap((A)->abc, (A)->aseq[(B)][(C)]))

/*
 * Returns true if position C of sequence B of msa A is a canonical
 * residue of the alphabet A->abc.
 *
 * A is a pointer to the msa; the residue is read from A->aseq[B][C].
 * NOTE(review): the earlier comment said "digitized sequence", but the
 * macro reads aseq and calls the character-based esl_abc_CIsCanonical --
 * confirm the msa is expected to be in text mode.
 *
 * Every parameter is parenthesized so that expression arguments (e.g.
 * &msa, msa_list + i) expand correctly.  A is evaluated twice, so do not
 * pass an expression with side effects.
 */
#define is_defined_rna_nucleotide(A, B, C) (esl_abc_CIsCanonical((A)->abc, (A)->aseq[(B)][(C)]))

/*
 * Maps the nucleotide character c to a small integer code as follows:
 *
 * A->0
 * C->1
 * G->2
 * T->3
 * U->3
 * else -> 4
 *
 * T and U share code 3; 4 is the "not a recognized nucleotide" code.
 * NOTE(review): whether lower-case letters are accepted is not visible
 * from this header -- confirm against the implementation.
 */
int numbered_nucleotide (char c);

/*
 * Maps base pair c,d (c is the first nucleotide, d the second) as follows:
 *
 * AA -> 0
 * AC -> 1
 * ....
 * TG -> 14
 * TT -> 15 (T==U)
 * Anything else maps to -1
 *
 * NOTE(review): this comment previously read "TG -> 15, TT -> 16".  With
 * 16 pairs numbered from AA -> 0 the last index must be 15, which also
 * agrees with pairedmat_size using 15 as the largest pair index and with
 * the 16-character RNAPAIR_ALPHABET strings.  Confirm against the
 * implementation.
 */
int numbered_basepair (char c, char d);

/*
 * Maps to index of matrix, using binary representation of
 * nucleotides (unsorted).
 *
 * X and Y are the two row/column numbers of a cell; the result is the
 * cell's position in the one-dimensional triangular array.  The pair is
 * unordered: matrix_index(X,Y) == matrix_index(Y,X), computed as
 * max*(max+1)/2 + min.
 *
 * See lab book 7, p. 3-4 for details of mapping function
 *
 * Every use of X and Y is parenthesized so that expression arguments such
 * as matrix_index(i+1, j) expand correctly.  Both arguments are evaluated
 * more than once, so do not pass expressions with side effects.
 */
#define matrix_index(X,Y) (((X)>(Y)) ? (X)*((X)+1)/2+(Y) : (Y)*((Y)+1)/2+(X))

/*
 * Element counts of the triangular arrays: one more than the index of
 * the last cell, (3,3) for single nucleotides and (15,15) for pairs.
 */
#define unpairedmat_size (1 + matrix_index(3, 3))
#define pairedmat_size (1 + matrix_index(15, 15))

/*
 * Setup the matrix by allocating matrix in two dimensions as triangle.
 * Initialize to 0.0
 *
 * Returns a pointer to the new matrix_t.
 * NOTE(review): whether size is the edge size or the total element count,
 * and what is returned on allocation failure, are not visible from this
 * header -- confirm against the implementation.
 */
matrix_t *setup_matrix (int size);

/*
 * Actually count the basepairs and gaps into the fullmat simply by summing
 * to existing values there.  Also counts nt counts to background_nt
 *
 * msa is the alignment being counted; fullmat and background_nt are
 * accumulated into, so existing values are added to rather than reset.
 * NOTE(review): the meaning of cutoff_perc and product_weights, and the
 * required length of background_nt, are not visible from this header --
 * confirm against the implementation.
 */
void count_matrix (ESL_MSA *msa, fullmat_t *fullmat, double *background_nt,
		   int cutoff_perc, int product_weights);

/*
 * Prints the matrix fullmat to the stream fp.
 */
void print_matrix (FILE *fp, fullmat_t *fullmat);

/*
 * Read the matrix from the open file matfp, using alphabet abc, and
 * return it as a fullmat_t.
 * NOTE(review): the failure return value and the caller's ownership of
 * the result (presumably released with FreeMat) are not visible from this
 * header -- confirm against the implementation.
 */
fullmat_t *ReadMatrix(const ESL_ALPHABET *abc, FILE *matfp);

/*
 * Opens the matrix file named by matfile and returns its stream.
 * NOTE(review): any search-path behavior and the return value when the
 * file cannot be opened are not visible from this header -- confirm
 * against the implementation.
 */
FILE *MatFileOpen (char *matfile);

/*
 * Reports the minimum allowed sum of alpha + beta for the matrix fullmat.
 */
float get_min_alpha_beta_sum (fullmat_t *fullmat);

/* Free a fullmat_t object; fullmat must not be used afterwards. */
void FreeMat(fullmat_t *fullmat);

/* Convert a matrix with log odds scores to target freqs.
 * NOTE(review): the int return is presumably a status code, and the
 * scores_flag/probs_flag fields of fullmat are presumably updated --
 * confirm against the implementation. */
int ribosum_calc_targets(fullmat_t *fullmat);

/* Resolve degeneracies in a single seq MSA by replacing each degenerate
 * residue with the most likely target residue within its degenerate
 * alphabet, using the matrix fullmat.
 * NOTE(review): the int return is presumably a status code -- confirm
 * against the implementation. */
int ribosum_MSA_resolve_degeneracies(fullmat_t *fullmat, ESL_MSA *msa);

/*
 * Maps the nucleotide index i back to its residue, as follows:
 * 0->A
 * 1->C
 * 2->G
 * 3->U
 * else->-1
 *
 * The result is returned as an int so that -1 can signal an index
 * outside 0..3.
 */
int unpaired_res (int i);

#endif