1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
|
/*
* rnamat.h
*
* Header file for API for RNA matrix routines. Used in parsing alignment
* into matrix and later reading in matrix.
*
* Robert J. Klein
* February 25, 2002
*/
#ifndef _RNAMAT_H
#define _RNAMAT_H
#include "esl_config.h"
#include "config.h"
#include "easel.h"
#include "esl_msa.h"
#include "structs.h"
#define RNAPAIR_ALPHABET "AAAACCCCGGGGUUUU"
#define RNAPAIR_ALPHABET2 "ACGUACGUACGUACGU"
/*
* Matrix type
*
* Contains array in one dimension (to be indexed later), matrix size,
* H, and E.
*/
typedef struct _matrix_t {
double *matrix;
int edge_size; /* Size of one edge, e.g. 4 for 4x4 matrix */
int full_size; /* Num of elements, e.g. 10 for 4x4 matirx */
double H;
double E;
} matrix_t;
/*
* Full matrix definition, includes the g background freq vector (added by EPN).
*/
typedef struct _fullmat_t {
const ESL_ALPHABET *abc;/* alphabet, we enforce it's eslRNA */
matrix_t *unpaired;
matrix_t *paired;
char *name;
float *g; /* EPN: the background distro, g vector in RSEARCH paper
* this now appears in the RIBOSUM matrix files */
int scores_flag; /* TRUE if matrix values are log odds scores, FALSE if
* they're target probs, or unfilled */
int probs_flag; /* TRUE if matrix values are target probs, FALSE if
* they're log odds scores, or unfilled */
} fullmat_t;
/* Returns true if pos. C of seq B of msa A is a gap */
#define is_rna_gap(A, B, C) (esl_abc_CIsGap(A->abc, A->aseq[B][C]))
/* Returns true if position C of digitized sequence B of msa A is a canonical */
#define is_defined_rna_nucleotide(A, B, C) (esl_abc_CIsCanonical(A->abc, A->aseq[B][C]))
/*
* Maps c as follows
*
* A->0
* C->1
* G->2
* T->3
* U->3
* else -> 4
*/
int numbered_nucleotide (char c);
/*
* Maps base pair c,d as follows:
*
* AA -> 0
* AC -> 1
* ....
* TG -> 15
* TT -> 16 (T==U)
* Anything else maps to -1
*/
int numbered_basepair (char c, char d);
/*
* Maps to index of matrix, using binary representation of
* nucleotides (unsorted).
*
* See lab book 7, p. 3-4 for details of mapping function
*/
#define matrix_index(X,Y) ((X>Y) ? X*(X+1)/2+Y: Y*(Y+1)/2+X)
#define unpairedmat_size (matrix_index(3,3) + 1)
#define pairedmat_size (matrix_index (15,15) + 1)
/*
* Setup the matrix by allocating matrix in two dimensions as triangle.
* Initialize to 0.0
*/
matrix_t *setup_matrix (int size);
/*
* Actually count the basepairs and gaps into the fullmat simply by summing
* to existing values there. Also counts nt counts to background_nt
*/
void count_matrix (ESL_MSA *msa, fullmat_t *fullmat, double *background_nt,
int cutoff_perc, int product_weights);
/*
* Prints the matrix
*/
void print_matrix (FILE *fp, fullmat_t *fullmat);
/*
* Read the matrix from a file
*/
fullmat_t *ReadMatrix(const ESL_ALPHABET *abc, FILE *matfp);
/*
* Opens matrix file
*/
FILE *MatFileOpen (char *matfile);
/*
* Reports minium allowed sum of alpha + beta for matrix
*/
float get_min_alpha_beta_sum (fullmat_t *fullmat);
/* Free a fullmat_t object */
void FreeMat(fullmat_t *fullmat);
/* convert a matrix with log odds scores to target freqs */
int ribosum_calc_targets(fullmat_t *fullmat);
/* resolve degeneracies in a single seq MSA by replacing
* with most likely target residue within degenerate alphabet */
int ribosum_MSA_resolve_degeneracies(fullmat_t *fullmat, ESL_MSA *msa);
/*
* Maps i as follows:
* 0->A
* 1->C
* 2->G
* 3->U
* else->-1
*/
int unpaired_res (int i);
#endif
|