/**
 *
 * struct.h: Basic data structures
 *
 * Author: A.R.Subramanian
 */
/**
 * Score matrix (e.g. BLOSUM62).
 */
struct scr_matrix {
    // number of amino acids
    int length;

    // maximum among all entries in the data array
    int max_score;

    // resolves the character of an amino acid to its number
    int *char2num;

    // resolves the number of an amino acid to its character
    int *num2char;

    // the matrix itself, indexed by the number of the particular amino acid
    int *data;

    // number of pairs of amino acids (i,j) having a score equal to
    // the value of the index
    int *dist;

    // NOTE(review): not documented in this header -- confirm meaning at the
    // place where it is filled
    long double **raw_dist;

    // NOTE(review): not documented here; presumably the average similarity
    // score of the matrix -- confirm
    double avg_sim_score;
};
/**
 * Raw sequence.
 */
struct seq {
    // sequence data
    char *data;

    // name/description of the sequence
    char *name;

    // number of the sequence
    int num;

    // length of the sequence
    int length;

    // NOTE(review): not documented in this header -- confirm meaning
    int max_seen;

    // numbers for retranslation from protein to DNA
    char *dna_num;

    // reading frame of the longest ORF
    int orf_frame;

    // ORF translation or sequence translation on the Crick strand
    char crick_strand;
};
/**
 * Sequence collection.
 */
struct seq_col {
    // array of the sequences
    struct seq *seqs;

    // average length of the sequences
    int avg_length;

    // number of sequences
    int length;
};
/**
 * Probability distribution of scores in diagonals.
 */
struct prob_dist {
    // pointer to the associated score matrix
    struct scr_matrix *smatrix;

    // distribution of scores: entry [i][j] contains the probability of a
    // diag of length i having score >= j
    long double **data;

    // distribution of scores: entry [i][j] contains
    // -log(probability of a diag of length i having score >= j)
    double **log_data;

    // maximum diag length in the distribution
    unsigned int max_dlen;
};
/**
 * Part of a sequence (auxiliary data structure).
 */
struct seq_part {
    // a number that indicates a position in an array
    int num;

    // the pointer to the sequence
    struct seq *sq;

    // start position in the sequence
    int startpos;
};
/**
 * Diagonal in the dotmatrix.
 */
struct diag {
    // first sequence part
    struct seq_part seq_p1;

    // second sequence part
    struct seq_part seq_p2;

    // length of the diag
    unsigned int length;

    // score of the diag
    long score;

    // original score of the diag
    long orig_score;

    // predecessor diag for dynamic programming
    struct diag *pred_diag;

    // col predecessor diag for dynamic programming
    struct diag *col_pred_diag;

    // position in the diag pool
    int pool_pos;

    // whether the diag meets the threshold
    char meetsThreshold;

    // --- for vertex cover ---
    int degree;
    int max_degree;
    struct diag **neighbours;

    // whether this is an anchor diag
    char anchor;

    // marking flag for arbitrary use
    char marked;

    // is > 0 if this is a multi dg
    char multi_dg;

    // the contained dgs if this is a multi dg
    struct diag **multi_cont;

    // size of multi_cont
    int multi_length;

    // weight of the diag = -log(prob)
    double weight;

    // weight sum for dialign
    double weight_sum;

    // weight factor
    double weight_fac;

    // overlap weight
    double ov_weight;

    // total_weight = weight + o_weight (presumably ov_weight -- confirm)
    double total_weight;
};
/**
 * Collection of diags.
 */
struct simple_diag_col {
    // number of diags
    unsigned int length;

    // total weight
    double total_weight;

    // weight factor
    double weight_fac;

    // the array of diags
    struct diag **data;
};
/**
 * Guide tree node.
 */
struct gt_node {
    // whether it is a leaf
    char isLeaf;

    // the sequence numbers
    int *seq_num;

    // length of the sequence numbers array
    int seq_num_length;

    // successor nodes
    struct gt_node *succ1;
    struct gt_node *succ2;
};
/**
* vertex cover node
struct vc_node {
double weight;
struct diag *dg;
int degree;
struct vc_node *adjacents;
}
*/
/**
 * Collection of all diagonals sorted by the sequences.
 */
struct diag_col {
    // number of sequences involved
    int seq_amount;

    // diag_matrix[i + seq_amount * j] contains all diags found involving
    // the sequences i and j
    struct simple_diag_col **diag_matrix;

    // total weight
    double total_weight;

    // average weight
    double average_weight;

    // all diags, unordered
    struct diag **diags;

    // number of diags found
    unsigned int diag_amount;

    // NOTE(review): not documented here; by its type, the root node of a
    // guide tree -- confirm
    struct gt_node *gt_root;
};
/**
 * Diag container.
 */
struct diag_cont {
    // the contained diag
    struct diag *dg;

    // pointer to the next container
    struct diag_cont *next;
};
/**
 * Alignment position.
 */
struct algn_pos {
    // state of the position; "orphane" means not aligned to any position
    // (the original note breaks off after this -- other states unconfirmed)
    char state;

    // diags that are aligned with that position
    struct diag_cont *dg_cont;

    // NOTE(review): row/col are not documented in this header -- confirm
    int row;
    int col;

    // predecessor frontier, only filled if non-orphane
    int *predF;

    // successor frontier, only filled if non-orphane
    int *succF;

    // for output
    char *proceed;

    // in case of orphane, where to find the predF or succF
    int predFPos;
    int succFPos;

    // equivalence class minimum alignment position (for output)
    int *eqcAlgnPos;

    // equivalence class parent
    struct algn_pos *eqcParent;

    // equivalence class rank (>= maximum number of children)
    int eqcRank;
};
/**
 * Alignment.
 */
struct alignment {
    // boolean array indicating for each sequence whether it is orphane or not
    char *seq_is_orphane;

    // the greatest position in the alignment (including all -'s);
    // if < 0: the alignment has not yet been prepared
    int max_pos;

    // all the sequences involved
    struct seq_col *scol;

    // the alignment
    struct algn_pos **algn;

    // the total weight of the alignment
    double total_weight;

    // pointer to the next alignment in the sorted linked list
    struct alignment *next;
};
|