File: struct.h

package info (click to toggle)
dialign-t 1.0.2-15
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,244 kB
  • sloc: ansic: 5,822; xml: 355; makefile: 34; sh: 25
file content (222 lines) | stat: -rwxr-xr-x 7,117 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
/**
 *
 * struct.h: Basic data structures
 *
 * Author:  A.R.Subramanian
 */


/**
 * score matrix (e.g. BLOSUM62)
 */
struct scr_matrix
{
  int length;   // number of amino acids
  int max_score; // maximum among all entries in the data array
  int *char2num; // resolves the character of an amino acid to its number
  int *num2char; // resolves the number of an amino acid to its character
  int *data;     // contains the matrix indexed by the number of the particular amino acid
  int *dist;   // number of pairs of amino acids (i,j) having score at equal to              // the  value of the index
  long double **raw_dist;
  double avg_sim_score;

};

/**
 * raw sequence
 */
struct seq 
{
  char *data;  // sequence data  
  char *name;  // name/description of the sequence
  int num;     // number of the sequence
  int length;  // length of sequence
  int max_seen;
  char *dna_num; // Numbers fo retranslation from Protein to DNA
  int orf_frame; // reading frame of the longest orf
  char crick_strand; // orf translation or sequence translation on crickstrand
};


/**
 * sequence collection
 */
struct seq_col 
{
  struct seq *seqs; // array of the sequences
  int avg_length;   // average length of sequences
  int length;       // number of sequences
};

/**
 * probability distribution of scores in diagonals
 */
struct prob_dist {
  struct scr_matrix *smatrix; // pointer to the associated score matrix
  long double **data;  // distribution of scores dist[i][j] contains the     
                       // probability of a diags of length i having score >=j
  double **log_data; // distribution of scores dist[i][j] contains the     
                  // -log(probability of a diags of length i having score >=j)
  //  long double *expect;      // score expectancy for each diaglen
  unsigned int max_dlen;     // maximum diaglength in dist
};


/**
 * part of a sequence (auxiliary data structure)
 */
struct seq_part {
  int num;         // a number that indicates a position in an array
  struct seq* sq;  // the pointer to the sequence
  int startpos;   // startpos in the sequence
  //int leftmargin;
  //int rightmargin;
};


/**
 * diagonal in the dotmatrix
 */
struct diag {
  struct seq_part seq_p1; // first sequence part
  struct seq_part seq_p2; // seconde sequence part
  unsigned int length;     // length of the diag
  long score;     // score of the diag
  long orig_score;     // orig score of the diag

  struct diag *pred_diag;  // predecessor diag for dynamic programming
  struct diag *col_pred_diag;  // col predecessor diag for dynamic programming
  int pool_pos;            // position in diag pool

  char meetsThreshold;	   // whether diag meets threshold

  // for vertex cover
  int degree;
  int max_degree;
  struct diag **neighbours;

  char anchor;              // if this is an anchor diag
  char marked;              // marking flag for arbitrary use
  char multi_dg;            // is >0 if this is a multi dg
  struct diag **multi_cont; // the contained dgs of this is a multi dg
  int multi_length;         // size of multi_cont

  //  char onlyOverThres;

  double weight;           // weight of the diag = -log(prob)
  double weight_sum;       // weight sum for dialign
  double weight_fac;       // weight factor
  double ov_weight;       // overlap weight
  double total_weight;       // total_weight = weight+o_weight
};

/**
 * collection of diag 
 */
struct simple_diag_col {
  unsigned int length;       // number of diags
  double total_weight;       // total weight
  double weight_fac;         // weight factor
  struct diag** data;        // the array of diags
};

/**
 * guide tree node
 */
struct gt_node {
  char isLeaf;    // whether it is leaf
  int *seq_num;    // the sequence numbers 
  int seq_num_length; // length of sequence numbers array
  struct gt_node *succ1;  // successor nodes
  struct gt_node *succ2;
};


/**
 * vertex cover node
struct vc_node {
  double weight;
  struct diag *dg;
  int degree;
  struct vc_node *adjacents;
}
 */


/**
 * collection of all diagonals sorted by the sequences 
 */ 
struct diag_col {
  int seq_amount;            // number of sequences involved
  struct simple_diag_col** diag_matrix; // diag_matrix[i +seq_amount *j] contains
                             // all diags found involving the sequences i and j

  double total_weight;       // total weight
  double average_weight;     // average_weight
  struct diag** diags; // all diags unordered
  unsigned int diag_amount; // number of diags found 
  struct gt_node *gt_root;
};


/**
 * diag container
 */
struct diag_cont {
  struct diag* dg;
  struct diag_cont *next;
};

/**
 * alignment position
 */
struct algn_pos {
  //  int seq_num;               // sequence number
  //  unsigned long pos_in_seq;  // position in the sequence
  char state;         // orphane: not aligned to any pos, 

  struct diag_cont *dg_cont; // diags that are aligned with that position

  int row;
  int col;
  //  unsigned int succFPos;  // if orphane, the position holding the succF
  // unsigned int predFPos;  // analogous to succFPos
  //  char* isAli;            // if alignemnt at the positions exist
  int* predF;    // predecessor frontier, only filled if non-orphane
  int* succF;    // successor frontier, only filled if non-orphane

  char *proceed;    // for output
  //char isInherited; // whether the pointers are inherited
  int predFPos;  // in case of orphane, where to find the predF or succF
  int succFPos;

  int *eqcAlgnPos; // equivalence class minimum alignment position (for output)
  struct algn_pos *eqcParent; // equivalence class parent
  int eqcRank; // equivalence class rank (>= maximum number of children)
  //  unsigned int *maxpos; // needed for output of the alignment 
};

/**
 * alignment
 */
struct alignment {
  //int seq_amount;            // number of sequences involved
  //char *redo_seqs;           // which pairs of sequences are to be aligned again
  char *seq_is_orphane;      // boolean array indicating for each sequence 
                             // whether it is orphane or not
  int max_pos;               // the greatest position in the alignment (including all -'s)
                             // if <0: the alignment has not yet been prepared
  struct seq_col *scol;      // all the sequences involved
  struct algn_pos **algn;    // the alignment
  double total_weight;       // the total weight of the alignment
  struct alignment *next;    // pointer to next alignment in the sorted linked list
  //struct alignment *prev;    // pointer to previous alignment in the sorted linked list
  //unsigned long pos;         // position in the sorted linked list

  //struct diag** aligned_diags; // all aligned diags
  //int aligned_diags_amount;
  //int max_aligned_diags_amount;
  //int orig_max_aligned_diags_amount;
  
  //struct diag_cont* backlog_diags; // all backlog diags
};