File: glimmer3.hh

package info (click to toggle)
tigr-glimmer 3.02b-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 13,948 kB
  • sloc: cpp: 24,416; awk: 232; csh: 220; makefile: 147; sh: 51
file content (241 lines) | stat: -rw-r--r-- 6,984 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
//  A. L. Delcher
//
//  File:  glimmer3.hh
//
//  Last Modified:  Tue May  9 10:25:40 EDT 2006
//
//  Declarations for  Glimmer3



#ifndef  __GLIMMER3_HH_INCLUDED
#define  __GLIMMER3_HH_INCLUDED


#include  "delcher.hh"
#include  "fasta.hh"
#include  "gene.hh"
#include  "icm.hh"


// Default values of global variables

// Compile-time defaults.  The three on/off defaults below are all typed
//  bool  so that they agree with one another and with the  true / false
// literals they are initialized from; a  bool  still converts implicitly
// wherever an  int  is expected.

static const bool  DEFAULT_GENOME_IS_CIRCULAR = true;
static const int  DEFAULT_MIN_GENE_LEN = 100;
static const int  DEFAULT_MAX_OLAP_BASES = 30;
static const int  DEFAULT_RIBOSOME_WINDOW_SIZE = 20;
static const double  DEFAULT_START_PROB []
     = {0.60, 0.30, 0.10};
  // three entries that sum to 1.0
static const int  DEFAULT_THRESHOLD_SCORE = 30;
static const bool  DEFAULT_USE_FIRST_START_CODON = false;
static const bool  DEFAULT_USE_INDEPENDENT_SCORE = true;
static const int  HI_SCORE = 100;
  // the highest possible ICM score for an orf
static const double  LONG_ORF_SCORE_PER_BASE = 0.03;
  // artificially good score value for sufficiently long orfs
  //**ALD Should maybe change to a lower value like 0.01 ??


// Kinds of event.  No explicit values are given, so the enumerators
// take the consecutive values 0 .. 5 in the order listed.
enum  Event_t
  {
   INITIAL,      // 0
   FWD_START,    // 1
   FWD_STOP,     // 2
   REV_START,    // 3
   REV_STOP,     // 4
   TERMINAL      // 5
  };


struct  Event_Node_t
  {
   int  id : 24;
   int  frame : 3;
   unsigned  is_first_start : 1;
   unsigned  disqualified : 1;
   unsigned  truncated : 1;
   Event_t  e_type;
   int  pos, pwm_sep;
     // pos is the last base of the codon, numbered starting at 1
   double  score, pwm_score;
   Event_Node_t  * frame_pred;
   Event_Node_t  * best_pred;

   Event_Node_t  ()   // default constructor
     { is_first_start = disqualified = truncated = 0; }

   void  Set_Frame_From_Pos
       (void);
  };


// Strict "less than" comparison of two event-node pointers by the
//  pos  member of the nodes they point to.  Returns  true  iff
//  a -> pos  is smaller than  b -> pos .
static bool  Event_Pos_Cmp
    (Event_Node_t * const & a, Event_Node_t * const & b)
  {
   const int  a_pos = a -> pos;
   const int  b_pos = b -> pos;

   return  (b_pos > a_pos);
  }


// Plain record of three integers and a character pointer.
// NOTE(review): presumably the coordinates and direction of one orf
// plus its identifying tag -- confirm against the code that fills it.
struct  Orf_Pos_t
  {
   int  start;
   int  stop;
   int  dir;
   char  * tag;
     // who owns the pointed-to string is not visible from this header
  };


// A pair of integer bounds.
// NOTE(review): whether  hi  is inclusive is not visible from this
// header -- confirm against the users of this type.
struct  Range_t
  {
   int  lo;
   int  hi;
  };


// Strict "less than" comparison of two ranges by their  lo  member
// only;  hi  is not consulted.  Returns  true  iff  a . lo  is
// smaller than  b . lo .
static bool  Range_Cmp
    (const Range_t & a, const Range_t & b)
  {
   const int  a_lo = a . lo;
   const int  b_lo = b . lo;

   return  (b_lo > a_lo);
  }


// A pair of integer bounds plus one extra integer,  max_prev .
// NOTE(review): presumably  max_prev  caches a maximum over preceding
// entries in some ordered list -- confirm against the code that sets it.
struct  Position_t
  {
   int  lo;
   int  hi;
   int  max_prev;
  };


// Record describing one start entry: two integers, two doubles, an
// 8-bit field, a 1-bit flag and a boolean.
// NOTE(review): the meaning of the individual members is not visible
// from this header -- confirm against the code that fills the record.
struct  Start_t
  {
   int  j;
   int  pos;
   double  score;
   double  rate;
   int  which : 8;
   unsigned  truncated : 1;
   bool  first;
  };



// Function prototypes.  Only declarations appear here; none of the
// definitions are part of this header.  Every prototype is declared
//  static  (internal linkage) except  Position_To_Frame .
// The unqualified names  vector  and  string  are not declared in this
// header -- presumably the included headers bring them into scope.
// Array parameters written with a bound (e.g.  [3]  or  [6] ) are adjusted
// to pointers by the language, so the bound is documentation only and is
// not checked by the compiler.

static void  Add_Events
    (const Orf_t & orf, vector <Start_t> & start_list, int id);
static void  Add_PWM_Score
    (Event_Node_t * p);
static void  All_Frame_Score
    (const string & s, int offset, int frame, vector <double> & af);
static void  Clear_Events
    (void);
static void  Complement_Transfer
    (string & buff, const string & s, int lo, int hi);
static void  Disqualify
    (Event_Node_t * p, int cutoff);
static void  Do_Fwd_Stop_Codon
    (int i, int frame, int prev_fwd_stop [3], int first_fwd_start [3],
     int first_fwd_stop [3], int first_base, bool hit_ignore,
     vector <Orf_t> & orf_list);
static void  Echo_General_Settings
    (FILE * fp);
static void  Echo_Specific_Settings
    (FILE * fp, int len);
static double  Entropy_Distance_Ratio
    (int start, int len, int fr);
// NOTE(review):  ep  is taken by value, so the whole vector is copied on
// every call -- confirm whether a const reference was intended.
static int  Find_Uncovered_Position
    (vector <Event_Node_t *> ep);
static void  Find_Orfs
    (vector <Orf_t> & orf_list);
static void  Find_Stops_Reverse
    (const string & s, int len, vector <bool> & has_stop);
static void  Finish_Orfs
    (bool use_wraparound, const int prev_rev_stop [3],
     const int last_rev_start [3], int last_position,
     vector <Orf_t> & orf_list);
static void  Fix_Wrap
    (int & p, const int n);
static int  Frame_To_Sub
    (int f);
static void  Get_Ignore_Regions
    (void);
static void  Get_Orf_Pos_List
    (void);
static void  Handle_First_Forward_Stop
    (int fr, int pos, int start_pos, int first_base, int & gene_len,
     int & orf_len, bool use_wraparound);
static void  Handle_First_Reverse_Stop
    (int pos, int last_start, int & gene_len, int & orf_stop, bool hit_ignore);
static void  Handle_Last_Reverse_Stop
    (int fr, const int prev_rev_stop [3], const int last_rev_start [3],
     int & gene_len, int & orf_len, bool use_wraparound, int last_position);
static void  Initialize_Terminal_Events
    (Event_Node_t & first_event, Event_Node_t & final_event,
     Event_Node_t * best_event [6], Event_Node_t * last_event [6]);
// NOTE(review):  ds  and  set_zero  are const but taken by value, so both
// vectors are copied on every call -- confirm whether const references
// were intended.  Only  is  is passed by (non-const) reference.
static void  Integerize_Scores
    (const vector <double> ds, int hi_score, const vector <bool> set_zero,
    vector <int> & is);
static double  Olap_Score_Adjustment
    (int lo, int hi, int f1, int f2);
static int  On_Seq_0
    (int i);
static int  On_Seq_1
    (int i);
static void  Output_Extra_Start_Info
    (FILE * fp, int i, int lo, int hi, int frame,
     vector <Start_t> & start_list);
static void  Parse_Command_Line
    (int argc, char * argv []);
// Function template over the element type  DT  of the vector.
template  <class DT>
static void  Permute_By_Frame
    (vector <DT> & v, int frame);
// NOTE(review): the only prototype in this list without  static , so it has
// external linkage -- confirm that this is intentional.
int  Position_To_Frame
    (int p);
static void  Print_Comma_Separated_Strings
    (const vector <const char *> & v, FILE * fp);
static void  Print_Headings
    (FILE * fp);
static void  Print_Orflist_Headings
    (FILE * fp);
static const char  * Print_String
    (Event_t e);
static void  Prob_To_Logs
    (vector <double> & v);
static void  Process_Events
    (void);
static void  Process_Fwd_Start_Event
    (Event_Node_t * ep);
static void  Process_Fwd_Stop_Event
    (Event_Node_t * ep);
static void  Process_Initial_Event
    (Event_Node_t * ep);
static void  Process_Rev_Start_Event
    (Event_Node_t * ep);
static void  Process_Rev_Stop_Event
    (Event_Node_t * ep);
// The next two prototypes have identical parameter lists;  score  and
//  separation  are non-const references and so can be written by the callee.
static void  PWM_Score_Fwd_Start
    (int pos, const PWM_t & pwm, int window, double & score, int & separation);
static void  PWM_Score_Rev_Start
    (int pos, const PWM_t & pwm, int window, double & score, int & separation);
static void  Read_Entropy_Profiles
    (const char * fn, bool & errflg);
static void  Read_Sequences
    (FILE * fp, vector <string> & seq_list, vector <string> & hdr_list,
     int & seq_ct);
static void  Requalify
    (Event_Node_t * p, int cutoff);
static void  Reverse_Complement_Transfer
    (string & buff, const string & s, int lo, int hi);
static void  Reverse_Transfer
    (string & buff, const string & s, int start, int len);
static void  Score_Orflist
    (FILE * detail_fp, FILE * summary_fp);
static void  Score_Orfs
    (vector <Orf_t> & orf_list, vector <Gene_t> & gene_list, FILE * fp);
static void  Score_Separate_Input
    (const string & seq, const string & hdr, int seq_num, FILE * detail_fp,
     FILE * predict_fp);
static void  Set_Final_Event
    (Event_Node_t & fe, Event_Node_t * best_event [6],
     int seq_len);
static void  Set_GC_Fraction
    (double & gc, const vector <string> & s);
static void  Set_Ignore_Score_Len
    (void);
static void  Set_Start_And_Stop_Codons
    (void);
static void  Shift_Events
    (vector <Event_Node_t *> & ep, int reference_pos);
static void  Show_Events
    (FILE * fp);
static void  Trace_Back
    (FILE * fp, const Event_Node_t & final_event);
static void  Usage
    (void);
static void  Wrap_Around_Back
    (int wfr, int pos, int & gene_len, int & orf_len);
static void  Wrap_Through_Front
    (int fr, int pos, int & gene_len, int & orf_len);

#endif