File: window-acgt.cc

package info (click to toggle)
tigr-glimmer 3.02b-5
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 13,948 kB
  • sloc: cpp: 24,416; awk: 232; csh: 220; makefile: 147; sh: 51
file content (288 lines) | stat: -rw-r--r-- 7,233 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
//  A. L. Delcher
//
//  File:  window-acgt.cc
//
//  Last Modified:  Tue May 23 10:30:35 EDT 2006
//
//  Read a multifasta file from stdin and report the acgt content
//  of windows in it.  Command line arguments specify the
//  length of windows and their separation.
//  Output goes to stdout in the format
//  window-start  window-len  A's  C's  G's  T's  #other  %GC
//  Note that the last window can be shorter than the specified
//  length.



#include  "window-acgt.hh"


// External variables

extern int  Verbose;
extern int  Global_Debug_Flag;


// Global variables

static bool  Output_Percents = false;
  // Set true by the -p option.  When true, the five per-category
  // columns (a, c, g, t, other) are printed as percentages of the
  // window size instead of raw counts.  The final %GC column is a
  // percentage in either mode.
static int  Window_Len;
  // Width of window to process; first positional argument on the
  // command line.  Must be at least 1.  Also used as the size of the
  // ring buffer holding the characters of the current window.
static int  Window_Skip;
  // Number of characters to slide window before reporting the next
  // result; second positional argument on the command line.  Must be
  // at least 1.



static void  Print_Heading
    (void)

//  Print the line of column titles that follows each sequence header.

  {
   printf ("%8s %7s %6s %6s %6s %6s %6s %6s\n", "Position", "Length",
        "As", "Cs", "Gs", "Ts", "Other", "%GC");

   return;
  }



int  main
    (int argc, char * argv [])

//  Read multifasta data from stdin and, for each sequence, print the
//  a/c/g/t/other content of windows of  Window_Len  characters whose
//  start positions are  Window_Skip  apart.  Window length and skip
//  are taken from the command line in  argv [0 .. (argc - 1)] .
//  Each header line is echoed to stdout followed by a line of column
//  titles.  Whitespace inside sequence data is ignored.
//  Returns  0  on normal completion.

  {
   vector <char>  window;  // the actual window characters (a ring buffer)
   char  line [MAX_LINE];
   int  win_pos;    // position of the first character in the current window
   int  win_next;   // next window position to be printed
   int  win_size;   // number of characters in the current window
   int  win_sub;    // subscript of next position in  window
   int  last_pos;   // last window position printed
   int  count [5] = {0};
   bool  mid_line = false;
     // true iff the previous chunk returned by  fgets  did not end
     // its input line (the line was longer than the buffer)
   bool  in_header = false;
     // true iff the input line currently being read is a header line
   int  i;


   Verbose = 0;

   Parse_Command_Line (argc, argv);

   window . resize (Window_Len);

   win_pos = win_next = 1;
   win_sub = win_size = last_pos = 0;

   while  (fgets (line, MAX_LINE, stdin) != NULL)
     {
      const bool  starts_line = ! mid_line;
      char  * end;

      // Determine whether this chunk reaches the end of its input line
      for  (end = line;  * end != '\0';  end ++)
        ;
      mid_line = (end != line && end [-1] != '\n');

      // Only the first chunk of an input line decides whether the line
      // is a header; later chunks of an over-long line inherit that.
      if  (starts_line)
          in_header = (First_Non_Blank (line) == '>');

      if  (in_header)
          {
           if  (starts_line)
               {
                // New sequence: flush the previous one and start over
                if  (win_pos != last_pos)
                    Finish (win_pos, win_size, win_next, count, window, win_sub);
                win_pos = win_next = 1;
                win_sub = win_size = last_pos = 0;
                for  (i = 0;  i < 5;  i ++)
                  count [i] = 0;
               }
           fputs (line, stdout);
           if  (! mid_line)
               Print_Heading ();
          }
        else
          {
           char  * p;

           for  (p = line;  * p != '\0';  p ++)
             if  (! isspace ((unsigned char) * p))
                 {
                  // Cast above: <ctype> functions are undefined for
                  // negative  char  values
                  if  (win_size == Window_Len)
                      {
                       count [Subscript (window [win_sub])] --;
                         // Subtract for character sliding out of the window
                       win_pos ++;
                      }
                    else
                      win_size ++;
                  count [Subscript (* p)] ++;
                  window [win_sub] = * p;
                  win_sub = (win_sub + 1) % Window_Len;

                  if  (win_size == Window_Len && win_pos == win_next)
                      {
                       Print_Line (win_pos, win_size, count);
                       last_pos = win_pos;
                       win_next += Window_Skip;
                      }
                 }
          }
     }

   // Input ended inside a header line with no terminating newline;
   // the column titles for it have not been printed yet.
   if  (in_header && mid_line)
       Print_Heading ();

   if  (win_pos != last_pos)
       Finish (win_pos, win_size, win_next, count, window, win_sub);

   return  0;
  }



static void  Finish
    (int win_pos, int win_size, int win_next, int count [5],
     const vector <char> & window, int win_sub)

//  Emit the last (possibly short) output line of a sequence.
//  The current window starts at position  win_pos  and holds
//  win_size  characters whose tallies are in  count .  Characters
//  are dropped from the front of the window, read from the ring
//  buffer  window  starting at subscript  win_sub , until the window
//  starts at  win_next  or becomes empty.  If any characters remain
//  the resulting window is printed.  Entries of  count  are
//  decremented for every character dropped.

  {
   for  ( ;  win_size > 0 && win_pos < win_next;  win_pos ++, win_size --)
     {
      const char  leaving = window [win_sub];

      -- count [Subscript (leaving)];
      win_sub = (win_sub + 1) % Window_Len;
     }

   if  (win_size <= 0)
       return;   // nothing left to report

   Print_Line (win_pos, win_size, count);

   return;
  }



static void  Parse_Command_Line
    (int argc, char * argv [])

//  Get options and parameters from command line with  argc
//  arguments in  argv [0 .. (argc - 1)] .

  {
   bool  errflg = false;
   int  ch;

   optarg = NULL;

#if  ALLOW_LONG_OPTIONS
   int  option_index = 0;
   static struct option  long_options [] = {
        {"help", 0, 0, 'h'},
        {"percen", 0, 0, 'p'},
        {0, 0, 0, 0}
      };

   while  (! errflg
        && ((ch = getopt_long (argc, argv, "hp",
                     long_options, & option_index)) != EOF))
#else
   while  (! errflg
        && ((ch = getopt (argc, argv, "hp")) != EOF))
#endif

     switch  (ch)
       {
        case  'h' :
          Usage ();
          exit (EXIT_SUCCESS);

        case  'p' :
          Output_Percents = true;
          break;

        default :
          errflg = true;
       }

   if  (errflg || optind > argc - 2)
       {
        Usage ();
        exit (EXIT_FAILURE);
       }

   Window_Len = int (strtol (argv [optind ++], NULL, 10));
   Window_Skip = int (strtol (argv [optind ++], NULL, 10));

   if  (Window_Len < 1)
       {
        sprintf (Clean_Exit_Msg_Line, "ERROR:  Bad window length = %d", Window_Len);
        SIMPLE_THROW (Clean_Exit_Msg_Line);
       }
   if  (Window_Skip < 1)
       {
        sprintf (Clean_Exit_Msg_Line, "ERROR:  Bad window skip = %d", Window_Skip);
        SIMPLE_THROW (Clean_Exit_Msg_Line);
       }

   return;
  }



static void  Print_Line
    (int win_pos, int win_size, int count [5])

//  Write one output line for the window that starts at position
//  win_pos  and holds  win_size  characters.   count [0 .. 4]  are
//  the tallies for the window, in the order given by  Subscript .
//  They are printed as percentages of  win_size  when
//  Output_Percents  is set and as raw counts otherwise.  The last
//  column is the combined percentage of entries 1 and 2 (c and g).

  {
   const int  gc_total = count [1] + count [2];
   int  k = 0;

   printf ("%8d %7d", win_pos, win_size);

   while  (k < 5)
     {
      if  (Output_Percents)
          printf (" %6.1f", Percent (count [k], win_size));
        else
          printf (" %6d", count [k]);
      k ++;
     }

   printf (" %6.1f", Percent (gc_total, win_size));
   putchar ('\n');

   return;
  }



static int  Subscript
    (char ch)

//  Return the subscript (in  count ) corresponding to  ch :
//  0 for a, 1 for c, 2 for g, 3 for t (upper or lower case) and
//  4 for any other character.

  {
   // tolower  is undefined for negative arguments other than EOF, so
   // convert to  unsigned char  first in case  ch  is a non-ASCII byte
   // on a platform where plain  char  is signed.
   switch (tolower ((unsigned char) ch))
     {
      case  'a' :
        return  0;
      case  'c' :
        return  1;
      case  'g' :
        return  2;
      case  't' :
        return  3;
      default :
        return  4;
     }
  }



static void  Usage
    (void)

//  Write the command-line synopsis and the description of the
//  options for this program to stderr.

  {
   FILE  * out = stderr;

   fputs (
       "USAGE:  window-acgt [options] window-len window-skip < input-file\n"
       "\n"
       "Read multi-fasta-format file from standard input.\n"
       "Print the acgt-content of windows in each sequence.\n"
       "The width of windows is <window-len>.  The number of\n"
       "positions between windows to report is <window-skip>\n"
       "So the overlap between consecutive windows is\n"
       "<window-len> - <window-skip> positions\n"
       "Output goes to standard output in the format:\n",
       out);

   // This line goes through  fprintf  because it relies on  %%
   // being printed as a single percent sign.
   fprintf (out,
       "  window-start  window-len  A's C's G's T's #other %%GC\n");

   fputs (
       "Note the last window in the sequence can be shorter than\n"
       "<window-len> if the sequence ends prematurely\n"
       "\n"
       "Options:\n"
       " -h  or  --help\n"
       "    Print this message\n"
       " -p  or  --percent\n"
       "    Output percentages instead of counts\n"
       "\n",
       out);

   return;
  }