File: extractSamples.cpp

package info (click to toggle)
bitseq 0.7.5%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 636 kB
  • sloc: cpp: 7,043; python: 562; makefile: 150; sh: 52
file content (126 lines) | stat: -rw-r--r-- 3,885 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
 *
 * Extract samples of given transcripts.
 *
 *
 */
#include<iostream>
#include<cstdlib>
#include<algorithm>

using namespace std;

#include "PosteriorSamples.h"
#include "ArgumentParser.h"
#include "common.h"

#define Sof(x) (long)x.size()

vector <long> tokenizeL(const string &input,const string &space = " "){//{{{
   vector <long> ret;
   long pos=0,f=0,n=input.size();
   while((pos<n)&&(f<n)&&(f>=0)){
      f=input.find(space,pos);
      if(f==pos)pos++;
      else{
         if((f <n)&&(f>=0)){
            ret.push_back(atoi(input.substr(pos,f-pos).c_str()));
            pos=f+1;
         }
      }
   }
   if(pos<n)ret.push_back(atoi(input.substr(pos,n-pos).c_str()));
   return ret;
} //}}}

int main(int argc,char* argv[]){
   srand(time(NULL));
   string programDescription=
"Extracts MCMC samples of selected transcripts.\n\
   [sampleFiles] should contain transposed MCMC samples.";   
   // Set options {{{
   ArgumentParser args(programDescription,"[sampleFiles]",1);
   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
   args.addOptionS("L","list","list",0,"Comma delimited list of ZERO-BASED transcript ids (i.e. lines) which should be extracted: 0,17,47,1024,4777");
   args.addOptionL("r","random","randomN",0,"Choose random [randomN] transcripts.");
   if(!args.parse(argc,argv))return 0;
   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
   // }}}
   long i,j,c,C,N,M=0,S;
   vector<long> trList;
   Conditions samples;

   // Initialize samples reader
   if( (!samples.init("NONE", args.args(), &C, &M, &N)) || (C<=0) || (M<=0) || (N<=0)){
      cerr<<"ERROR: Main: Failed loading MCMC samples."<<endl;
      return 1;
   }
   C=samples.getRN();
   if(args.isSet("list")){
      // Process transcripts list:
      trList = tokenizeL(args.getS("list"),",");
      sort(trList.begin(),trList.end());
      // Erase invalid and duplicate IDs
      for(i=0;i<Sof(trList);i++){
         if((trList[i]<0)||(trList[i]>=M)||((i>0)&&(trList[i]==trList[i-1]))){
            trList.erase(trList.begin()+i);
            i--;
         }
      }   
      S=Sof(trList);
      if(S==0){
         cerr<<"ERROR: Main: No valid transcript IDs supplied."<<endl;
         return 1;
      }
   }else if(args.isSet("randomN")){
      // Create list of [randomN] random transcripts
      S = args.getL("randomN");
      if((S<=0)||(S>M)){
         cerr<<"ERROR: Main: Wrong number of transcripts ot output: "<<S<<"."<<endl;
         return 1;
      }
      for(i=0;i<S;i++){
         j = rand()%M;
         while(find(trList.begin(),trList.end(),j)!=trList.end())
            j = rand()%M;
         trList.push_back(j);
      }
      sort(trList.begin(),trList.end());
   }else{
      cerr<<"ERROR: Main: Need to specify at least one of --list or --random."<<endl;
      return 1;
   }
   if(args.verbose)cout<<"C: "<<C<<" samples: "<<N<<"\ntranscripts: "<<M<<"\nselected: "<<S<<endl;
   
   // Open output file and write header
   ofstream outFile(args.getS("outFileName").c_str());
   if(! outFile.is_open()){
      cerr<<"ERROR: Main: File write failed!"<<endl;
      return 1;
   }
   outFile<<"# Selected transcripts from: ";
   for(i=0;i<C;i++)outFile<<args.args()[i]<<",";
   outFile<<endl;
   outFile<<"# transcripts(zero-based): "<<trList[0];
   for(i=1;i<S;i++)outFile<<","<<trList[i];
   outFile<<"\n# T (M rows,N cols)\n";
   outFile<<"# C "<<C<<" (conditions)\n";
   outFile<<"# M "<<S<<" (out of: "<<M<<")\n# N "<<N<<endl;
   outFile.precision(9);
   outFile<<scientific;

   // Copy samples
   vector<double> tr;
   for(j=0;j<S;j++){
      if(args.verbose)cout<<trList[j]<<" ";
      cout.flush();
      for(c=0;c<C;c++){
         samples.getTranscript(c,trList[j], tr);
         for(i=0;i<N;i++)outFile<<tr[i]<<" ";
         outFile<<endl;
      }
   }
   outFile.close();
   if(args.verbose)cout<<"DONE"<<endl;
   return 0;
}