File: getPPLR.cpp

package info (click to toggle)
bitseq 0.7.5+dfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 676 kB
  • sloc: cpp: 7,043; python: 562; makefile: 150; sh: 52
file content (152 lines) | stat: -rw-r--r-- 5,667 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <vector>

using namespace std;

#include "ArgumentParser.h"
#include "common.h"
#include "misc.h"
#include "PosteriorSamples.h"

int main(int argc,char* argv[]){
   string programDescription=
"Computes PPLR from MCMC expression samples.\n"
"   (the probability of second condition being up-regulated)\n"
"   Also computes log2 fold change with confidence intervals, and condition mean log expression.\n"
"   [sampleFiles] should contain transposed MCMC samples from different conditions.";
   // Set options {{{
   ArgumentParser args(programDescription,"[sampleFile-C1] [sampleFile-C1]",1);
   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
   args.addOptionB("","inputIsLogged","logged",0,"Indicate that the input expression estimates are on log scale. (Not necessary to use with data generated by BitSeq-0.5.0 and above.)");
   args.addOptionB("d","distribution","distribution",0,"Produce whole distribution of differences.");
   args.addOptionS("s","selectFile","selectFileName",0,"File containing list of selected transcript IDs (zero based), only these will be reported. Only works with --distribution option.");
   args.addOptionD("","subSample","subSample",0,"Sub-sample the distributions using a given fraction of expression samples.",1.0);
   if(!args.parse(argc,argv))return 0;
   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
   // }}}

   long i,m,N,M;
   bool getAll=false, doLog = true;
   vector<long> trSelect;
   if(! args.isSet("selectFileName")){
      getAll=true;
   }else{
      ifstream selectF (args.getS("selectFileName").c_str());
      if(! selectF.is_open()){
         cerr<<"ERROR: Main: Failed loading selected transcripts."<<endl;
         return 1;
      }
      selectF>>m;
      while(selectF.good()){
         trSelect.push_back(m);
         selectF>>m;
      }
      selectF.close();
      sort(trSelect.begin(),trSelect.end());
   }

   Conditions cond;
   if(! cond.init("NONE", args.args(), &M, &N)){
      cerr<<"ERROR: Main: Failed loading conditions."<<endl;
      return 1;
   }
   if(cond.logged() || args.flag("logged")) {
      doLog = false;
      if(args.verbose)cout<<"Assuming values are logged already."<<endl;
   }else {
      doLog = true;
      if(args.verbose)cout<<"Will use logged values."<<endl;
   }
   if(args.verbose)cout<<"M "<<M<<"   N "<<N<<endl;
   ofstream outFile(args.getS("outFileName").c_str());
   if(! outFile.is_open()){
      cerr<<"ERROR: Main: File write probably failed!"<<endl;
      return 1;
   }
   if(getAll){
      trSelect.resize(M);
      for(i=0;i<M;i++)trSelect[i]=i;
   }
   
   vector<vector<double> > tr(2);
   vector<double> difs;
   long subN = N;
   double frac = args.getD("subSample");
   if((frac > 0) && (frac < 1))subN = (long)(N * frac);
   if(subN<1){
      cerr<<"ERROR: The fraction of samples for sub-sampling is too small."<<endl;
      return 1;
   }
   if((args.getD("subSample")!=1) && args.verbose){
      cout<<"Using "<<subN<<" samples for sub-sampling."<<endl;
   }
   double pplr,mu_0,mu_1,log2FC,ciLow,ciHigh;
   if(! args.flag("distribution")){
      if(args.verbose)cout<<"Counting PPLR"<<endl;
      outFile<<"# Computed PPLR, log2 fold change with 95\% confidence intervals, condition mean log expression."<<endl;
      outFile<<"# M "<<M<<"\n# columns:"<<endl;
      outFile<<"# PPLR log2FoldChange ConfidenceLow ConfidenceHigh MeanLogExpressionC1 MeanLogExpressionC2"<<endl;
      for(m=0;m<M;m++){
         if(args.verbose)progressLog(m,M);
         cond.getTranscript(0,m,tr[0],subN);
         cond.getTranscript(1,m,tr[1],subN);
         difs.resize(subN);
         pplr = log2FC = mu_0 = mu_1 = 0;
         for(i=0;i<subN;i++){
            if(doLog){
               if((tr[0][i] <= 0) || (tr[1][i] <= 0)){
                  cerr<<"ERROR: Found non-positive expression (transcript: "<<m<<").\n"
                        "       The expression is probably in log scale already.\n"
                        "       Please check your data and use --inputIsLogged if that is the case."
                      <<endl;
                  return 1;
               }
               tr[1][i] = log(tr[1][i]);
               tr[0][i] = log(tr[0][i]);
            }
            if(tr[1][i]>tr[0][i])pplr+=1;
            difs[i]=tr[1][i]-tr[0][i];
            log2FC+=tr[1][i]-tr[0][i];
            mu_0 += tr[0][i];
            mu_1 += tr[1][i];
         }
         pplr /= subN;
         mu_0 /= subN;
         mu_1 /= subN;
         log2FC /= subN*log(2);
         ns_misc::computeCI(95, &difs, &ciLow, &ciHigh);
         ciLow /= log(2);
         ciHigh /= log(2);
         outFile<<pplr<<" "<<log2FC<<" "<<ciLow<<" "<<ciHigh<<" "<<mu_0<<" "<<mu_1<<endl;
      }
   }else{
      if(args.verbose)cout<<"Computing Log Ratio distribution"<<endl;
      long selectM = trSelect.size();
      outFile<<"# Log Ratio distribution"<<endl;
      outFile<<"# T "<<endl;
      outFile<<"# M "<<selectM<<endl;
      outFile<<"# N "<<subN<<endl;
      outFile<<"# first column - transcript number (zero based)"<<endl;
      for(m=0;m<selectM;m++){
         if(selectM>10)progressLog(m,M);
         cond.getTranscript(0,trSelect[m],tr[0],subN);
         cond.getTranscript(1,trSelect[m],tr[1],subN);
         outFile<<trSelect[m]<<" ";
         for(i=0;i<subN;i++){
            if(doLog){
               tr[1][i] = log(tr[1][i]);
               tr[0][i] = log(tr[0][i]);
            }
            outFile<<tr[1][i]-tr[0][i]<<" ";
         }
         outFile<<endl;
      }
   }
   outFile.close();
   cond.close();
   return 0;
}