File: misc.cpp

package info (click to toggle)
bitseq 0.7.5+dfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 676 kB
  • sloc: cpp: 7,043; python: 562; makefile: 150; sh: 52
file content (240 lines) | stat: -rw-r--r-- 7,316 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#include <algorithm>
#include <ctime>
#include <cmath>

#include "misc.h"

#include "FileHeader.h"

#include "common.h"

namespace ns_math {
double logAddExp(double a, double b){ //{{{
   if(a>b){
      return a+log1p(exp(b-a));
   }else {
      return b+log1p(exp(a-b));
   }
} //}}}
double logSumExp(const vector<double> &vals, long st, long en){ //{{{
   if(st<0)st = 0;
   if((en == -1) || (en > (long)vals.size())) en = vals.size();
   if(st >= en)return 0;
   double sumE = 0, m = *max_element(vals.begin() + st,vals.begin() + en);
   for(long i = st; i < en; i++)
      sumE += exp(vals[i] - m);
   return  m + log(sumE);
} //}}}
} // namespace ns_math

namespace ns_expression {

string getOutputType(const ArgumentParser &args, const string &defaultType){ //{{{
   string type = ns_misc::toLower(args.getS("outputType"));
   if((type!="theta") && (type!="rpkm") && (type!="counts") && (type!="tau")){
      type = defaultType;
      warning("Using output type %s.",type.c_str());
   }
   return type;
} //}}}
} // namespace ns_expression

namespace ns_misc {
long getSeed(const ArgumentParser &args){//{{{
   long seed;
   if(args.isSet("seed"))seed=args.getL("seed");
   else seed = time(NULL);
   if(args.verbose)message("seed: %ld\n",seed);
   return seed;
}//}}}
bool openOutput(const ArgumentParser &args, ofstream *outF){//{{{
   outF->open(args.getS("outFileName").c_str());
   if(!outF->is_open()){
      error("Main: Output file open failed.\n");
      return false;
   }
   return true;
}//}}}
bool openOutput(const string &name, ofstream *outF) {//{{{
   outF->open(name.c_str());
   if(!outF->is_open()){
      error("Main: File '%s' open failed.\n",name.c_str());
      return false;
   }
   return true;
}//}}}

bool readConditions(const ArgumentParser &args, long *C, long *M, long *N, Conditions *cond){//{{{
   if(! cond->init("NONE", args.args(), C, M, N)){
      error("Main: Failed loading MCMC samples.\n");
      return false;
   }
   if(args.isSet("normalization")){
      if(! cond->setNorm(args.getTokenizedS2D("normalization"))){
         error("Main: Applying normalization constants failed.\n");
         return false;
      }
   }
   if(!cond->logged() && args.verb()){
      message("Samples are not logged. (will log for you)\n");
      message("Using %lg as minimum instead of log(0).\n",LOG_ZERO);
   }
   if(args.verb())message("Files with samples loaded.\n");
   return true;
}//}}}

void computeCI(double cf, vector<double> *difs, double *ciLow, double *ciHigh){//{{{
   cf = (100 - cf) / 2.0;
   double N = difs->size();
   sort(difs->begin(),difs->end());
   *ciLow = (*difs)[(long)(N/100.*cf)];
   *ciHigh = (*difs)[(long)(N-N/100.*cf)];
}//}}}

string toLower(string str){//{{{
   for(size_t i=0;i<str.size();i++)
      if((str[i]>='A')&&(str[i]<='Z'))str[i]=str[i]-'A'+'a';
   return str;
}//}}}

vector<string> tokenize(const string &input,const string &space){//{{{
   vector<string> ret;
   long pos=0,f=0,n=input.size();
   while((pos<n)&&(f<n)&&(f>=0)){
      f=input.find(space,pos);
      if(f==pos)pos++;
      else{
         if((f<n)&&(f>=0)){
            ret.push_back(input.substr(pos,f-pos));
            pos=f+1;
         }
      }
   }
   if(pos<n)ret.push_back(input.substr(pos,n-pos));
   return ret;
} //}}}
} // namespace ns_misc

namespace ns_genes {
bool getLog(const ArgumentParser &args){// {{{
   if(args.flag("log")){
      if(args.verb())message("Using logged values.\n");
      return true;
   }
   if(args.verb())message("NOT using logged values.\n");
   return false;
}// }}}

bool prepareInput(const ArgumentParser &args, TranscriptInfo *trInfo, PosteriorSamples *samples, long *M, long *N, long *G){// {{{
   if(! trInfo->readInfo(args.getS("trInfoFileName"))) return false;
   *G = trInfo->getG();
   if((! samples->initSet(M,N,args.args()[0]))||(*M<=0)||(*N<=0)){
      error("Main: Failed loading MCMC samples.\n");
      return false;
   }
   if(*M!=trInfo->getM()){
      error("Main: Number of transcripts in the info file and samples file are different: %ld vs %ld\n",trInfo->getM(),*M);
      return false;
   }
   if(args.verb())messageF("Transcripts: %ld\n",*M);
   return true;
}// }}}

bool updateGenes(const ArgumentParser &args, TranscriptInfo *trInfo, long *G){//{{{
   if(!(args.isSet("trMapFile") || args.isSet("geneListFile")))return true;
   if(args.isSet("trMapFile") && args.isSet("geneListFile")){
      error("Main: Please provide only one of trMapFile and geneListFile, both serve the same function.\n");
      return false;
   }
   bool isMap;
   ifstream mapFile;
   if(args.isSet("trMapFile")){
      isMap = true;
      mapFile.open(args.getS("trMapFile").c_str());
   }else {
      isMap = false;
      mapFile.open(args.getS("geneListFile").c_str());
   }
   if(!mapFile.is_open()){
      if(isMap){
         error("Main: Failed reading file with transcript to gene mapping.\n");
      }else{
         error("Main: Failed reading file with gene names.\n");
      }
      return false;
   }
   map<string,string> trMap;
   vector<string> geneList;
   string trName,geName;
   while(mapFile.good()){
      while(mapFile.good() && (mapFile.peek()=='#'))
         mapFile.ignore(100000000,'\n');
      if(!mapFile.good()) break;
      mapFile>>geName;
      if(isMap){
         mapFile>>trName;
      }
      if(!mapFile.fail()){
         if(isMap){
            trMap[trName]=geName;
         }else{
            geneList.push_back(geName);
         }
      }
      mapFile.ignore(100000000,'\n');
   }
   mapFile.close();
   bool succ;
   if(isMap)succ = trInfo->updateGeneNames(trMap);
   else succ = trInfo->updateGeneNames(geneList);
   if(!succ){
      error("Main: Filed setting gene information.\n");
      return false;
   }
   *G = trInfo->getG();
   return true;
}//}}}

bool checkGeneCount(long G, long M){//{{{
   if((G != 1) && (G != M)) return true;
   if(G==1){
      error("Main: All transcripts share just one gene.\n");
   }else{
      error("Main: There are no transcripts sharing one gene.\n");
   }
   message("Please provide valid transcript to gene mapping (trMapFile or geneListFile).\n"
           "   (trMap file should contain rows in format: <geneName> <transcriptName>.)\n"
           "   (geneList file should contain rows with gene names, one per transcript.)\n");
   return false;
}//}}}
} // namespace ns_genes

namespace ns_params {
bool readParams(const string &name, vector<paramT> *params, ofstream *outF){//{{{
   long parN;
   ifstream parFile(name.c_str());
   FileHeader fh(&parFile);
   if(!fh.paramsHeader(&parN, outF)){
      error("Main: Problem loading parameters file %s\n",name.c_str());
      return false;
   }
   // Vector of parameters: (mean expression, (alpha, beta) )
   paramT param;
   while(parFile.good()){
      while((parFile.good())&&(parFile.peek()=='#')){
         parFile.ignore(10000000,'\n');
      }
      parFile>>param.alpha>>param.beta>>param.expr;
      if(parFile.good())
         params->push_back(param);
      parFile.ignore(10000000,'\n');
   }
   if((parN>0)&&(parN != (long)params->size())){
      warning("Main: declared number of parameters does not match number of lines read (%ld %ld).\n", parN, (long)params->size());
   }
   fh.close();
   sort(params->begin(),params->end());
   return true;
}//}}}

} // namespace ns_params