1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
|
#include <algorithm>
#include <ctime>
#include <cmath>
#include "misc.h"
#include "FileHeader.h"
#include "common.h"
namespace ns_math {
double logAddExp(double a, double b){ //{{{
if(a>b){
return a+log1p(exp(b-a));
}else {
return b+log1p(exp(a-b));
}
} //}}}
double logSumExp(const vector<double> &vals, long st, long en){ //{{{
if(st<0)st = 0;
if((en == -1) || (en > (long)vals.size())) en = vals.size();
if(st >= en)return 0;
double sumE = 0, m = *max_element(vals.begin() + st,vals.begin() + en);
for(long i = st; i < en; i++)
sumE += exp(vals[i] - m);
return m + log(sumE);
} //}}}
} // namespace ns_math
namespace ns_expression {
string getOutputType(const ArgumentParser &args, const string &defaultType){ //{{{
string type = ns_misc::toLower(args.getS("outputType"));
if((type!="theta") && (type!="rpkm") && (type!="counts") && (type!="tau")){
type = defaultType;
warning("Using output type %s.",type.c_str());
}
return type;
} //}}}
} // namespace ns_expression
namespace ns_misc {
long getSeed(const ArgumentParser &args){//{{{
long seed;
if(args.isSet("seed"))seed=args.getL("seed");
else seed = time(NULL);
if(args.verbose)message("seed: %ld\n",seed);
return seed;
}//}}}
bool openOutput(const ArgumentParser &args, ofstream *outF){//{{{
outF->open(args.getS("outFileName").c_str());
if(!outF->is_open()){
error("Main: Output file open failed.\n");
return false;
}
return true;
}//}}}
bool openOutput(const string &name, ofstream *outF) {//{{{
outF->open(name.c_str());
if(!outF->is_open()){
error("Main: File '%s' open failed.\n",name.c_str());
return false;
}
return true;
}//}}}
bool readConditions(const ArgumentParser &args, long *C, long *M, long *N, Conditions *cond){//{{{
if(! cond->init("NONE", args.args(), C, M, N)){
error("Main: Failed loading MCMC samples.\n");
return false;
}
if(args.isSet("normalization")){
if(! cond->setNorm(args.getTokenizedS2D("normalization"))){
error("Main: Applying normalization constants failed.\n");
return false;
}
}
if(!cond->logged() && args.verb()){
message("Samples are not logged. (will log for you)\n");
message("Using %lg as minimum instead of log(0).\n",LOG_ZERO);
}
if(args.verb())message("Files with samples loaded.\n");
return true;
}//}}}
void computeCI(double cf, vector<double> *difs, double *ciLow, double *ciHigh){//{{{
cf = (100 - cf) / 2.0;
double N = difs->size();
sort(difs->begin(),difs->end());
*ciLow = (*difs)[(long)(N/100.*cf)];
*ciHigh = (*difs)[(long)(N-N/100.*cf)];
}//}}}
string toLower(string str){//{{{
for(size_t i=0;i<str.size();i++)
if((str[i]>='A')&&(str[i]<='Z'))str[i]=str[i]-'A'+'a';
return str;
}//}}}
vector<string> tokenize(const string &input,const string &space){//{{{
vector<string> ret;
long pos=0,f=0,n=input.size();
while((pos<n)&&(f<n)&&(f>=0)){
f=input.find(space,pos);
if(f==pos)pos++;
else{
if((f<n)&&(f>=0)){
ret.push_back(input.substr(pos,f-pos));
pos=f+1;
}
}
}
if(pos<n)ret.push_back(input.substr(pos,n-pos));
return ret;
} //}}}
} // namespace ns_misc
namespace ns_genes {
bool getLog(const ArgumentParser &args){// {{{
if(args.flag("log")){
if(args.verb())message("Using logged values.\n");
return true;
}
if(args.verb())message("NOT using logged values.\n");
return false;
}// }}}
bool prepareInput(const ArgumentParser &args, TranscriptInfo *trInfo, PosteriorSamples *samples, long *M, long *N, long *G){// {{{
if(! trInfo->readInfo(args.getS("trInfoFileName"))) return false;
*G = trInfo->getG();
if((! samples->initSet(M,N,args.args()[0]))||(*M<=0)||(*N<=0)){
error("Main: Failed loading MCMC samples.\n");
return false;
}
if(*M!=trInfo->getM()){
error("Main: Number of transcripts in the info file and samples file are different: %ld vs %ld\n",trInfo->getM(),*M);
return false;
}
if(args.verb())messageF("Transcripts: %ld\n",*M);
return true;
}// }}}
bool updateGenes(const ArgumentParser &args, TranscriptInfo *trInfo, long *G){//{{{
if(!(args.isSet("trMapFile") || args.isSet("geneListFile")))return true;
if(args.isSet("trMapFile") && args.isSet("geneListFile")){
error("Main: Please provide only one of trMapFile and geneListFile, both serve the same function.\n");
return false;
}
bool isMap;
ifstream mapFile;
if(args.isSet("trMapFile")){
isMap = true;
mapFile.open(args.getS("trMapFile").c_str());
}else {
isMap = false;
mapFile.open(args.getS("geneListFile").c_str());
}
if(!mapFile.is_open()){
if(isMap){
error("Main: Failed reading file with transcript to gene mapping.\n");
}else{
error("Main: Failed reading file with gene names.\n");
}
return false;
}
map<string,string> trMap;
vector<string> geneList;
string trName,geName;
while(mapFile.good()){
while(mapFile.good() && (mapFile.peek()=='#'))
mapFile.ignore(100000000,'\n');
if(!mapFile.good()) break;
mapFile>>geName;
if(isMap){
mapFile>>trName;
}
if(!mapFile.fail()){
if(isMap){
trMap[trName]=geName;
}else{
geneList.push_back(geName);
}
}
mapFile.ignore(100000000,'\n');
}
mapFile.close();
bool succ;
if(isMap)succ = trInfo->updateGeneNames(trMap);
else succ = trInfo->updateGeneNames(geneList);
if(!succ){
error("Main: Filed setting gene information.\n");
return false;
}
*G = trInfo->getG();
return true;
}//}}}
bool checkGeneCount(long G, long M){//{{{
if((G != 1) && (G != M)) return true;
if(G==1){
error("Main: All transcripts share just one gene.\n");
}else{
error("Main: There are no transcripts sharing one gene.\n");
}
message("Please provide valid transcript to gene mapping (trMapFile or geneListFile).\n"
" (trMap file should contain rows in format: <geneName> <transcriptName>.)\n"
" (geneList file should contain rows with gene names, one per transcript.)\n");
return false;
}//}}}
} // namespace ns_genes
namespace ns_params {
bool readParams(const string &name, vector<paramT> *params, ofstream *outF){//{{{
long parN;
ifstream parFile(name.c_str());
FileHeader fh(&parFile);
if(!fh.paramsHeader(&parN, outF)){
error("Main: Problem loading parameters file %s\n",name.c_str());
return false;
}
// Vector of parameters: (mean expression, (alpha, beta) )
paramT param;
while(parFile.good()){
while((parFile.good())&&(parFile.peek()=='#')){
parFile.ignore(10000000,'\n');
}
parFile>>param.alpha>>param.beta>>param.expr;
if(parFile.good())
params->push_back(param);
parFile.ignore(10000000,'\n');
}
if((parN>0)&&(parN != (long)params->size())){
warning("Main: declared number of parameters does not match number of lines read (%ld %ld).\n", parN, (long)params->size());
}
fh.close();
sort(params->begin(),params->end());
return true;
}//}}}
} // namespace ns_params
|