File: someUtil.cpp

package info (click to toggle)
fastml 3.11-4
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 5,772 kB
sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (1077 lines) | stat: -rw-r--r-- 30,259 bytes
parent folder | download | duplicates (2)
// $Id: someUtil.cpp 15479 2016-10-10 16:25:21Z elilevy $

#include "someUtil.h"
#include "errorMsg.h"
#include "talRandom.h"
#include <cmath>
#include <ctime>
#include <iterator>
#include <algorithm>
#include <string>
#include <cctype>
#include <cassert>
using namespace std;

// for the _mkdir call
#if defined(WIN32) || defined(SunOS) || defined(solaris)
  #include <direct.h>
#else
  #include <sys/file.h>
  #include <dirent.h>
//  #include <io.h>
#endif

//swap between the 4 variables such that the first becomes the second, second becomes the third and third becomes the fourth.
//used in functoin mnbrack below.
void shift3(MDOUBLE &a, MDOUBLE &b, MDOUBLE &c, const MDOUBLE d) {
	a=b;
	b=c;
	c=d;
}

MDOUBLE computeAverage(const vector<int>& vec) {
	MDOUBLE sum=0.0;
	for (int i=0; i < vec.size(); ++i) {
		sum+=static_cast<MDOUBLE>(vec[i]);
	}
	return sum/static_cast<MDOUBLE>(vec.size());
}

// X ~ Poisson(lamda) -->	P(X=k) = ((lamda^k)/k!) * e^(-lamda)
// It isn't smart to first calculate factorial(k) because the size of long int limits this calculation to k<=13
MDOUBLE copmutePoissonProbability(const int& k, const long double& lamda)
{
	assert(k>=0);
	long double tmp = pow(lamda,k); // tmp = (lamda^k)/k!
	
	for (int i=2; i<=k; ++i)
		tmp/=i;

	return (tmp * exp(-lamda));
}


MDOUBLE computeAverage(const vector<MDOUBLE>& vec, const Vdouble*  weightsV) {
	MDOUBLE sum=0.0;
	if(weightsV && !(weightsV->size() == vec.size() ))
		errorMsg::reportError("Using computeAverage with weights, where the number of weights not equal values");
	for (int i=0; i < vec.size(); ++i){
		if(weightsV)
			sum+=vec[i]* (*weightsV)[i];
		else
			sum+=vec[i];
	}
	return sum/static_cast<MDOUBLE>(vec.size());
}

MDOUBLE computeAverageOfAbs(const vector<MDOUBLE>& vec, const Vdouble*  weightsV) {
	MDOUBLE sum=0.0;
	if(weightsV && !(weightsV->size() == vec.size() ))
		errorMsg::reportError("Using computeAverage with weights, where the number of weights not equal values");
	for (int i=0; i < vec.size(); ++i){
		if(weightsV)
			sum+=abs(vec[i]* (*weightsV)[i]);
		else
			sum+=abs(vec[i]);
	}
	return sum/static_cast<MDOUBLE>(vec.size());
}

MDOUBLE computeMedian(const vector<MDOUBLE>& vec) {
	int vecSize = vec.size();
	if (vecSize<1)
		return 0;
	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	for (int x =0; x < vecSize ; ++x)
	{
		sortVec[x].setValue(vec[x]);
		sortVec[x].setPlace(x);
	}
	sort(sortVec.begin(), sortVec.end());
	sort(sortVec.begin(), sortVec.end());

	int highMedianIndex;
	if(vecSize>1)
		highMedianIndex = int((vecSize+1)/2);
	else
		highMedianIndex = int((vecSize)/2); // thus, highMedianIndex==0

	
	MDOUBLE	median = sortVec[highMedianIndex].getValue();
	return median;
}

//// if quantile=0.5, the median is returned, if quantile=0.1, the low-ton-percentile is returned, quantile=0.9, the top-90-percentile is returned
MDOUBLE computeQuantileFrac(const vector<MDOUBLE>& vec, MDOUBLE quantile) {
	int vecSize = vec.size();
	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	for (int x =0; x < vecSize ; ++x)
	{
		sortVec[x].setValue(vec[x]);
		sortVec[x].setPlace(x);
	}
	sort(sortVec.begin(), sortVec.end());
	sort(sortVec.begin(), sortVec.end());

	int qIndex = int((vecSize+1)*quantile);
	MDOUBLE quantileVal = sortVec[qIndex].getValue();
	return quantileVal;
}

//// if quantile=2, the median is returned, if quantile=10, the ten-percentile is returned
MDOUBLE computeQuantile(const vector<MDOUBLE>& vec, MDOUBLE quantile) {
	MDOUBLE dividerForRank = 1+ 1.0/(quantile-1);

	int vecSize = vec.size();
	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	for (int x =0; x < vecSize ; ++x)
	{
		sortVec[x].setValue(vec[x]);
		sortVec[x].setPlace(x);
	}
	sort(sortVec.begin(), sortVec.end());
	sort(sortVec.begin(), sortVec.end());

	int qIndex = int((vecSize+1)/dividerForRank);
	MDOUBLE quantileVal = sortVec[qIndex].getValue();
	return quantileVal;
}


MDOUBLE computeStd(const vector<int>& vec) {// page 60, Sokal and Rohlf
	MDOUBLE sum=0.0;
	MDOUBLE sumSqr=0.0;
	MDOUBLE vecSize = static_cast<MDOUBLE>(vec.size());
	for (int i=0; i < vec.size(); ++i) {
		sum+=static_cast<MDOUBLE>(vec[i]);
		sumSqr+=(static_cast<MDOUBLE>(vec[i])*static_cast<MDOUBLE>(vec[i]));
	}
	MDOUBLE res= sumSqr-(sum*sum/vecSize);
	res /= (vecSize-1.0);
	res = sqrt(res);
	return res;
}

MDOUBLE computeStd(const vector<MDOUBLE>& vec) {// page 60, Sokal and Rohlf
	MDOUBLE sum=0.0;
	MDOUBLE sumSqr=0.0;
	MDOUBLE vecSize = static_cast<MDOUBLE>(vec.size());
	for (int i=0; i < vec.size(); ++i) {
		sum+=vec[i];
		sumSqr+=(vec[i]*vec[i]);
	}
	MDOUBLE res= sumSqr-(sum*sum/vecSize);
	res /= (vecSize-1.0);
	res = sqrt(res);
	return res;
}

void computeRelativeFreqsFollowingOneChanged(MDOUBLE  newValFreq, int indexNewFreq,Vdouble &freqs){
	MDOUBLE proportionAfterOptimization = 1.0 - newValFreq;
	MDOUBLE proportionBeforeOptimization = 1.0 - freqs[indexNewFreq];
	MDOUBLE sum = 0.0;
	for (int i=0; i<freqs.size(); ++i) {
		if (i==indexNewFreq){
			freqs[i] = newValFreq;
		}
		else {
			freqs[i] = proportionAfterOptimization*freqs[i]/proportionBeforeOptimization;
		}
		sum+=freqs[i];
	}
	if (!DEQUAL(sum,1.0)) {
		errorMsg::reportError("Error in computeRelativeFreqsFollowingOneChanged, sum not equal to 1");
	}
}


char mytolower(char in){return tolower(in);}
char mytoupper(char in){return toupper(in);}

void toLower(string& str) {
	transform (str.begin(), str.end(), str.begin(), mytolower);
}
void toUpper(string& str) {
	transform (str.begin(), str.end(), str.begin(), mytoupper);
}
string toUpper2(const string& str)
{
	string res("");
	transform (str.begin(), str.end(), res.begin(), mytoupper);
	return res;
}

bool allowCharSet(const string& allowableChars, const string& string2check) {
// this function check if all the character in string2check are made of characters from allowableChars
	for (int i=0; i < string2check.size(); ++i) {
		// now checking for string2check[i]
		int j;
		for (j=0; j < allowableChars.size(); ++j) {
			if (string2check[i] == allowableChars[j]) {
				break;
			}
		}
		if (j==allowableChars.size()) return false;
	}
	return true;
}

bool isCharInString(const string& stringToCheck, const char charToCheck) {
	for (int i=0; i < stringToCheck.size(); ++i ) {
		if (stringToCheck[i] == charToCheck) return true;
	}
	return false;
}

string double2string(const double x, const int lenght, bool round){
	
	// first getting the integer part:
	double x_abs = fabs(x);
	int theIntegerPart = static_cast<int>(x_abs);
	double theRemainingPart = fabs(x_abs-theIntegerPart);
	int integerRepresentingTheRemainingPart = static_cast<int>(theRemainingPart*pow(10.0,lenght));
	if (round) {
		integerRepresentingTheRemainingPart = static_cast<int>(theRemainingPart*pow(10.0,lenght)+0.5);
		if (integerRepresentingTheRemainingPart == pow(10.0,lenght)) {
			integerRepresentingTheRemainingPart = 0;
			theIntegerPart++;
		}
	}

	string part1 = int2string(theIntegerPart);
	string part2 = int2string(integerRepresentingTheRemainingPart);
	while (part2.length()<lenght){
		part2.insert(0, "0");
	}

	string result("");
	if (x < 0.0)
		result += "-";
	result += part1;
	result += ".";
	result += part2;

	// removing 0 from the end
	int i = result.length()-1;
	while (result[i]!='.' && i>0 && result[i]=='0'){
		result.erase(i);
		i--;
	}
	
	// removing "." if this is the last character in the string.
	if (result[result.length()-1]=='.')
	result.erase(result.length()-1);

	return result;
}

string int2string(const int num) {
// the input to this program is say 56
// the output is the string "56"
// this version of int2string is more portable 
// than sprintf like functions from c;
// or sstream of stl.
	if (num == 0) return "0";
	string res;
	int i = abs(num);
	
	
	int leftover;
	char k;
	while (i) {
		leftover = i%10;
		k = '0'+leftover;
		res = k+res;
		i/=10;
	}
	if (num<0) res = "-" + res;
	return res;
};

void printTime(ostream& out) {
	time_t ltime;
	time( &ltime );
	out<<"# the date is "<< ctime( &ltime )<<endl;
}

MDOUBLE string2double(const string& inString) {

	if (allowCharSet("0123456789.eE+-",inString) == false) {
		errorMsg::reportError(" error in function string2double ");
	}
	
	// first decide if the format is like 0.00343 (regularFormat) or
	// if it is in the form of 0.34e-006 for example

	bool regularFormat = true;
	int i;
	for (i=0; i < inString.size(); ++i) {
		if ((inString[i] == 'e' ) || (inString[i] == 'E' )) {
			regularFormat = false; 
			break;
		}
	}

	if (regularFormat) {
			MDOUBLE dDistance = atof(inString.c_str());
			return dDistance;
	}
	else {
		string b4TheExp;
		bool plusAfterTheExp = true;
		string afterTheExp;

		// b4 the exp
		for (i=0; i < inString.size(); ++i) {
			if ((inString[i] != 'e' ) && (inString[i] != 'E' )){
				b4TheExp += inString[i];
			}
			else break;
		}
		++i; //now standing after the exp;
		if (inString[i] == '-' ) {
			plusAfterTheExp = false;
			++i;
		}
		else if (inString[i] == '+' ) {
			plusAfterTheExp = true;
			++i;
		}
		else plusAfterTheExp = true; // the number is like 0.34e43

		for (; i < inString.size(); ++i) {
			afterTheExp += inString[i];
		}

		MDOUBLE res = 0.0;
		MDOUBLE dDistance = atof(b4TheExp.c_str());
		int exponentialFactor = atoi(afterTheExp.c_str());
		if (plusAfterTheExp) res = dDistance * pow(10.0,exponentialFactor);
		else res = dDistance * pow(10.0,-exponentialFactor);
		return res;
	}

	
}


bool checkThatFileExist(const string& fileName) {
	ifstream file1(fileName.c_str());
	if (!file1.good()) return false;
	file1.close();
	return true;
}

void putFileIntoVectorStringArray(istream &infile,vector<string> &inseqFile){
	inseqFile.clear();
	string tmp1;
	while (getline(infile,tmp1, '\n' ) ) {
		if (tmp1.empty()) continue;
		if (tmp1.size() > 100000) { // was 15000
			vector<string> err;
			err.push_back("Unable to read file. It is required that each line is no longer than");
			err.push_back("15000 characters. ");
			errorMsg::reportError(err,1); 
		}
		if (tmp1[tmp1.size()-1]=='\r') {// in case we are reading a dos file 
			tmp1.erase(tmp1.size()-1);
		}// remove the traling carrige-return
		inseqFile.push_back(tmp1);
	}
}

bool fromStringIterToInt(string::const_iterator & it, // ref must be here
						const string::const_iterator endOfString,
						int& res) {// the ref is so that we can use the it after the func.
	while (it != endOfString) {
		if ((*it == ' ') || (*it == '\t')) ++it;else break; // skeeping white spaces.
	}
	if (it != endOfString) {
		if (isdigit(*it) || (*it == '-')){
			int k = atoi(&*it);
			if (*it == '-') ++it;
			for (int numDig = abs(k); numDig>0; numDig/=10) ++it;
			res = k;
			return true;
		}
		else return false; //unable to read int From String
	}
	return false; //unable to read int From String
	
}

string* searchStringInFile(const string& string2find,
						   const int index,
						   const string& inFileName) {
	ifstream f;
	f.open(inFileName.c_str());
	if (!f.good()) {
		string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile"; 
		errorMsg::reportError(tmp);
	}

	string numm = int2string(index);
	string realString2find = string2find+numm;

	istream_iterator<string> is_string(f);
	istream_iterator<string> end_of_stream;

	is_string = find(is_string,end_of_stream,realString2find);
	if(is_string == end_of_stream) {f.close();return NULL;}
	else {
		is_string++;
		if(is_string == end_of_stream) {f.close();return NULL;};
		string* s = new string(*is_string);
		f.close();
		return s;
	}
	f.close();
	return NULL;
}
string* searchStringInFile(const string& string2find,
						   const string& inFileName) {// return the string that is AFTER the string to search.
	ifstream f;
	f.open(inFileName.c_str());
	if (!f.good()) {
		string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile"; 
		errorMsg::reportError(tmp);
	}
	string realString2find = string2find;

	istream_iterator<string> is_string(f);
	istream_iterator<string> end_of_stream;

	is_string = find(is_string,end_of_stream,realString2find);
	if(is_string == end_of_stream) {f.close();return NULL;}
	else {
		is_string++;
		if(is_string == end_of_stream) {f.close();return NULL;};
		string* s = new string(*is_string);
		f.close();
		return s;
	}
	f.close();
	return NULL;
}
bool doesWordExistInFile(const string& string2find,const string& inFileName) {
	ifstream f;
	f.open(inFileName.c_str());
	if (!f.good()) {
		string tmp = "Unable to open file name: "+inFileName+" in function searchStringInFile"; 
		errorMsg::reportError(tmp);
	}

	istream_iterator<string> is_string(f);
	istream_iterator<string> end_of_stream;

	is_string = find(is_string,end_of_stream,string2find);
	if(is_string == end_of_stream) return false;
	else return true;
}

string takeCharOutOfString(const string& charsToTakeOut, const string& fromString) {
	string finalString;
	for (int i=0; i<fromString.size(); ++i) {
		bool goodChar = true;
		for (int j=0; j < charsToTakeOut.size(); ++j) {
			if (fromString[i]== charsToTakeOut[j]) goodChar = false;
		}
		if (goodChar) finalString+=fromString[i];
	}
	return finalString;
}

bool DEQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/*1.192092896e-07F*/) {
	return (fabs(x1-x2)<epsilon);
}

bool DBIG_EQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/*1.192092896e-07F*/){
	return ((x1 > x2) || DEQUAL(x1, x2,epsilon));
}


bool DSMALL_EQUAL(const MDOUBLE x1, const MDOUBLE x2, MDOUBLE epsilon/*1.192092896e-07F*/){ 
	return ((x1 < x2) || DEQUAL(x1, x2,epsilon));
}

void createDir(const string & curDir, const string & dirName){// COPYRIGHT OF ITAY MAYROSE.
	string newDir;
	if (curDir == "")
		newDir = dirName;
	else
		newDir = curDir + string("/") + dirName;
#ifdef WIN32
	if( _mkdir(newDir.c_str()) == 0 ){
		LOG(5, << "Directory " <<newDir<<" was successfully created\n"<<endl);
    }else{
		if (errno == EEXIST) {
			LOG(5,<<"Directory already exist\n");
			return;
		} else {
		string err = "Problem creating directory " + newDir + " \n";
		LOG(5, << err << endl);
		errorMsg::reportError(err);
		}
	}
#else
	DIR * directory = opendir(newDir.c_str());
	if (directory == NULL) {
		string sysCall = "mkdir " + newDir;
        	system(sysCall.c_str());
	}
	else{
		string err = "Directory " + newDir + " already exists  \n";
		LOG(5, << err << endl);
		//errorMsg::reportError(err);
		
	}
#endif
}

//scale vecToScale so that its new average is AvgIn. return the scaling factor. 
MDOUBLE scaleVec(Vdouble& vecToScale, const MDOUBLE avgIn)
{
	int vecSize = vecToScale.size();
	MDOUBLE sum = 0;
	for (int x = 0; x<vecSize; ++x)
	{
		sum += vecToScale[x];
	}
	MDOUBLE avg = sum/vecSize;
	MDOUBLE scaleFactor = avgIn / avg;
	
	for (int i = 0; i<vecSize; ++i)
	{
		vecToScale[i] *= scaleFactor;
	}

	MDOUBLE newAvg = computeAverage(vecToScale);
	if (fabs(newAvg - avgIn) > 0.001)
		errorMsg::reportError(" problem - scalled average is not avgIn after scalling!!!");
	return scaleFactor;
}

//calculates the mean square error distance between 2 vectors:
MDOUBLE calcMSEDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
{
	MDOUBLE res = 0.0;
	if (oneRatesVec.size() != otherRatesVec.size())
		errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
	
	for (int i=0; i<oneRatesVec.size(); ++i)
	{
		MDOUBLE diff = oneRatesVec[i] - otherRatesVec[i];
		res += diff * diff;
	}

	res /= oneRatesVec.size();
	return res;
}

//calculates the mean absolute deviations distance between 2 vectors:
MDOUBLE calcMADDistBetweenVectors(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
{
	MDOUBLE res = 0.0;
	if (oneRatesVec.size() != otherRatesVec.size())
		errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
	
	for (int i=0; i<oneRatesVec.size(); ++i)
	{
		MDOUBLE diff = oneRatesVec[i] - otherRatesVec[i];
		res += fabs(diff);
	}

	res /= oneRatesVec.size();
	return res;
}

MDOUBLE calcRelativeMADDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold/*0.0*/)
{
	MDOUBLE res = 0.0;
	if (inferredValues.size() != trueValues.size())
		errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
	
	int counter = 0;
	for (int i=0; i<inferredValues.size(); ++i)
	{
		if (trueValues[i] < threshhold)
			continue;
		MDOUBLE diff = fabs(inferredValues[i] - trueValues[i]);
		res += (diff / trueValues[i]);
		++counter;
	}

	res /= counter;
	return res;
}

//calculates the relative mean square error distance between 2 vectors:
//The difference from a regualar MSE is that for each position the squared difference is devided by the true value
//if threshhold > 0: if trueValues[i] < threshhold then do not add the rse for this psition to the result
MDOUBLE calcRelativeMSEDistBetweenVectors(const Vdouble& trueValues, const Vdouble& inferredValues, const MDOUBLE threshhold/*0.0*/ )
{
	MDOUBLE res = 0.0;
	if (inferredValues.size() != trueValues.size())
		errorMsg::reportError("the two vectors to be compared are not the same size in function SimulateRates::calcDistBetweenRatesVectors()");
	
	int counter = 0;
	for (int i=0; i<inferredValues.size(); ++i)
	{
		if (trueValues[i] < threshhold)
			continue;
		MDOUBLE diff = inferredValues[i] - trueValues[i];
		res += diff * diff / trueValues[i];
		++counter;
	}

	res /= counter;
	return res;
}


MDOUBLE calcRankCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
{	
	MDOUBLE res = 0.0;
	Vdouble orderVec1, orderVec2;
	MDOUBLE s_one = orderVec(oneRatesVec, orderVec1);
	MDOUBLE s_two = orderVec(otherRatesVec, orderVec2);
	int seqLength = oneRatesVec.size();

	MDOUBLE diff, sum_diff_sqr = 0;
	for (int i=0; i<seqLength; ++i)
	{
		diff = orderVec1[i] - orderVec2[i];
		sum_diff_sqr += pow(diff, 2);
	}

	MDOUBLE en3n = (seqLength * (pow(static_cast<double>(seqLength), 2.0) -1)); //n^3 -n
	MDOUBLE numerator = 1.0 - ((6/en3n) * (sum_diff_sqr + (s_one + s_two)/12.0));
	MDOUBLE denum = sqrt((1.0 - s_one/en3n) * (1.0 - s_two/en3n));
	res = numerator/ denum;
	return res;
}

/********************************************************************************************
calculates the spearman rank correlation value, Ofir implementation
*********************************************************************************************/
MDOUBLE calcRankCorrelation2(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
{
	int vecLen = oneRatesVec.size();
	if(vecLen != otherRatesVec.size())
		errorMsg::reportError("calcRankCorrelation2. Vectors length differ");	
	
	Vdouble orderVec1, orderVec2;
	orderRankNoTies(oneRatesVec, orderVec1);
	orderRankNoTies(otherRatesVec, orderVec2);
	
	MDOUBLE n = (double)vecLen;
	MDOUBLE dif,r,sum_dif=0;
	for (int i=0; i<vecLen; ++i)
	{
		dif = orderVec1[i] - orderVec2[i];
		sum_dif += pow(dif, 2);
	}
	r=1-(6*sum_dif)/(n*(pow (n,2)-1));
	return r;
}


/********************************************************************************************
Pearson's correlation co-efficient
*********************************************************************************************/
MDOUBLE calcPearsonCorrelation(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec, const int numberOfSignificantDigits)
{
	MDOUBLE res = 0.0;
	
	//MDOUBLE cov = calcCoVariance(oneRatesVec, otherRatesVec);
	//MDOUBLE sdOne = computeStd(oneRatesVec);
	//MDOUBLE sdOther = computeStd(otherRatesVec);
	//res = cov/(sdOne*sdOther);

	int seqLength = oneRatesVec.size();
	MDOUBLE meanOne = computeAverage(oneRatesVec);
	MDOUBLE meanOther = computeAverage(otherRatesVec);
	MDOUBLE cov = 0;
	MDOUBLE sdOne = 0;
	MDOUBLE sdOther = 0;
	for (int i=0; i<seqLength; ++i)
	{
		cov+=(oneRatesVec[i]-meanOne)*(otherRatesVec[i]-meanOther);	// crossProductSum  
		sdOne+=(oneRatesVec[i]-meanOne)*(oneRatesVec[i]-meanOne);		// sqDevX
		sdOther+=(otherRatesVec[i]-meanOther)*(otherRatesVec[i]-meanOther); //  sqDevY
	}
	res = cov/ (sqrt(sdOne)*sqrt(sdOther)); // no need to divide by seqLength -1, since canceled out

	MDOUBLE rounded = floorf(res * pow(10.0,numberOfSignificantDigits) + 0.5) / pow(10.0,numberOfSignificantDigits); // if not rounded, perfect correlations may return 1.000002, for example
	return rounded;
}

/********************************************************************************************
BenjaminiHochbergYekutieli procedure controls the false discovery rate
*********************************************************************************************/
MDOUBLE computeFDRthreshold(Vdouble& pVals, MDOUBLE levelOfFDRcontroled, bool isPValsSorted){
	MDOUBLE FDRthreshold = 0;
	if(!isPValsSorted)
		sort(pVals.begin(),pVals.end());
	int indexAll = pVals.size();
	for (int i=0; i<pVals.size(); ++i){		
		MDOUBLE correctedVal = (double)(i+1)/(double)indexAll *levelOfFDRcontroled;
		if( pVals[i] <= correctedVal){
			FDRthreshold = pVals[i];
		}			
	}
	return FDRthreshold;
}

MDOUBLE calcCoVariance(const Vdouble& oneRatesVec, const Vdouble& otherRatesVec)
{
	MDOUBLE cov = 0.0;
	MDOUBLE elemMulti = 0.0;
	int seqLength = oneRatesVec.size();
	MDOUBLE meanOne = computeAverage(oneRatesVec);
	MDOUBLE meanOther = computeAverage(otherRatesVec);
	for (int i=0; i<seqLength; ++i)
	{
		elemMulti += (oneRatesVec[i]-meanOne) * (otherRatesVec[i]-meanOther);
	}
	cov = elemMulti/(seqLength -1);
	return cov;
}


ostream &operator<<(ostream &out, const Vdouble &v){
  for (int j=0;j<v.size();++j)
    out<< v[j]<<" ";
  out <<endl;
  return(out);
}

ostream &operator<<(ostream &out, const VVdouble &m){
  for (int i=0;i<m.size();++i)
    out<<m[i];
  out <<endl;
  return(out);
}

void mult(Vdouble& vec, const MDOUBLE factor){
  for(int i=0;i<vec.size();++i)
    vec[i]*=factor;
}

void mult(VVdouble& vec, const MDOUBLE factor){
  for(int i=0;i<vec.size();++i)
    mult(vec[i],factor);
}



////orderVec - determine the relative order of vecIn
////returns orderVecOut[i] is the rank of vecIn[i]
////note that in case of ties the rank will be the midrank of the tied group
//Vdouble orderVec(const Vdouble& vecIn)
//{
//	int vecSize = vecIn.size();
//	Vdouble orderVecOut(vecSize);
//	vector< vecElem<MDOUBLE> > sortVec(vecSize);
//	for (int x =0; x < vecSize ; ++x)
//	{
//		sortVec[x].setValue(vecIn[x]);
//		sortVec[x].setPlace(x);
//	}
//	sort(sortVec.begin(), sortVec.end());
//	
//	//check for ties and correct their rank
//	Vdouble rankVec(vecSize);
//	MDOUBLE rank;
//	for (int i=0; i < vecSize; )
//	{
//		if (sortVec[i].getValue() != sortVec[i+1].getValue())
//		{//no tie
//			rankVec[i] = i;
//			++i;
//		}
//		else
//		{//tie
//			int to =0;
//			for (to = i+1; (to<=vecSize) && (sortVec[i].getValue() == sortVec[to].getValue());++to)
//				;//check how far the tie goes
//			to--;
//			rank = 0.5*(to + i); 
//			for (int ji = i; ji<= to; ji++)
//			{
//				rankVec[ji] = rank;
//			}
//
//			i = to+1;
//		}
//	}
//	for (int j =0; j < vecSize; ++j) {
//		assert ((rankVec[j] >= 0) && (rankVec[j] < vecSize));
//		orderVecOut[sortVec[j].getPlace()] = rankVec[j]; 
//	}
//	return orderVecOut;
//}

//orderVec - determine the relative order of vecIn
//orderVecOut[i] is the rank of vecIn[i]
//note that in case of ties the rank will be the midrank of the tied group
//return sum of n^3 - n where n is the number of elements in each tied group - see spearman rank correlation
MDOUBLE orderVec(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut)
{
	int vecSize = vecIn.size();
	orderVecOut.resize(vecSize);
	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	for (int x =0; x < vecSize ; ++x)
	{
		sortVec[x].setValue(vecIn[x]);
		sortVec[x].setPlace(x);
	}
	sort(sortVec.begin(), sortVec.end());
	//check for ties and correct their rank
	Vdouble rankVec(vecSize);
	MDOUBLE sumRankDif = 0; //sum(Fk^3 - Fk)

	MDOUBLE rank;
	for (int i=0; i < vecSize-1; ) // loop was till vecSize, out of range with sortVec[i+1]. Fixed (?)
	{
		if (sortVec[i].getValue() != sortVec[i+1].getValue())
		{//no tie
			rankVec[i] = i;
			++i;
		}
		else
		{//tie
			int to =0;
			for (to = i+1; (to<=vecSize) && (sortVec[i].getValue() == sortVec[to].getValue());++to)
				;//check how far the tie goes
			to--;
			rank = 0.5*(to + i); 
			for (int ji = i; ji<= to; ji++)
			{
				rankVec[ji] = rank;
			}

			int numTies = to - i +1; //number o fties in this group
			sumRankDif += numTies*numTies*numTies - numTies;
			i = to+1;
		}
	}
	
	for (int j =0; j < vecSize; ++j) {
	    assert ((rankVec[j] >= 0) && (rankVec[j] < vecSize));
	    orderVecOut[sortVec[j].getPlace()] = rankVec[j]; 
	}
	return sumRankDif;
}


void orderRankNoTies(const vector<MDOUBLE>& vecIn, vector<MDOUBLE>& orderVecOut)
{
	int vecSize = vecIn.size();
	orderVecOut.resize(vecSize);
	vector< vecElem<MDOUBLE> > sortVec(vecSize);
	for (int x =0; x < vecSize ; ++x)
	{
		sortVec[x].setValue(vecIn[x]);
		sortVec[x].setPlace(x);
	}
	sort(sortVec.begin(), sortVec.end());	
	for (int j =0; j < vecSize; ++j) {	   
	    orderVecOut[sortVec[j].getPlace()] = j; 
	}
	
}



void orderVec(const Vdouble& vecIn, vector< vecElem<MDOUBLE> >& orderVecOut)
{
	int vecSize = vecIn.size();
	orderVecOut.resize(vecSize);
	for (int x =0; x < vecSize ; ++x)
	{
		orderVecOut[x].setValue(vecIn[x]);
		orderVecOut[x].setPlace(x);
	}
	sort(orderVecOut.begin(), orderVecOut.end());
}


void splitString2(string str, string seperater, string &first, string &second)
{
     int i = (int)str.find(seperater); //find seperator
     if(i != -1)
     {
          int y = 0;
          if(!str.empty())
          {
               while(y != i)
               {
                    first += str[y++]; //creating first string
               }
               y = y+(int)seperater.length(); //jumping forward seperater length
               while(y != str.length())
               {
                    second += str[y++]; //creating second string
               }
               
          }
     }
     else
     {
          first = str;
          second = "NULL"; //if seperator is not there then second string == null 
     }
}


void splitString(const string& str,vector<string>& subStrs,const string& delimiter)
{
	// Skip delimiter at beginning.
	string::size_type lastPos = str.find_first_not_of(delimiter,0);
	// Find first "non-delimiter".
	string::size_type pos = str.find_first_of(delimiter,lastPos);

	while (string::npos != pos || string::npos != lastPos)
	{
		// Found a subStr, add it to the vector.
		subStrs.push_back(str.substr(lastPos,pos - lastPos));
		// Skip delimiter.  Note the "not_of"
		lastPos = str.find_first_not_of(delimiter,pos);
		// Find next "non-delimiter"
		pos = str.find_first_of(delimiter,lastPos);
	}
}

Vint getVintFromStr(const string& inStr)
{
	Vint res;
	vector<string> outStr;
	splitString(inStr, outStr, ",");
	for (int i = 0; i < outStr.size(); ++i)
	{
		int x = atoi(outStr[i].c_str());
		res.push_back(x);
	}
	return res;
}

string getStrFromVint(const Vint& inVec)
{
	string res("");
	for (int i = 0; i < inVec.size(); ++i)
	{
		if (i > 0)
			res += ",";
		res += int2string(inVec[i]);
	}
	return res;
}


/********************************************************************************************
gainLoss project
*********************************************************************************************/
int fromIndex2gainIndex(const int i, const int gainCategories, const int lossCategories){
	int gainIndex;
	if(lossCategories<=gainCategories){			
		gainIndex = (int)floor((double)i/(lossCategories) );
	}
	else{
		gainIndex = i%(gainCategories);			
	}
	return gainIndex;
}

int fromIndex2lossIndex(const int i, const int gainCategories, const int lossCategories){
	int lossIndex;
	if(lossCategories<=gainCategories){			
		lossIndex = i%(lossCategories);
	}
	else{
		lossIndex = (int)floor((double)i/(gainCategories) );		
	}
	return lossIndex;
}

int giveRandomState(const int alphabetSize, const int beginningState, const VVdouble &changeProbabilities) 
{
	for (int loop = 0 ; loop < 100000 ; ++loop) 
	{
		MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(1.0);
		MDOUBLE sum = 0.0;
		for (int state = 0; state < alphabetSize; ++state) 
		{
			sum += changeProbabilities[beginningState][state];
			if (theRandNum < sum) {
				return state;
			}
		}
	}
	errorMsg::reportError("giveRandomState: could not give random character. The reason is unknown.");
	return 1;

}

int giveRandomState(const int alphabetSize, const Vdouble &frequencies)  {
	for (int loop =0 ;loop<100000 ;loop++) {
		MDOUBLE theRandNum = talRandom::giveRandomNumberBetweenZeroAndEntry(0.999);
		MDOUBLE sum = 0.0;
		for (int j=0; j < alphabetSize;++j) {
			sum+=frequencies[j];
			if (theRandNum<sum) return j;
		}
	} 
	errorMsg::reportError("giveRandomState: Could not give random character. The reason is probably that the frequencies do not sum to one.");
	return 1;
}
/********************************************************************************************
additional Math functions
*********************************************************************************************/
int sign(MDOUBLE r){
	if(r>0) return 1; 
	else return -1;
}
MDOUBLE factorial(int x) {
	MDOUBLE fac = 1;
	for (int i=2; i<=x; i++) 
		fac *= i;
	return fac;
}
MDOUBLE BinomialCoeff(int a, int b) {
	return factorial(a)/(factorial(b)*factorial(a-b));
}

MDOUBLE exponentResolver(Vdouble& valuesVec){
	//First find largest element in valuesVec
	MDOUBLE largest = VERYSMALL;
	int largestIndex = -1;
	for(int i = 0;i < valuesVec.size();++i){
		if(valuesVec[i] > largest){
			largest = valuesVec[i];
			largestIndex = i;
		}
	}
	if(largestIndex == -1){
		errorMsg::reportError("exponentResolver: Could not find the largest element in the input vector");
		return 1;
	}
	//Now sum over all elements that are greater than -50. Note that exp(-50) is way smaller than the famous EPSILON so we are pretty safe from neglecting anything significant
	MDOUBLE sum = 1.0; 
	MDOUBLE cutoff = -50;
	for(int i = 0;i < valuesVec.size();++i){
		if(i == largestIndex) continue;
		if((valuesVec[i]-largest) < cutoff) continue;
		sum += exp(valuesVec[i]-largest);
	}
	MDOUBLE result = largest+log(sum);
	return(result);
}

MDOUBLE sumVdouble(const Vdouble & vec){
	MDOUBLE sum = 0.0;
	for(int i = 0;i < vec.size();++i){
		sum += vec[i];
	}
	return(sum);
}