1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
#include "GamMixtureOptimizer.h"
#include "someUtil.h"
#include "optGammaMixtureEM.h"
#include "optGammaMixtureLS.h"
#include <fstream>
#include <algorithm>
#include <ctime>
using namespace std;
GamMixtureOptimizer::GamMixtureOptimizer(stochasticProcess* pSp, const sequenceContainer& sc, const tree& inTree, unObservableData* unObservableData_p)
{
_pSc = ≻
_pTree = &inTree;
_pSp = pSp;
_unObservableData_p = unObservableData_p;
_tolOptSpecific = 0.001;
}
GamMixtureOptimizer::~GamMixtureOptimizer()
{
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
//findBestParamManyStarts: Finds the best gammaMixture from many starting points.
//The function starts form few starting points.
//For each point it tries to optimize the likellihood doing only a small number of iterations.
//It then picks the best points (highest likelihood) and continue the maximization for these points only.
//This can be repeated a number of times, each cycle with a different optimization algorithm.
//The best gammaMixture is stored in _sp and the best likelihood is returned.
//input Parameters:
//pointsNum: a vector with the number of points to peformed the current cycle of optimization.
//iterNum: the number of iterations to perform in each cycle.
//OptAlgs: the optimization algorithm to be performed in each cycle.
//tol = for determining convergence in the maximization process.
MDOUBLE GamMixtureOptimizer::findBestParamManyStarts(const Vint pointsNum, const Vint iterNum, const vector<OptimAlg> OptAlgs, const Vdouble tols, const Vdouble * pWeights, ofstream* pOutF/*= NULL*/)
{
//make sure that the number of points in each cycle is not bigger than the previous cycle.
int i;
for (i = 0; i < pointsNum.size()-1; ++i)
{
if (pointsNum[i] < pointsNum[i+1])
errorMsg::reportError("input error in GamMixtureOptimizer::findBestParamManyStarts()");
}
//create starting distributions
vector<mixtureDistribution*> distVec;
const mixtureDistribution * pMixture = getMixtureDist();
for (i = 0; i < pointsNum[0]; ++i)
{
//the first distribution will be the current one
if (i == 0)
distVec.push_back(new mixtureDistribution(*pMixture));
else
distVec.push_back(new mixtureDistribution(pMixture->getComponentsNum(), pMixture->categoriesForOneComponent(), LAGUERRE, 15, 15));
}
//make a small number of iterations for all random starts
int numOfOptCycles = pointsNum.size();
Vdouble likelihoodVec;
for (i = 0; i < numOfOptCycles; ++i)
{
if (i != 0)
{
vector<mixtureDistribution*> tmpDistVec(0);
//sort results and continue optimization only with the best (pointsNum[i]) points
Vdouble sortedL = likelihoodVec;
sort(sortedL.begin(),sortedL.end());
MDOUBLE threshold = sortedL[sortedL.size()- pointsNum[i]];
for (int j = 0; j < likelihoodVec.size(); ++j)
{
if (likelihoodVec[j] >= threshold)
tmpDistVec.push_back(distVec[j]);
else
delete distVec[j];
}
distVec.clear();
distVec = tmpDistVec;
}
likelihoodVec.clear();
likelihoodVec.resize(pointsNum[i]);
int c;
for (c = 0; c < pointsNum[i]; ++c)
{
cerr <<"optimizing point " <<c<<endl;
MDOUBLE ll = optimizeParam(distVec[c], iterNum[i], OptAlgs[i], tols[i], pWeights, pOutF);
cerr<<"pointi: "<<c<<" likelihood = "<<ll<<endl;
likelihoodVec[c] = ll;
}
}
Vdouble sortedL = likelihoodVec;
sort(sortedL.begin(),sortedL.end());
MDOUBLE bestL = sortedL[likelihoodVec.size() - 1];
for (i = 0; i < likelihoodVec.size(); ++i)
{
if (bestL == likelihoodVec[i])
{
_pSp->setDistribution(distVec[i]);
}
delete distVec[i];
}
distVec.clear();
return bestL;
}
MDOUBLE GamMixtureOptimizer::findBestParam(const OptimAlg alg, const int maxIterations, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
{
mixtureDistribution* pInDistribution = static_cast<mixtureDistribution*>(_pSp->distr());
return optimizeParam(pInDistribution, maxIterations, alg, tol, pWeights, pOutF);
}
MDOUBLE GamMixtureOptimizer::optimizeParam(mixtureDistribution* pInDistribution, const int maxIterations, const OptimAlg alg, const MDOUBLE tol, const Vdouble * pWeights, ofstream* pOutF)
{
MDOUBLE res = 0.0;
switch (alg)
{
case EM: {
optGammaMixtureEM emOpt(*_pSp, *_pSc, *_pTree);
res = emOpt.optimizeParam(pInDistribution, maxIterations, tol, _tolOptSpecific, pOutF);
break;
}
case ONE_DIM: {
optGammaMixtureLS lsOpt(_pSp, *_pSc, *_pTree,MAXIMUM_ALPHA_PARAM,MAXIMUM_BETA_PARAM,_unObservableData_p);
res = lsOpt.optimizeParam(pInDistribution, maxIterations, tol, pWeights, optGammaMixtureLS::ONE_DIM);
MDOUBLE resRecompute = likelihoodComputation::getTreeLikelihoodAllPosAlphTheSame(*_pTree,*_pSc,*_pSp,pWeights,_unObservableData_p);
if(!DEQUAL(res,resRecompute)){
LOGnOUT(3,<<"--- error: different likelihood after GamMixtureOptimizer::optimizeParam,diff= "<<res-resRecompute <<"\n");
}
break;
}
//case TX_CONJUGATE_DERIVATIVES:
// {
// txGamMixtureOptimizer txOpt(_pSp, *_pSc, *_pTree);
// txOpt.setOptimizationParameters(tol, _tolOptSpecific, _tolOptSpecific, _tolOptSpecific);
// res = txOpt.optimizeParam(pInDistribution, maxIterations, pWeights, alg, pOutF);
// break;
// }
//case NR_CONJUGATE_DERIVATIVES:
// {
// optGammaMixtureLS opt(_pSp, *_pSc, *_pTree);
// res = opt.optimizeParam(pInDistribution, maxIterations, tol, pWeights, optGammaMixtureLS::CONJUGATE_DERIVATIVES, pOutF);
// break;
// }
default:
errorMsg::reportError("unknown optimization algorithm in GamMixtureOptimizer::optimizeParam()");
}
return res;
}
|