File: Main.cc

package info (click to toggle)
probalign 1.4-10
links: PTS, VCS
area: main
in suites: bookworm, forky, sid, trixie
size: 368 kB
sloc: cpp: 4,964; makefile: 27; sh: 12
file content (1451 lines) | stat: -rwxr-xr-x 37,551 bytes
parent folder | download | duplicates (4)
/////////////////////////////////////////////////////////////////
// Main.cc
//
// Main routines for PROBALIGN 1.4 program (Nov 2010).
// 
/////////////////////////////////////////////////////////////////

#include "SafeVector.h"
#include "MultiSequence.h"
#include "EvolutionaryTree.h"
#include "SparseMatrix.h"
#include <math.h>
#include <time.h>
#include <cerrno>
#include <cfloat>
#include <climits>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <string>

// Record pairing an alignment column number with a probability product
// for that column.
// NOTE(review): the code that fills and reads these records is not in
// this part of the file -- confirm how probProduct is computed there.

typedef struct
{
  int columnNo;		// column number this record refers to
  float probProduct;	// probability product stored for that column

}columnReliability;

// File-wide pointer to columnReliability records.  It is not allocated
// or used by any function visible in this part of the file.
columnReliability *column;

// File-name settings.  Of these, only parametersInputFilename is
// assigned by ParseParams() (options -p / --paramfile).
string posteriorProbsFilename = "";
string parametersInputFilename = "";
string parametersOutputFilename = "no training";
string annotationFilename = "";

// Feature switches.
bool allscores = true;
bool enableVerbose = false;		// set by -v / --verbose
bool enableAllPairs = false;		// when true, main() skips writing the alignment
bool enableAnnotation = false;		// set by -gui, -showPP and -columnscore
bool enableClustalWOutput = false;	// set by -clustalw
bool enableAlignOrder = false;		// set by -a / --alignment-order

// Number of consistency-transformation rounds run by DoAlign().
int numConsistencyReps = 2;
// Number of iterative-refinement passes run by ComputeFinalAlignment();
// can be changed with -ir / --iterative-refinement.
int numIterativeRefinementReps = 100;

// Passed to BuildPosterior() by AlignAlignments().
float cutoff = 0;
// "Cosmetic" gap penalties: when both are 0, AlignAlignments() uses
// ComputeAlignment(), otherwise ComputeAlignmentWithGapPenalties().
float gapOpenPenalty = 0;
float gapContinuePenalty = 0;


// Accepted ranges for the repetition counts.  Only the iterative
// refinement bounds are checked in the visible part of ParseParams().
const int MIN_CONSISTENCY_REPS = 0;
const int MAX_CONSISTENCY_REPS = 5;
const int MIN_ITERATIVE_REFINEMENT_REPS = 0;
const int MAX_ITERATIVE_REFINEMENT_REPS = 1000;
// Name of the first input file given on the command line (set in main()).
string infilename;

int flag_gui=0;   //0: no GUI-related output
                  //1: GUI-related output is generated (set by -gui)
int flag_ppscore=0; //0: no posterior-probability score sequence is added to the FASTA output
                    //1: a posterior-probability score sequence is added (set by -showPP)

///////////////////////////////
// global scoring matrix variables
// (consumed by the extern matrix routines declared further down;
// their exact use is not visible in this part of the file)
//////////////////////////////
float g_gap_open1, g_gap_open2, g_gap_ext1, g_gap_ext2;
// matrixtype holds the scoring-matrix name (at most 19 characters plus
// the terminator); ParseParams() overwrites it for -nuc and -score_matrix.
char *aminos, *bases, matrixtype[20] = "gonnet_160";
int subst_index[26];

float sub_matrix[26][26];
float scorez_matrix[26][26];
int firstread = 0;		//this makes sure that matrices are read only once 

// Thermodynamic temperature; set by -T, and changed from 5 to 1 by -nuc
// when it still has the default value.
float TEMPERATURE = 5;
int MATRIXTYPE = 160;
int prot_nuc = 0;		//0=protein, 1=nucleotide (set by -prot / -nuc)

// Gap open / extension parameters.  0 means "not set yet": -nuc and
// -prot fill in their defaults only while the value is still 0, and
// -go / -ge assign the value given on the command line.
float GAPOPEN = 0;
float GAPEXT = 0;
/////////////////////////////////////////////////////////////////
//extern function prototypes
//
// These routines are defined outside this file; the notes below
// describe only how this file uses them.
////////////////////////////////////////////////////////////////

// Aligns two sequences (or profiles) of the given lengths from a
// posterior matrix.  Callers in this file treat .first as a heap
// object they must delete and .second as the alignment score.
extern pair < SafeVector < char >*, float >ComputeAlignment(int seq1Length,
							    int seq2Length,
							    const VF &
							    posterior);

// Variant used by AlignAlignments() when either of the "cosmetic" gap
// penalties is non-zero; the result is handled the same way.
extern pair < SafeVector < char >*,
    float >ComputeAlignmentWithGapPenalties(MultiSequence * align1,
					    MultiSequence * align2,
					    const VF & posterior,
					    int numSeqs1, int numSeqs2,
					    float gapOpenPenalty,
					    float gapContinuePenalty);

// Builds the posterior matrix for aligning two alignments from the
// pairwise sparse matrices.  AlignAlignments() deletes the result.
extern VF *BuildPosterior(MultiSequence * align1, MultiSequence * align2,
			  const SafeVector < SafeVector <
			  SparseMatrix * > >&sparseMatrices, float cutoff =
			  0.0f);




// Computes the posterior matrix for one sequence pair; DoAlign() passes
// the two sequence indices and strings, and deletes the result.
extern VF *ComputePostProbs(int a, int b, string s1, string s2);


//argument support
// Bundle of run parameters.  None of the functions visible in this part
// of the file read or write its fields.
// NOTE(review): presumably filled in by init_arguments() -- confirm
// there before relying on the field meanings noted below.
typedef struct {
    char input[30];		// fixed-size text buffer (29 characters plus terminator)
    int matrix;
    int N;
    float T;
    float beta;
    char opt;			//can be 'P' or 'M'
    float gapopen;
    float gapext;
} argument_decl;


// The single file-wide instance of the parameter bundle.
argument_decl argument;

// Scoring-matrix and argument helpers defined outside this file.  Of
// these, only init_arguments() is called in the visible code (from
// DoAlign(), before the pairwise posterior matrices are computed).
extern inline void read_sustitution_matrix(char *fileName);
extern void setmatrixtype(int le);
extern inline int matrixtype_to_int();
extern inline void read_dna_matrix();
extern inline void read_vtml_la_matrix();
extern void init_arguments();

/////////////////////////////////////////////////////////////////
// Function prototypes
//
// Forward declarations for the routines defined in this file.
/////////////////////////////////////////////////////////////////

// Writes the program banner to stderr.
void PrintHeading();

// Runs the full alignment pipeline; the caller deletes the result.
MultiSequence *DoAlign(MultiSequence * sequence);

// Parses the command line and returns the input file names.
SafeVector < string > ParseParams(int argc, char **argv);

// Progressive alignment along the tree followed by iterative refinement.
MultiSequence *ComputeFinalAlignment(const TreeNode * tree,
				     MultiSequence * sequences,
				     const SafeVector < SafeVector <
				     SparseMatrix * > >&sparseMatrices);
// Aligns two alignments; the inputs stay owned by the caller.
MultiSequence *AlignAlignments(MultiSequence * align1,
			       MultiSequence * align2,
			       const SafeVector < SafeVector <
			       SparseMatrix * > >&sparseMatrices);
// One round of the consistency transformation; returns new matrices.
SafeVector < SafeVector <
    SparseMatrix * > >DoRelaxation(MultiSequence * sequences,
				  SafeVector < SafeVector <
				  SparseMatrix * > >&sparseMatrices);
// Accumulate the contribution of one intermediate sequence into
// "posterior"; Relax1 is the variant whose first matrix is indexed by
// the intermediate sequence.
void Relax(SparseMatrix * matXZ, SparseMatrix * matZY, VF & posterior);
void Relax1(SparseMatrix * matXZ, SparseMatrix * matZY, VF & posterior);

// Collects the sequence labels of all leaves below a tree node.
set < int >GetSubtree(const TreeNode * tree);
// Tree-guided refinement: realigns across the edges of the tree.
void TreeBasedBiPartitioning(const SafeVector < SafeVector <
			     SparseMatrix * > >&sparseMatrices,
			     MultiSequence * &alignment,
			     const TreeNode * tree);
// One refinement pass; defined further down in the file.
void DoIterativeRefinement(const SafeVector < SafeVector <
			   SparseMatrix * > >&sparseMatrices,
			   MultiSequence * &alignment);
			   
//java gui related change
// Called by DoAlign() on the final alignment when enableAnnotation is set.
void WriteAnnotation(MultiSequence * alignment,
		     const SafeVector < SafeVector <
		     SparseMatrix * > >&sparseMatrices);
		     
//java gui related change		     
// Defined further down in the file; not called in the visible part.
float ComputeScore(const SafeVector < pair < int, int > >&active,
		 const SafeVector < SafeVector <
		 SparseMatrix * > >&sparseMatrices);




/////////////////////////////////////////////////////////////////
// main()
//
// Calls all initialization routines and runs the PROBCONS
// aligner.
/////////////////////////////////////////////////////////////////

int main(int argc, char **argv)
{

    // print PROBCONS heading
    PrintHeading();

    // parse program parameters
    SafeVector < string > sequenceNames = ParseParams(argc, argv);
    infilename = sequenceNames[0];


    // now, we'll process all the files given as input.  If we are given
    // several filenames as input, then we'll load all of those sequences
    // simultaneously.


    // load all files together
    MultiSequence *sequences = new MultiSequence();
    assert(sequences);
    for (int i = 0; i < (int) sequenceNames.size(); i++)
    {
	cerr << "Loading sequence file: " << sequenceNames[i] << endl;
	sequences->LoadMFA(sequenceNames[i], true);
    }


    // now, we can perform the alignments and write them out

    MultiSequence *alignment = DoAlign(sequences);

    if (!enableAllPairs)
    {
	if (enableClustalWOutput)
	    alignment->WriteALN(cout);
	else
	    alignment->WriteMFA(cout);
    }

    delete alignment;
    delete sequences;

}

/////////////////////////////////////////////////////////////////
// PrintHeading()
//
// Writes the PROBALIGN banner (version and credits) to stderr,
// surrounded by blank lines.
/////////////////////////////////////////////////////////////////

void PrintHeading()
{
    // leading blank line
    cerr << endl;

    // three banner lines
    cerr << "PROBALIGN Version 1.4 (Nov 2010) "
	 << "aligns multiple protein sequences and prints to the" << endl;
    cerr << "standard output. "
	 << "Written by Satish Chikkagoudar and Usman Roshan using code from PROBCONS" << endl;
    cerr << "version 1.1 (written by Chuong Do) and based upon probA (written by Ulrike Muckstein)." << endl;

    // trailing blank line
    cerr << endl;
}

/////////////////////////////////////////////////////////////////
// DoAlign()
//
// Runs the whole alignment pipeline on "sequences":
//   1. computes a posterior match matrix for every sequence pair,
//      keeps it in sparse form and derives an "expected accuracy"
//      distance for the pair;
//   2. applies numConsistencyReps rounds of the consistency
//      transformation;
//   3. builds the evolutionary tree from the distances and computes
//      the final multiple alignment along it;
//   4. writes the annotation when enableAnnotation is set.
//
// Returns the final alignment, which the caller must delete.  When
// flag_gui is 1, progress markers are written to stdout.
/////////////////////////////////////////////////////////////////

MultiSequence *DoAlign(MultiSequence * sequences)
{
    assert(sequences);

    const int numSeqs = sequences->GetNumSequences();
    VVF distances(numSeqs, VF(numSeqs, 0));

    // only the upper triangle (first index < second index) is filled
    SafeVector < SafeVector < SparseMatrix * > >sparseMatrices(numSeqs,
							      SafeVector <
							      SparseMatrix
							      * >(numSeqs,
								  NULL));

    //initialize arguments
    init_arguments();

    // progress protocol for the GUI: sequence count, "@+", one running
    // number per pair, then "@-"
    int pairCounter = 1;
    if (flag_gui == 1)
    {
	cout << numSeqs << endl;
	cout << "@+" << endl;
    }

    // do all pairwise alignments for posterior probability matrices
    for (int a = 0; a < numSeqs - 1; a++)
    {
	for (int b = a + 1; b < numSeqs; b++)
	{
	    if (flag_gui == 1)
		cout << pairCounter++ << endl;

	    Sequence *seq1 = sequences->GetSequence(a);
	    Sequence *seq2 = sequences->GetSequence(b);

	    // verbose output
	    if (enableVerbose)
	    {
		cerr << "Computing posterior matrix: (" << a + 1 << ") "
		     << seq1->GetHeader() << " vs. " << "(" << b + 1 << ") "
		     << seq2->GetHeader() << " -- " << endl;

		cerr << a << "   "
		     << strlen(seq1->GetString().c_str()) << endl;
		cerr << b << "   "
		     << strlen(seq2->GetString().c_str()) << endl
		     << "-------------" << endl;
	    }

	    // posterior probability match matrix for this pair
	    VF *posterior = ComputePostProbs(a, b, seq1->GetString(),
					     seq2->GetString());
	    assert(posterior);

	    // compute sparse representations
	    sparseMatrices[a][b] =
		new SparseMatrix(seq1->GetLength(), seq2->GetLength(),
				 *posterior);
	    sparseMatrices[b][a] = NULL;

	    // perform the pairwise sequence alignment
	    pair < SafeVector < char >*, float >alignment =
		ComputeAlignment(seq1->GetLength(),
				 seq2->GetLength(),
				 *posterior);

	    // compute "expected accuracy" distance for evolutionary tree computation
	    float distance = alignment.second / min(seq1->GetLength(),
						    seq2->GetLength());
	    distances[a][b] = distances[b][a] = distance;

	    if (enableVerbose)
		cerr << setprecision(10) << distance << endl;

	    // only the score was needed from the pairwise alignment
	    delete alignment.first;
	    delete posterior;
	}
    }

    if (flag_gui == 1)
	cout << "@-" << endl;

    // perform the consistency transformation the desired number of times
    for (int r = 0; r < numConsistencyReps; r++)
    {
	SafeVector < SafeVector < SparseMatrix * > >newSparseMatrices =
	    DoRelaxation(sequences, sparseMatrices);

	// now replace the old posterior matrices
	for (int i = 0; i < numSeqs; i++)
	{
	    for (int j = 0; j < numSeqs; j++)
	    {
		delete sparseMatrices[i][j];
		sparseMatrices[i][j] = newSparseMatrices[i][j];
	    }
	}
    }

    if (enableVerbose)
	cerr << endl;

    // compute the evolutionary tree
    TreeNode *tree = TreeNode::ComputeTree(distances);

    tree->Print(cerr, sequences);
    cerr << endl;

    // make the final alignment
    MultiSequence *finalAlignment =
	ComputeFinalAlignment(tree, sequences, sparseMatrices);

    // build annotation
    if (enableAnnotation)
	WriteAnnotation(finalAlignment, sparseMatrices);

    delete tree;

    // CLEANUP: delete sparse matrices
    for (int row = 0; row < numSeqs - 1; row++)
    {
	for (int col = row + 1; col < numSeqs; col++)
	{
	    delete sparseMatrices[row][col];
	    delete sparseMatrices[col][row];
	}
    }

    return finalAlignment;
}

/////////////////////////////////////////////////////////////////
// GetInteger()
//
// Parses the whole of "data" as an integer (base chosen by strtol
// from the prefix: decimal, 0x hexadecimal or leading-0 octal).
// On success stores the value in *val and returns true.  Returns
// false, leaving *val untouched, when no digits are found, when
// characters remain after the number, or when the value does not
// fit in an int.
/////////////////////////////////////////////////////////////////

bool GetInteger(char *data, int *val)
{
    assert(data);
    assert(val);

    char *endPtr = NULL;
    errno = 0;
    const long parsed = strtol(data, &endPtr, 0);

    // nothing was consumed, or junk follows the number (e.g. "12abc")
    if (endPtr == data || *endPtr != '\0')
	return false;

    // strtol reported an out-of-range or invalid conversion
    if (errno != 0)
	return false;

    // representable as long but not as int
    if (parsed < (long) INT_MIN || parsed > (long) INT_MAX)
	return false;

    *val = (int) parsed;
    return true;
}

/////////////////////////////////////////////////////////////////
// GetFloat()
//
// Parses the whole of "data" as a floating-point number.  On
// success stores the value in *val and returns true.  Returns
// false, leaving *val untouched, when no number is found, when
// characters remain after the number, when strtod reports a range
// error, or when the result is NaN, infinite or too large in
// magnitude for a float.
/////////////////////////////////////////////////////////////////

bool GetFloat(char *data, float *val)
{
    assert(data);
    assert(val);

    char *endPtr = NULL;
    errno = 0;
    const double parsed = strtod(data, &endPtr);

    // nothing was consumed, or junk follows the number (e.g. "2.5abc")
    if (endPtr == data || *endPtr != '\0')
	return false;

    // strtod reported overflow or underflow
    if (errno != 0)
	return false;

    // NaN is the only value that compares unequal to itself
    if (parsed != parsed)
	return false;

    // infinities and doubles outside float range: converting those to
    // float would not yield a usable value
    if (parsed > FLT_MAX || parsed < -FLT_MAX)
	return false;

    *val = (float) parsed;
    return true;
}

/////////////////////////////////////////////////////////////////
// ParseParams()
//
// Parse all command-line options.
/////////////////////////////////////////////////////////////////

SafeVector < string > ParseParams(int argc, char **argv)
{

    if (argc < 2)
    {

	cerr <<
	    "PROBALIGN 1.4 comes with ABSOLUTELY NO WARRANTY."
	    << endl <<
	    "This is free software, and you are welcome to redistribute it under certain conditions."
	    << endl << "See the README file for details." << endl << endl <<
	    "Usage:" << endl <<
	    "       probalign [OPTION]... [MFAFILE]..." << endl <<
	    endl << "Description:" << endl <<
	    "       Align sequences in MFAFILE(s) and print result to standard output"
	    << endl << endl << "       -clustalw" << endl <<
	    "              use CLUSTALW output format instead of MFA" <<
	    endl << endl << "       -v, --verbose" << endl <<
	    "              report progress while aligning (default: " <<
	    (enableVerbose ? "on" : "off") << ")" << endl << endl <<
	    "       -a, --alignment-order" << endl <<
	    "              print sequences in alignment order rather than input order (default: "
	    << (enableAlignOrder ? "on" : "off") << ")" << endl << endl <<
	    "      -T, -temperature" << endl <<
	    "             Sets the thermodynamic temperature parameter "<<endl
		<<"           (default: 5 (for protein data mode), 1 ( for nucleotide data mode))."
	    << endl << endl << "      -score_matrix, --score_matrix" <<
	    endl <<
	    "             Sets the type of scoring matrix used to calculate the posterior probabilities"
	    << endl <<
	    "             (default: gonnet_160, representing gonnet 160, refer README for details)"
	    << endl << endl << "      -go, --gap-open" << endl <<
	    "             This option can be used to specify the gap open parameter. The "
	    << endl <<
		"             default for Gonnet 160 (protein) is 22 and nucleotide (simple matrix)"<<endl<<
		"             is 4."
	    << endl << endl <<
	    "      -ge, --gap-extension    " << endl <<
	    "             This option can be used to specify the gap extension parameter. The "
	    << endl <<
		"             default for Gonnet 160 (protein) is 1 and nucleotide (simple matrix)"<<endl<<
		"             is 0.25."
	    << endl << endl <<
	    "      -nuc  " << endl <<
	    "             Specify this option to indicate that inputted sequences are nucleotide sequences"
	    << endl << endl << "      -prot  " << endl <<
	    "             Specify this option to indicate that inputted sequences are protein sequences [DEFAULT]"
	    << endl << endl<<  "      -showPP "<<endl<<
          "             Outputs the posterior probabilities of alignment columns as a new sequence named Posterior Probabilities"<<endl<< 
          "             (The probability values are scaled to be between integers between 0 and 9)."<<endl<<endl;

	exit(1);
    }

    SafeVector < string > sequenceNames;
    
    float tempFloat;
    int tempInt;
    int has_sequence_argument = 0;
    
    for (int i = 1; i < argc; i++)
    {
	if (argv[i][0] == '-')
	{

	    if (!strcmp(argv[i], "-p") || !strcmp(argv[i], "--paramfile"))
	    {
		if (i < argc - 1)
		    parametersInputFilename = string(argv[++i]);
		else
		{
		    cerr << "ERROR: Filename expected for option " <<
			argv[i] << endl;
		    exit(1);
		}
	    }
	    else if (!strcmp(argv[i], "-nuc"))
	    {
		prot_nuc = 1;
		if (!strcmp(matrixtype, "gonnet_160"))
		    strcpy(matrixtype, "nuc_simple");

		if (GAPOPEN == 0)
		    GAPOPEN = 3;

		if (GAPEXT == 0)
		    GAPEXT = 0.25;

		if (TEMPERATURE == 5)
		    TEMPERATURE = 1;
	    }
	    else if (!strcmp(argv[i], "-prot"))
	    {

		prot_nuc = 0;

		if (GAPOPEN == 0)
		    GAPOPEN = 22;

		if (GAPEXT == 0)
		    GAPEXT = 1;


	    }


	    // number of randomized partitioning iterative refinement passes
	    //uncomment to make value of numIterativeRefinementReps modifiable
	    
	       else if (!strcmp (argv[i], "-ir")
	       || !strcmp (argv[i], "--iterative-refinement"))
	       {
	       if (i < argc - 1)
	       {
	       if (!GetInteger (argv[++i], &tempInt))
	       {
	       cerr << "ERROR: Invalid integer following option " <<
	       argv[i - 1] << ": " << argv[i] << endl;
	       exit (1);
	       }
	       else
	       {
	       if (tempInt < MIN_ITERATIVE_REFINEMENT_REPS
	       || tempInt > MAX_ITERATIVE_REFINEMENT_REPS)
	       {
	       cerr << "ERROR: For option " << argv[i -
	       1] <<
	       ", integer must be between " <<
	       MIN_ITERATIVE_REFINEMENT_REPS << " and " <<
	       MAX_ITERATIVE_REFINEMENT_REPS << "." << endl;
	       exit (1);
	       }
	       else
	       numIterativeRefinementReps = tempInt;
	       }
	       }
	       else
	       {
	       cerr << "ERROR: Integer expected for option " << argv[i] <<
	       endl;
	       exit (1);
	       }
	       }
            
	    // gap open penalty 
	    else if (!strcmp(argv[i], "-go")
		     || !strcmp(argv[i], "--gap-open"))
	    {
		if (i < argc - 1)
		{
		    if (!GetFloat(argv[++i], &tempFloat))
		    {
			cerr <<
			    "ERROR: Invalid floating-point value following option "
			    << argv[i - 1] << ": " << argv[i] << endl;
			exit(1);
		    }
		    else
		    {
			if (tempFloat < 0)
			{
			    cerr << "ERROR: For option " << argv[i -
								 1] <<
				", floating-point value must be positive."
				<< endl;
			    exit(1);
			}
			else
			    GAPOPEN = tempFloat;
		    }
		}
		else
		{
		    cerr <<
			"ERROR: Floating-point value expected for option "
			<< argv[i] << endl;
		    exit(1);
		}
	    }

	    // gap extension penalty
	    else if (!strcmp(argv[i], "-ge")
		     || !strcmp(argv[i], "--gap-extension"))
	    {
		if (i < argc - 1)
		{
		    if (!GetFloat(argv[++i], &tempFloat))
		    {
			cerr <<
			    "ERROR: Invalid floating-point value following option "
			    << argv[i - 1] << ": " << argv[i] << endl;
			exit(1);
		    }
		    else
		    {
			if (tempFloat < 0)
			{
			    cerr << "ERROR: For option " << argv[i -
								 1] <<
				", floating-point value must be positive."
				<< endl;
			    exit(1);
			}
			else
			    GAPEXT = tempFloat;
		    }
		}
		else
		{
		    cerr <<
			"ERROR: Floating-point value expected for option "
			<< argv[i] << endl;
		    exit(1);
		}
	    }

         // feeds the java gui
	   else if (!strcmp (argv[i], "-gui")){
	       flag_gui=1;
             enableAnnotation = true;
 
           } 
	  // add pp score seq to output alignment
	   else if (!strcmp (argv[i], "-showPP")){
	       flag_ppscore=1;
             enableAnnotation = true;
           } 


            // generate column scores
	   else if (!strcmp (argv[i], "-columnscore")){
	     
             enableAnnotation = true;
 
           } 
	                
	    // clustalw output format
	    else if (!strcmp(argv[i], "-clustalw"))
	    {
		enableClustalWOutput = true;
	    }

	    // verbose reporting
	    else if (!strcmp(argv[i], "-v")
		     || !strcmp(argv[i], "--verbose"))
	    {
		enableVerbose = true;
	    }

	    // alignment order
	    else if (!strcmp(argv[i], "-a")
		     || !strcmp(argv[i], "--alignment-order"))
	    {
		enableAlignOrder = true;
	    }
	    else if (!strcmp(argv[i], "-T")
		     || !strcmp(argv[i], "--temperature"))
	    {
		if (i < argc - 1)
		{
		    if (!GetFloat(argv[++i], &tempFloat))
		    {
			cerr <<
			    "ERROR: Invalid floating-point value following option "
			    << argv[i - 1] << ": " << argv[i] << endl;
			exit(1);
		    }
		    else
		    {
			if (tempFloat == 0)
			{
			    cerr <<
				"ERROR: Non-Zero Integer expected for option "
				<< argv[i] << endl;
			    exit(1);

			}
			else
			    TEMPERATURE = tempFloat;
		    }
		}
		else
		{
		    cerr <<
			"ERROR: Floating-point value expected for option "
			<< argv[i] << endl;
		    exit(1);
		}

	    }
	    
		//matrix filenames are read by this option
		else if (!strcmp(argv[i], "-score_matrix")
			 || !strcmp(argv[i], "--score_matrix"))
	    {
		if (i < argc - 1)
		{
		    strcpy(matrixtype, argv[++i]);

		}
		else
		{
		    cerr << "ERROR: Value expected for option " << argv[i]
			<< endl;
		    exit(1);
		}

	    }

	    // bad arguments
	    else
	    {
		cerr << "ERROR: Unrecognized option: " << argv[i] << endl;
		exit(1);
	    }


	}
	else
	{
	    has_sequence_argument = 1;
	    sequenceNames.push_back(string(argv[i]));
	}
    }

    if (!has_sequence_argument) {
	cerr << "ERROR: No MFAFILE specified" << endl;
	exit(1);
    }

    return sequenceNames;
}


/////////////////////////////////////////////////////////////////
// ProcessTree()
//
// Recursively builds the alignment for the subtree rooted at
// "tree".  A leaf yields a one-sequence alignment holding a clone
// of its sequence; an inner node (sequence label -1) aligns the
// results of its left and right children.  The caller owns the
// returned object.
/////////////////////////////////////////////////////////////////

MultiSequence *ProcessTree(const TreeNode * tree,
			   MultiSequence * sequences,
			   const SafeVector < SafeVector <
			   SparseMatrix * > >&sparseMatrices)
{
    const int label = tree->GetSequenceLabel();

    // leaf: wrap a copy of the single sequence
    if (label != -1)
    {
	MultiSequence *leaf = new MultiSequence();
	assert(leaf);
	leaf->AddSequence(sequences->GetSequence(label)->Clone());
	return leaf;
    }

    // inner node: solve both subtrees first, left before right
    MultiSequence *leftPart =
	ProcessTree(tree->GetLeftChild(), sequences, sparseMatrices);
    MultiSequence *rightPart =
	ProcessTree(tree->GetRightChild(), sequences, sparseMatrices);
    assert(leftPart);
    assert(rightPart);

    MultiSequence *merged =
	AlignAlignments(leftPart, rightPart, sparseMatrices);
    assert(merged);

    // the partial alignments are no longer needed once merged
    delete leftPart;
    delete rightPart;

    return merged;
}

/////////////////////////////////////////////////////////////////
// ComputeFinalAlignment()
//
// Builds the progressive alignment with ProcessTree() and then
// runs numIterativeRefinementReps passes of iterative refinement
// on it.  When alignment order was requested, the ordering is
// saved once and enableAlignOrder is switched off afterwards.
// The caller owns the returned alignment.
/////////////////////////////////////////////////////////////////

MultiSequence *ComputeFinalAlignment(const TreeNode * tree,
				     MultiSequence * sequences,
				     const SafeVector < SafeVector <
				     SparseMatrix * > >&sparseMatrices)
{
    MultiSequence *alignment =
	ProcessTree(tree, sequences, sparseMatrices);

    // remember the tree order before refinement starts
    if (enableAlignOrder)
    {
	alignment->SaveOrdering();
	enableAlignOrder = false;
    }

    // iterative refinement passes
    int pass = 0;
    while (pass < numIterativeRefinementReps)
    {
	DoIterativeRefinement(sparseMatrices, alignment);
	++pass;
    }
    cerr << endl;

    return alignment;
}

/////////////////////////////////////////////////////////////////
// AlignAlignments()
//
// Aligns two alignments against each other and returns the merged
// result as a new MultiSequence owned by the caller.  The inputs
// are left untouched and stay owned by the caller.
/////////////////////////////////////////////////////////////////

MultiSequence *AlignAlignments(MultiSequence * align1,
			       MultiSequence * align2,
			       const SafeVector < SafeVector <
			       SparseMatrix * > >&sparseMatrices)
{
    const int count1 = align1->GetNumSequences();
    const int count2 = align2->GetNumSequences();

    // verbose: show which groups are being aligned, as "[..] vs. [..]: "
    if (enableVerbose)
    {
	for (int s = 0; s < count1; ++s)
	    cerr << (s ? "," : "[") << align1->GetSequence(s)->GetLabel();
	cerr << "] vs. ";
	for (int s = 0; s < count2; ++s)
	    cerr << (s ? "," : "[") << align2->GetSequence(s)->GetLabel();
	cerr << "]: ";
    }

    VF *posterior = BuildPosterior(align1, align2, sparseMatrices, cutoff);

    // the "cosmetic" gap penalties decide which alignment routine runs
    const bool noGapPenalties =
	(gapOpenPenalty == 0 && gapContinuePenalty == 0);
    pair < SafeVector < char >*, float >alignment;
    if (noGapPenalties)
    {
	alignment =
	    ComputeAlignment(align1->GetSequence(0)->GetLength(),
			     align2->GetSequence(0)->GetLength(),
			     *posterior);
    }
    else
    {
	alignment =
	    ComputeAlignmentWithGapPenalties(align1, align2, *posterior,
					     count1, count2,
					     gapOpenPenalty,
					     gapContinuePenalty);
    }

    delete posterior;

    // verbose: report the score relative to the summed lengths of the
    // shorter sequence of every cross-group pair
    if (enableVerbose)
    {
	int totLength = 0;
	for (int s = 0; s < count1; ++s)
	{
	    for (int t = 0; t < count2; ++t)
	    {
		totLength += min(align1->GetSequence(s)->GetLength(),
				 align2->GetSequence(t)->GetLength());
	    }
	}

	cerr << alignment.second / totLength << endl;
    }

    // now build final alignment: first group as 'X', second as 'Y'
    MultiSequence *merged = new MultiSequence();
    for (int s = 0; s < count1; ++s)
    {
	merged->AddSequence(align1->GetSequence(s)->
			    AddGaps(alignment.first, 'X'));
    }
    for (int t = 0; t < count2; ++t)
    {
	merged->AddSequence(align2->GetSequence(t)->
			    AddGaps(alignment.first, 'Y'));
    }
    if (!enableAlignOrder)
	merged->SortByLabel();

    // free temporary alignment
    delete alignment.first;

    return merged;
}

/////////////////////////////////////////////////////////////////
// DoRelaxation()
//
// Performs one round of the consistency transformation.  The
// formula used is:
//                     1
//    P'(x[i]-y[j]) = ---  sum   sum P(x[i]-z[k]) P(z[k]-y[j])
//                    |S| z in S  k
//
// where S = {x, y, all other sequences...}
//
// Only matrices with first index < second index are read, and only
// those entries of the result are filled; the mirrored entries are
// set to NULL.  The input matrices are not freed here -- DoAlign()
// deletes them after copying the new pointers over.
/////////////////////////////////////////////////////////////////

SafeVector < SafeVector <
    SparseMatrix * > >DoRelaxation(MultiSequence * sequences,
				  SafeVector < SafeVector <
				  SparseMatrix * > >&sparseMatrices)
{
    const int numSeqs = sequences->GetNumSequences();

    // result table, same shape as the input, initially all NULL
    SafeVector < SafeVector < SparseMatrix * > >newSparseMatrices(numSeqs,
								 SafeVector
								 <
								 SparseMatrix
								 *
								 >(numSeqs,
								   NULL));

    // for every pair of sequences
    for (int i = 0; i < numSeqs; i++)
    {
	for (int j = i + 1; j < numSeqs; j++)
	{
	    Sequence *seq1 = sequences->GetSequence(i);
	    Sequence *seq2 = sequences->GetSequence(j);

	    if (enableVerbose)
		cerr << "Relaxing (" << i +
		    1 << ") " << seq1->GetHeader() << " vs. " << "(" << j +
		    1 << ") " << seq2->GetHeader() << ": ";

	    // get the original posterior matrix
	    // (a dense buffer that this function deletes further down)
	    VF *posteriorPtr = sparseMatrices[i][j]->GetPosterior();
	    assert(posteriorPtr);
	    VF & posterior = *posteriorPtr;

	    const int seq1Length = seq1->GetLength();
	    const int seq2Length = seq2->GetLength();

	    // contribution from the summation where z = x and z = y
	    // (both terms equal the current value, so each cell is doubled)
	    for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++)
		posterior[k] += posterior[k];

	    if (enableVerbose)
		cerr << sparseMatrices[i][j]->GetNumCells() << " --> ";

	    // contribution from all other sequences
	    // Only matrices [a][b] with a < b are used, so the position of
	    // k relative to i and j decides which routine applies:
	    //   k < i      : [k][i] and [k][j] are both indexed by k first -> Relax1
	    //   i < k < j  : [i][k] and [k][j] chain directly              -> Relax
	    //   k > j      : [j][k] has to be transposed before chaining   -> Relax
	    for (int k = 0; k < numSeqs; k++)
		if (k != i && k != j)
		{
		    if (k < i)
			Relax1(sparseMatrices[k][i], sparseMatrices[k][j],
			       posterior);
		    else if (k > i && k < j)
			Relax(sparseMatrices[i][k], sparseMatrices[k][j],
			      posterior);
		    else
		    {
			// the transpose is a temporary owned by this loop
			SparseMatrix *temp =
			    sparseMatrices[j][k]->ComputeTranspose();
			Relax(sparseMatrices[i][k], temp, posterior);
			delete temp;
		    }
		}

	    // now renormalization
	    // (division by |S|, the number of sequences)
	    for (int k = 0; k < (seq1Length + 1) * (seq2Length + 1); k++)
		posterior[k] /= numSeqs;

	    // mask out positions not originally in the posterior matrix
	    // Row 0 is cleared entirely; in every other row only the
	    // columns listed in the original sparse row keep their value.
	    // NOTE(review): the scan below relies on the sparse row entries
	    // being in ascending column order -- confirm in SparseMatrix.
	    SparseMatrix *matXY = sparseMatrices[i][j];
	    for (int y = 0; y <= seq2Length; y++)
		posterior[y] = 0;
	    for (int x = 1; x <= seq1Length; x++)
	    {
		SafeVector < PIF >::iterator XYptr = matXY->GetRowPtr(x);
		SafeVector < PIF >::iterator XYend =
		    XYptr + matXY->GetRowSize(x);
		// start of row x in the dense buffer
		VF::iterator base =
		    posterior.begin() + x * (seq2Length + 1);
		int curr = 0;
		while (XYptr != XYend)
		{

		    // zero out all cells until the first filled column
		    while (curr < XYptr->first)
		    {
			base[curr] = 0;
			curr++;
		    }

		    // now, skip over this column
		    curr++;
		    ++XYptr;
		}

		// zero out cells after last column
		while (curr <= seq2Length)
		{
		    base[curr] = 0;
		    curr++;
		}
	    }

	    // save the new posterior matrix
	    newSparseMatrices[i][j] =
		new SparseMatrix(seq1->GetLength(), seq2->GetLength(),
				 posterior);
	    newSparseMatrices[j][i] = NULL;

	    if (enableVerbose)
		cerr << newSparseMatrices[i][j]->GetNumCells() << " -- ";

	    // the dense buffer is no longer needed
	    delete posteriorPtr;

	    if (enableVerbose)
		cerr << "done." << endl;
	}
    }

    return newSparseMatrices;
}

/////////////////////////////////////////////////////////////////
// Relax()
//
// Adds the consistency contribution of one intermediate sequence
// z to "posterior": for every stored cell (x[i], z[k]) of matXZ
// and every stored cell (z[k], y[j]) of matZY, the product of the
// two values is added to posterior cell (i, j).  "posterior" is a
// dense matrix with (length of y + 1) entries per row.
/////////////////////////////////////////////////////////////////

void Relax(SparseMatrix * matXZ, SparseMatrix * matZY, VF & posterior)
{
    assert(matXZ);
    assert(matZY);

    const int rowsX = matXZ->GetSeq1Length();
    const int colsY = matZY->GetSeq2Length();

    // both matrices must agree on the length of z
    assert(matXZ->GetSeq2Length() == matZY->GetSeq1Length());

    for (int x = 1; x <= rowsX; ++x)
    {
	SafeVector < PIF >::iterator xz = matXZ->GetRowPtr(x);
	const SafeVector < PIF >::iterator xzStop =
	    xz + matXZ->GetRowSize(x);

	// start of row x in the dense output
	VF::iterator outRow = posterior.begin() + x * (colsY + 1);

	// every stored x[i]-z[k] cell
	for (; xz != xzStop; ++xz)
	{
	    SafeVector < PIF >::iterator zy = matZY->GetRowPtr(xz->first);
	    const SafeVector < PIF >::iterator zyStop =
		zy + matZY->GetRowSize(xz->first);
	    const float weightXZ = xz->second;

	    // every stored z[k]-y[j] cell
	    for (; zy != zyStop; ++zy)
		outRow[zy->first] += weightXZ * zy->second;
	}
    }
}

/////////////////////////////////////////////////////////////////
// Relax1()
//
// Same accumulation as Relax(), for the case where both matrices
// are indexed by the intermediate sequence z first: for every
// stored cell (z[k], x[i]) of matZX and every stored cell
// (z[k], y[j]) of matZY, the product of the two values is added
// to posterior cell (i, j).
/////////////////////////////////////////////////////////////////

void Relax1(SparseMatrix * matZX, SparseMatrix * matZY, VF & posterior)
{
    assert(matZX);
    assert(matZY);

    const int rowsZ = matZX->GetSeq1Length();
    const int colsY = matZY->GetSeq2Length();

    for (int z = 1; z <= rowsZ; ++z)
    {
	SafeVector < PIF >::iterator zx = matZX->GetRowPtr(z);
	const SafeVector < PIF >::iterator zxStop =
	    zx + matZX->GetRowSize(z);

	// every stored z[k]-x[i] cell
	for (; zx != zxStop; ++zx)
	{
	    SafeVector < PIF >::iterator zy = matZY->GetRowPtr(z);
	    const SafeVector < PIF >::iterator zyStop =
		zy + matZY->GetRowSize(z);
	    const float weightZX = zx->second;

	    // the x index selects the row of the dense output
	    VF::iterator outRow =
		posterior.begin() + zx->first * (colsY + 1);

	    // every stored z[k]-y[j] cell
	    for (; zy != zyStop; ++zy)
		outRow[zy->first] += weightZX * zy->second;
	}
    }
}

/////////////////////////////////////////////////////////////////
// GetSubtree
//
// Returns the set of sequence labels of all leaves in the subtree
// rooted at "tree".  Inner nodes carry the label -1.
/////////////////////////////////////////////////////////////////

set < int >GetSubtree(const TreeNode * tree)
{
    set < int >labels;

    // leaf: its own label is the whole answer
    const int label = tree->GetSequenceLabel();
    if (label != -1)
    {
	labels.insert(label);
	return labels;
    }

    // inner node: union of the labels below both children
    labels = GetSubtree(tree->GetLeftChild());
    const set < int >rightLabels = GetSubtree(tree->GetRightChild());
    labels.insert(rightLabels.begin(), rightLabels.end());

    return labels;
}

/////////////////////////////////////////////////////////////////
// TreeBasedBiPartitioning
//
// Uses the iterative refinement scheme from MUSCLE.
/////////////////////////////////////////////////////////////////

void
TreeBasedBiPartitioning(const SafeVector < SafeVector <
			SparseMatrix * > >&sparseMatrices,
			MultiSequence * &alignment, const TreeNode * tree)
{
    // Recursively refines "alignment" along the edges of "tree".
    //
    // For a node whose sequence label is -1 the function first recurses
    // into both children, then, for each child edge, splits the current
    // alignment into the sequences under that child and all remaining
    // sequences and realigns the two groups with AlignAlignments().
    // Nodes with any other label are left untouched.
    //
    // sparseMatrices: passed through unchanged to AlignAlignments().
    // alignment:      in/out; the pointed-to object is deleted and
    //                 replaced by the realigned result on every split.
    // tree:           current node of the tree being walked.

    // check if this is a node of the alignment tree
    if (tree->GetSequenceLabel() == -1)
    {
	TreeBasedBiPartitioning(sparseMatrices,
				alignment, tree->GetLeftChild());
	TreeBasedBiPartitioning(sparseMatrices,
				alignment, tree->GetRightChild());

	set < int >leftSubtree = GetSubtree(tree->GetLeftChild());
	set < int >rightSubtree = GetSubtree(tree->GetRightChild());
	set < int >leftSubtreeComplement, rightSubtreeComplement;

	// calculate complement of each subtree
	for (int i = 0; i < alignment->GetNumSequences(); i++)
	{
	    if (leftSubtree.find(i) == leftSubtree.end())
		leftSubtreeComplement.insert(i);
	    if (rightSubtree.find(i) == rightSubtree.end())
		rightSubtreeComplement.insert(i);
	}

	// perform realignments for edge to left child
	if (!leftSubtree.empty() && !leftSubtreeComplement.empty())
	{
	    MultiSequence *groupOneSeqs = alignment->Project(leftSubtree);
	    assert(groupOneSeqs);
	    MultiSequence *groupTwoSeqs =
		alignment->Project(leftSubtreeComplement);
	    assert(groupTwoSeqs);
	    delete alignment;
	    alignment =
		AlignAlignments(groupOneSeqs, groupTwoSeqs,
				sparseMatrices);

	    // The projections are temporaries owned by this function;
	    // release them as DoIterativeRefinement() does after the
	    // same call, otherwise every visited edge leaks two objects.
	    delete groupOneSeqs;
	    delete groupTwoSeqs;
	}

	// perform realignments for edge to right child
	if (!rightSubtree.empty() && !rightSubtreeComplement.empty())
	{
	    MultiSequence *groupOneSeqs = alignment->Project(rightSubtree);
	    assert(groupOneSeqs);
	    MultiSequence *groupTwoSeqs =
		alignment->Project(rightSubtreeComplement);
	    assert(groupTwoSeqs);
	    delete alignment;
	    alignment =
		AlignAlignments(groupOneSeqs, groupTwoSeqs,
				sparseMatrices);

	    // same ownership rule as for the left edge
	    delete groupOneSeqs;
	    delete groupTwoSeqs;
	}
    }
}

/////////////////////////////////////////////////////////////////
// DoIterativeRefinement()
//
// Performs a single round of randomized partitioning iterative
// refinement.
/////////////////////////////////////////////////////////////////

void
DoIterativeRefinement(const SafeVector < SafeVector <
		      SparseMatrix * > >&sparseMatrices,
		      MultiSequence * &alignment)
{
    set < int >groupOne, groupTwo;

    // create two separate groups
    for (int i = 0; i < alignment->GetNumSequences(); i++)
    {
	if (rand() % 2)
	    groupOne.insert(i);
	else
	    groupTwo.insert(i);
    }

    if (groupOne.empty() || groupTwo.empty())
	return;

    // project into the two groups
    MultiSequence *groupOneSeqs = alignment->Project(groupOne);
    assert(groupOneSeqs);
    MultiSequence *groupTwoSeqs = alignment->Project(groupTwo);
    assert(groupTwoSeqs);
    delete alignment;

    // realign
    alignment =
	AlignAlignments(groupOneSeqs, groupTwoSeqs, sparseMatrices);

    delete groupOneSeqs;
    delete groupTwoSeqs;
}


/////////////////////////////////////////////////////////////////
// WriteAnnotation()
//
// Computes the annotation for a multiple alignment and writes the
// values to standard output.
/////////////////////////////////////////////////////////////////

void
WriteAnnotation(MultiSequence * alignment,
		const SafeVector < SafeVector <
		SparseMatrix * > >&sparseMatrices)
{
    // Scores every column of "alignment" with ComputeScore() and
    // prints the results on stdout according to the global flags:
    //   flag_ppscore == 1  a "> Posterior_Probabilities" header followed
    //                      by floor(score * 10) for each column
    //   flag_gui == 1      the alignment length up front and, at the
    //                      end, a "columnNo score" line per column
    //   enableVerbose      extra per-residue and per-column traces
    // The global "column" array is allocated here and released before
    // returning.

    const int alignLength = alignment->GetSequence(0)->GetLength();
    const int numSeqs = alignment->GetNumSequences();

    const bool emitDigitRow = (flag_ppscore == 1);
    const bool emitGuiTable = (flag_gui == 1);

    // residuesSeen[s]: count of non-gap characters met so far in sequence s
    SafeVector < int >residuesSeen(numSeqs, 0);
    SafeVector < SafeVector < char >::iterator > rowData(numSeqs);
    for (int s = 0; s < numSeqs; ++s)
        rowData[s] = alignment->GetSequence(s)->GetDataPtr();

    SafeVector < pair < int, int > >active;
    active.reserve(numSeqs);

    // slot 0 is a placeholder; real columns are numbered from 1
    column = new columnReliability[alignLength + 1];
    column[0].columnNo = 0;

    if (emitDigitRow)
        cout << "> Posterior_Probabilities" << endl;

    if (emitGuiTable)
        cout << alignLength << endl;

    for (int col = 1; col <= alignLength; ++col)
    {
        columnReliability & entry = column[col];
        entry.columnNo = col;
        entry.probProduct = 0;

        // gather (sequence, residue index) for every non-gap cell
        active.clear();
        for (int s = 0; s < numSeqs; ++s)
        {
            if (rowData[s][col] == '-')
                continue;

            const int residueIndex = ++residuesSeen[s];
            active.push_back(make_pair(s, residueIndex));

            if (enableVerbose)
                printf("\nposition[j]=%d\n", residueIndex);
        }

        float score = ComputeScore(active, sparseMatrices);
        entry.probProduct = score;

        if (emitDigitRow)
        {
            // keep the printed value below 10; the stored score above
            // is not affected by this cap
            if (score >= 1)
                score = 0.99999;
            cout << floor(score * 10);
        }

        if (enableVerbose)
            printf("\ncolumn %d %f\n--\n", col, score);
    }

    if (emitDigitRow)
        cout << endl;

    if (emitGuiTable)
    {
        printf("probabilities ..(row column)\n");
        for (int col = 1; col <= alignLength; ++col)
            printf("%d %f\n", column[col].columnNo, column[col].probProduct);
    }

    delete[] column;
}

/////////////////////////////////////////////////////////////////
// ComputeScore()
//
// Computes the annotation score for a particular column.
/////////////////////////////////////////////////////////////////

float
ComputeScore(const SafeVector < pair < int, int > >&active,
	     const SafeVector < SafeVector <
	     SparseMatrix * > >&sparseMatrices)
{

    if (active.size() <= 1)
	return 0;

    // ALTERNATIVE #1: Compute the average alignment score.


    float prob_product=0;

    for (int i = 0; i < (int) active.size(); i++)
    {
	for (int j = i + 1; j < (int) active.size(); j++)
	{

            prob_product+=
            sparseMatrices[active[i].first][active[j].first]->
		GetValue(active[i].second, active[j].second);


            if(enableVerbose)
		  printf("%d-%d %d-%d %1.3f %f\n",active[i].first,active[i].second,active[j].first,active[j].second,sparseMatrices[active[i].first][active[j].first]->GetValue(active[i].second, active[j].second), prob_product);
	}
    }

	if(enableVerbose)
           printf("active size= %d \n",(int)active.size());


	return 2*prob_product/((int) active.size() * ((int) active.size() - 1));
}