1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
#include "muscle.h"
#include "profile.h"
#include "msa.h"
#include "pwpath.h"
#include "seqvect.h"
#include "clust.h"
#include "tree.h"
#define TRACE 0
struct Range
{
unsigned m_uBestColLeft;
unsigned m_uBestColRight;
};
static void ListVertSavings(unsigned uColCount, unsigned uAnchorColCount,
const Range *Ranges, unsigned uRangeCount)
{
if (!g_bVerbose || !g_bAnchors)
return;
double dTotalArea = uColCount*uColCount;
double dArea = 0.0;
for (unsigned i = 0; i < uRangeCount; ++i)
{
unsigned uLength = Ranges[i].m_uBestColRight - Ranges[i].m_uBestColLeft;
dArea += uLength*uLength;
}
double dPct = (dTotalArea - dArea)*100.0/dTotalArea;
Log("Anchor columns found %u\n", uAnchorColCount);
Log("DP area saved by anchors %-4.1f%%\n", dPct);
}
static void ColsToRanges(const unsigned BestCols[], unsigned uBestColCount,
unsigned uColCount, Range Ranges[])
{
// N best columns produces N+1 vertical blocks.
const unsigned uRangeCount = uBestColCount + 1;
for (unsigned uIndex = 0; uIndex < uRangeCount ; ++uIndex)
{
unsigned uBestColLeft = 0;
if (uIndex > 0)
uBestColLeft = BestCols[uIndex-1];
unsigned uBestColRight = uColCount;
if (uIndex < uBestColCount)
uBestColRight = BestCols[uIndex];
Ranges[uIndex].m_uBestColLeft = uBestColLeft;
Ranges[uIndex].m_uBestColRight = uBestColRight;
}
}
// Return true if any changes made
bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)
{
bool bAnyChanges = false;
const unsigned uColCountIn = msaIn.GetColCount();
const unsigned uSeqCountIn = msaIn.GetSeqCount();
if (uColCountIn < 3 || uSeqCountIn < 3)
return false;
unsigned *AnchorCols = new unsigned[uColCountIn];
unsigned uAnchorColCount;
SetMSAWeightsMuscle(msaIn);
FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);
const unsigned uRangeCount = uAnchorColCount + 1;
Range *Ranges = new Range[uRangeCount];
#if TRACE
Log("%u ranges\n", uRangeCount);
#endif
ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);
#if TRACE
{
Log("Anchor cols: ");
for (unsigned i = 0; i < uAnchorColCount; ++i)
Log(" %u", AnchorCols[i]);
Log("\n");
Log("Ranges:\n");
for (unsigned i = 0; i < uRangeCount; ++i)
Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
}
#endif
delete[] AnchorCols;
MSA msaOut;
msaOut.SetSize(uSeqCountIn, 0);
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
{
const char *ptrName = msaIn.GetSeqName(uSeqIndex);
unsigned uId = msaIn.GetSeqId(uSeqIndex);
msaOut.SetSeqName(uSeqIndex, ptrName);
msaOut.SetSeqId(uSeqIndex, uId);
}
for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
{
MSA msaRange;
const Range &r = Ranges[uRangeIndex];
const unsigned uFromColIndex = r.m_uBestColLeft;
const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;
if (0 == uRangeColCount)
continue;
else if (1 == uRangeColCount)
{
MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
MSAAppend(msaOut, msaRange);
continue;
}
MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);
#if TRACE
Log("\n-------------\n");
Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
Log("Before:\n");
msaRange.LogMe();
#endif
bool bLockLeft = (0 != uRangeIndex);
bool bLockRight = (uRangeCount - 1 != uRangeIndex);
bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);
bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);
#if TRACE
Log("After:\n");
msaRange.LogMe();
#endif
MSAAppend(msaOut, msaRange);
#if TRACE
Log("msaOut after Cat:\n");
msaOut.LogMe();
#endif
}
#if DEBUG
// Sanity check
AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
#endif
delete[] Ranges;
if (bAnyChanges)
msaIn.Copy(msaOut);
return bAnyChanges;
}
|