1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
|
// $Id: seqContainerTreeMap.cpp 11896 2013-12-19 17:50:51Z haim $
#include "seqContainerTreeMap.h"
#include "logFile.h"
#include "treeUtil.h"
#include <stdlib.h>
/********************************************************************************************
*********************************************************************************************/
void intersectNamesInTreeAndSequenceContainer(tree& et, sequenceContainer & sc, bool bLeavesOnly){
LOGnOUT(4,<<"\n intersectNames Tree vs Sequence. Before intersect numOfSeq= "<<sc.numberOfSeqs()<<" nunOfTaxa= "<<et.getLeavesNum()<<" Remove "<<abs(et.getLeavesNum() -sc.numberOfSeqs())<<" taxa"<<endl);
treeIterDownTopConst tIt(et);
vector<tree::nodeP> nodes2remove;
vector<int> seqIDs2remove;
//cout<<"tree names:"<<endl;
for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
bool bFound = false;
bool bFound_more = false;
if (bLeavesOnly) {
if (mynode->isInternal())
continue;
}
sequenceContainer::constTaxaIterator it=sc.constTaxaBegin();
for (;it != sc.constTaxaEnd(); ++it)
{
string scName = it->name();
string treeNodeName = mynode->name();
if (it->name() == mynode->name())
{
if(bFound)
bFound_more = true;
bFound = true;
//break;
}
if (bFound_more == true)
{
string errMsg = "The taxID:\t";
errMsg += mynode->name();
errMsg += "\twas found again in the sequence file. Removed from sequence.";
LOGnOUT(4,<<errMsg<<endl);
seqIDs2remove.push_back(it->id());
bFound_more = false;
}
}
if (bFound == false)
{
string errMsg = "The taxID:\t";
errMsg += mynode->name();
errMsg += "\twas found in the tree file but not found in the sequence file. Removed from tree.";
LOGnOUT(4,<<errMsg<<endl);
nodes2remove.push_back(mynode);
}
}
for(int i=0; i<nodes2remove.size(); ++i){
et.removeLeaf(nodes2remove[i]);
}
sequenceContainer::constTaxaIterator myseq=sc.constTaxaBegin();
for (;myseq != sc.constTaxaEnd(); ++myseq){
bool bFound = false;
bool bFound_more = false;
for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
if (bLeavesOnly)
{
if (mynode->isInternal())
continue;
}
if (myseq->name() == mynode->name())
{
if(bFound)
bFound_more = true;
bFound = true;
//break;
}
if (bFound_more == true)
{
string errMsg = "The taxID name:\t";
errMsg += myseq->name();
errMsg += "\twas found again in the tree file. Removed.";
LOGnOUT(4,<<errMsg<<endl);
nodes2remove.push_back(mynode);
bFound_more = false;
}
}
if (bFound == false)
{
string errMsg = "The taxID name:\t";
errMsg += myseq->name();
errMsg += "\twas found in the sequence file but not found in the tree file. Removed.";
LOGnOUT(4,<<errMsg<<endl);
seqIDs2remove.push_back(myseq->id());
}
}
for(int i=0; i<seqIDs2remove.size(); ++i){
sc.remove(seqIDs2remove[i]);
}
}
/********************************************************************************************
*********************************************************************************************/
//if bLeavesOnly == true then checks only leaves, otherwise the sequence container includes also internal nodes (as may be the result of simlations
void checkThatNamesInTreeAreSameAsNamesInSequenceContainer(const tree& et,const sequenceContainer & sc, bool bLeavesOnly){
treeIterDownTopConst tIt(et);
//cout<<"tree names:"<<endl;
for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
bool bFound = false;
if (bLeavesOnly) {
if (mynode->isInternal())
continue;
}
sequenceContainer::constTaxaIterator it=sc.constTaxaBegin();
for (;it != sc.constTaxaEnd(); ++it)
{
string scName = it->name();
string treeNodeName = mynode->name();
if (it->name() == mynode->name())
{
bFound = true;
break;
}
}
if (bFound == false)
{
string errMsg = "The sequence name: ";
errMsg += mynode->name();
errMsg += " was found in the tree file but not found in the sequence file.\n";
errMsg += " Please, Re-run program with _intersectTreeAndSeq to produce new MSA and Tree.\n";
LOG(4,<<errMsg<<endl);
errorMsg::reportError(errMsg);
}
}
sequenceContainer::constTaxaIterator it=sc.constTaxaBegin();
for (;it != sc.constTaxaEnd(); ++it){
bool bFound = false;
for (tree::nodeP mynode = tIt.first(); mynode != tIt.end(); mynode = tIt.next()) {
if (bLeavesOnly)
{
if (mynode->isInternal())
continue;
}
if (it->name() == mynode->name())
{
bFound = true;
break;
}
}
if (bFound == false)
{
string errMsg = "The sequence name: ";
errMsg += it->name();
errMsg += " was found in the sequence file but not found in the tree file.\n";
errMsg += " Please, Re-run program with _intersectTreeAndSeq to produce new MSA and Tree.\n";
errorMsg::reportError(errMsg);
}
}
}
/********************************************************************************************
// input: a tree and a sequence-container containing all of the leaves sequences.
// output: fills sc_leaves with the sequences of the leaves only.
*********************************************************************************************/
void getLeavesSequences(const sequenceContainer& sc,
const tree& tr, sequenceContainer& sc_leaves) {
vector<string> leavesNames = getSequencesNames(tr);
vector<string>::iterator itr_leaves;
for (itr_leaves=leavesNames.begin();itr_leaves!=leavesNames.end();++itr_leaves) {
sequenceContainer::constTaxaIterator it_sc=sc.constTaxaBegin();
for (;it_sc != sc.constTaxaEnd(); ++it_sc) {
if (it_sc->name() == *(itr_leaves)) {
sc_leaves.add(*it_sc);
break;
}
}
}
if (tr.getLeavesNum() != sc_leaves.numberOfSeqs()) {
string errMsg = "getLeavesSequencese: the number of leaves is not equal to the number of leaves' sequences";
errorMsg::reportError(errMsg);
}
}
|