File: Threading.h

package info (click to toggle)
macromoleculebuilder 4.0.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 122,488 kB
  • sloc: cpp: 23,632; python: 5,047; ansic: 2,101; awk: 145; perl: 144; makefile: 40; sh: 21
file content (209 lines) | stat: -rw-r--r-- 12,675 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#include "Utils.h"
#include "BiopolymerClass.h"

#include <array>

typedef char TChar;                             // character type
typedef seqan::String<TChar> TSequence;                // sequence type
typedef seqan::Align<TSequence, seqan::ArrayGaps> TAlign;



struct ThreadingPartner {
    BiopolymerClass biopolymerClass;
    vector <ResidueID> includedResidues;
    ResidueID startResidue;
    ResidueID endResidue;
    TSequence sequence;
};


class ThreadingStruct {
   private:
       std::array<ThreadingPartner, 2> threadingPartners;
       seqan::AlignmentStats alignmentStats;
       TAlign align;
       bool alignHasBeenComputed;
       
    public:
        //double gapPenalty;
        // This method is not needed. just use updThreadingPartner.
        //void setThreadingPartner (ThreadingPartner myThreadingPartner, int index){threadingPartners[index] =  myThreadingPartner;}
        // In homologyScanner, per convention partner 0 is the homologJob, partner 1 is the PrimaryJob
        ThreadingPartner & updThreadingPartner (int index){return threadingPartners[index];}
        ThreadingPartner  getThreadingPartner (int index) const {return threadingPartners[index];}
        std::string getChain(int index){return threadingPartners[index].biopolymerClass.getChainID();};
        //void print(){}
        void setDefaultStartEndResidues(){
            updThreadingPartner(0).startResidue = updThreadingPartner(0).biopolymerClass.getFirstResidueID();
            updThreadingPartner(0).  endResidue = updThreadingPartner(0).biopolymerClass. getLastResidueID();
            updThreadingPartner(1).startResidue = updThreadingPartner(1).biopolymerClass.getFirstResidueID();
            updThreadingPartner(1).  endResidue = updThreadingPartner(1).biopolymerClass. getLastResidueID();
        }
        double forceConstant;
        bool backboneOnly;
        //bool isGapped; //if False, then alignment is being provided explicitly. if True, precise alignment will be determined by MMB/SeqAn
        double matchScore; 
        double mismatchScore;
        double gapPenalty;   
	String scoringScheme;
        bool deadLengthIsFractionOfInitialLength; //If True, then dead length of each spring will be set to deadLengthFraction * <initial spring extension>. It makes sense that 1 > deadLengthFraction > 0.
        double deadLength; // This is an absolute dead length for the alignment springs. For default homology modeling behavior, should be 0.
        double deadLengthFraction;
        seqan::AlignmentStats getAlignmentStats(){return alignmentStats;}
        void   setAlignmentStats(seqan::AlignmentStats myAlignmentStats){ alignmentStats = myAlignmentStats;}

        // align, alignmentStats should be empty, undefined, or garbage unless and until this method is called:
        void setLongSequences(){
            threadingPartners[0].sequence = (threadingPartners[0].biopolymerClass.getSubSequence(threadingPartners[0].startResidue , threadingPartners[0].endResidue )).c_str();
            threadingPartners[1].sequence = (threadingPartners[1].biopolymerClass.getSubSequence(threadingPartners[1].startResidue , threadingPartners[1].endResidue )).c_str();
            computeAlign(); // Just to make sure align is in sync with the new sequences
        }

        void sortIncludedResidues(){
            //threadingPartners[0].includedResidues = 
            threadingPartners[0].biopolymerClass.sort(threadingPartners[0].includedResidues);
            //threadingPartners[1].includedResidues = 
            threadingPartners[1].biopolymerClass.sort(threadingPartners[1].includedResidues);
        }

	bool hasResidue( ResidueID residue , int biopolymerIndex ){
	    for (int i = 0; i < threadingPartners[biopolymerIndex].includedResidues.size() ;  i++){
		if  (  threadingPartners[biopolymerIndex].includedResidues[i] == residue) return 1;
	    }
	    return 0;
	}

	void supplementIncludedResidues(int fromBiopolymer, int toBiopolymer){
	    for (int i = 0; i < threadingPartners[fromBiopolymer].includedResidues.size() ;  i++){
		ResidueID correspondingResidue;
		if (!( getCorrespondingResidue(threadingPartners[fromBiopolymer].includedResidues[i], correspondingResidue, fromBiopolymer, toBiopolymer) )) // returns 0 if successful
		{
		    if (!(hasResidue(  correspondingResidue , toBiopolymer ) )){
			threadingPartners[toBiopolymer].includedResidues.push_back(correspondingResidue);
		    }  // of if
		} // of if
	    }  // of for
	} // of method

	void supplementIncludedResidues(){
	    supplementIncludedResidues(0,1);
	    supplementIncludedResidues(1,0);
	} // of method

	void setShortSequences(){
	    sortIncludedResidues();
	    threadingPartners[0].sequence = (threadingPartners[0].biopolymerClass.getSequence(threadingPartners[0].includedResidues)).c_str();
	    threadingPartners[1].sequence = (threadingPartners[1].biopolymerClass.getSequence(threadingPartners[1].includedResidues)).c_str();
	    computeAlign(); // Just to make sure align is in sync with the new sequences
	}

        TAlign computeAlign(){
	    seqan::resize(rows(align), 2);
	    assignSource(row(align,0),threadingPartners[0].sequence);
	    assignSource(row(align,1),threadingPartners[1].sequence);
	    //seqan::Score<int,seqan::Blosum62(-1,-12)> scoringSchemeObject;
	    int score = -11111;
	    if (scoringScheme == "Blosum62"){
                seqan::Blosum62 scoringSchemeObject(-1,-12);
	        score = globalAlignment(align,scoringSchemeObject ); // ..signature:Score<TValue, Simple>(match, mismatch, gap [, gap_open])
	        std::cout <<__FILE__<<":"<<__LINE__<< "Score: " << score << ::std::endl;
	        computeAlignmentStats(alignmentStats, align, scoringSchemeObject);
	    }
            else if (scoringScheme == "Simple"){
                seqan::SimpleScore scoringSchemeObject(matchScore,mismatchScore, gapPenalty );
	        score = globalAlignment(align,scoringSchemeObject ); // ..signature:Score<TValue, Simple>(match, mismatch, gap [, gap_open])
	        std::cout <<__FILE__<<":"<<__LINE__<< "Score: " << score << ::std::endl;
	        computeAlignmentStats(alignmentStats, align, scoringSchemeObject);
	        //seqan::Simple scoringSchemeObject(matchScore,mismatchScore, alignmentForcesGapPenalty); 
	    } else {
	        MMBLOG_FILE_FUNC_LINE(CRITICAL, " Your requested scoring scheme : "<< scoringScheme <<" is not supported. Please use one of the supported types."<<endl);
	    }
	    //seqan::Blosum62 scoringScheme(-1, -12);
	    //// Args: match score, mismatch score, gap penalty
	    //seqan::Simple   scoringScheme (matchScore,mismatchScore, alignmentForcesGapPenalty )
	    //int score = globalAlignment(align,scoringSchemeObject ); // ..signature:Score<TValue, Simple>(match, mismatch, gap [, gap_open])
	    //int score = globalAlignment(align,scoringScheme ); // ..signature:Score<TValue, Simple>(match, mismatch, gap [, gap_open])
	    //"62" means matrix was constructed with max 62% seq. identity alignment.
	    std::cout <<__FILE__<<":"<<__LINE__<< " SeqAn sequence alignment follows: "  << ::std::endl;
	    std::cout <<__FILE__<<":"<<__LINE__<< align << ::std::endl;
	    printAlignmentStats();
	    alignHasBeenComputed = 1;
            return align;
	}
	/*
	// For reference, from the old BiopolymerClass.cpp:
        TAlign BiopolymerClass::createGappedAlignment(BiopolymerClass otherBiopolymerClass, double alignmentForcesGapPenalty ){ // Set a default value of -1 for the gap penalty to allow gaps. For ungapped, do a big value e.g. -1000
        TSequence seqA = getSubSequence(getFirstResidueID(),getLastResidueID()  ).c_str();  // Need a new BiopolymerClass method which retrieves subsequences.!
        TSequence seqB = otherBiopolymerClass.getSubSequence(otherBiopolymerClass.getFirstResidueID(), otherBiopolymerClass.getLastResidueID() ).c_str();
        TAlign align;
        seqan::resize(rows(align), 2);
        assignSource(row(align,0),seqA);
        assignSource(row(align,1),seqB);
        // simple alignment:
        int score = globalAlignment(align, seqan::Score<int,seqan::Simple>(0,-1, alignmentForcesGapPenalty )); // ..signature:Score<TValue, Simple>(match, mismatch, gap [, gap_open])
        return align;
}
	 */ 

	// return 0 for success, 1 for failure
	bool getCorrespondingResidue(const ResidueID queryResidue, ResidueID & correspondingResidue, const int queryBiopolymerIndex, const int correspondingBiopolymerIndex){
	    if (!( queryBiopolymerIndex ^ correspondingBiopolymerIndex )){std::cout <<__FILE__<<":"<<__LINE__<< " queryBiopolymerIndex and  correspondingBiopolymerIndex must be set to 0,1 or 1,0. You have   "<< queryBiopolymerIndex <<" , " << correspondingBiopolymerIndex <<std::endl; exit(1);}
	    if (( queryBiopolymerIndex > 1) ||  ( queryBiopolymerIndex < 0) ){std::cout <<__FILE__<<":"<<__LINE__<< " queryBiopolymerIndex =  "<< queryBiopolymerIndex <<" is out of range. Expected 0 or 1. "  <<std::endl; exit(1);}
	    if (( correspondingBiopolymerIndex > 1) ||  ( correspondingBiopolymerIndex < 0) ){std::cout <<__FILE__<<":"<<__LINE__<< " correspondingBiopolymerIndex =  "<< correspondingBiopolymerIndex <<" is out of range. Expected 0 or 1. "  <<std::endl; exit(1);}
	    if (!(alignHasBeenComputed)) {std::cout <<__FILE__<<":"<<__LINE__<< " alignHasBeenComputed = "<< alignHasBeenComputed <<std::endl; exit(1);}
	    int querySourceIndex = threadingPartners[queryBiopolymerIndex].biopolymerClass.getResidueIndex(queryResidue) -  threadingPartners[queryBiopolymerIndex].biopolymerClass.getResidueIndex(threadingPartners[queryBiopolymerIndex].startResidue);
	     int viewIndex = toViewPosition(row(align, queryBiopolymerIndex),querySourceIndex);
	     int correspondingSourceIndex =  toSourcePosition(row(align,correspondingBiopolymerIndex ) ,viewIndex);
	     if (String(seqan::row (align,correspondingBiopolymerIndex )[viewIndex]) == ("-")   ){
		 //std::cout <<__FILE__<<":"<<__LINE__<< " queryResidue = "<<queryResidue.outString()<< " of queryBiopolymerIndex "<<queryBiopolymerIndex<< " with chain ID "<<   threadingPartners[queryBiopolymerIndex].biopolymerClass.getChainID() << " is an insertion with respect to correspondingBiopolymerIndex. Unable to set correspondingResidue "  <<std::endl;
		 return 1;
	     }
	     correspondingResidue =  threadingPartners[correspondingBiopolymerIndex].biopolymerClass.sum( threadingPartners[correspondingBiopolymerIndex].startResidue, correspondingSourceIndex);
	     return 0;
	 }; // of method

	 const void printAlignmentStats(){
	     std::cout<<__FILE__<<":"<<__LINE__<<std::endl;
	     std::cout << align
		   << "score:               " << alignmentStats.alignmentScore << "\n"
		   << "gap opens:           " << alignmentStats.numGapOpens << "\n"
		   << "gap extensions:      " << alignmentStats.numGapExtensions << "\n"
		   << "num insertions:      " << alignmentStats.numInsertions << "\n"
		   << "num deletions:       " << alignmentStats.numDeletions << "\n"
		   << "num matches:         " << alignmentStats.numMatches << "\n"
		   << "num mismatches:      " << alignmentStats.numMismatches << "\n"
		   << "num positive scores: " << alignmentStats.numPositiveScores << "\n"
		   << "num negative scores: " << alignmentStats.numNegativeScores << "\n"
		   << "percent similarity:  " << alignmentStats.alignmentSimilarity << "\n"
		   << "alignment length  :  " << alignmentStats.alignmentLength     << "\n"
		   << "percent identity:    " << alignmentStats.alignmentIdentity << "\n\n\n";
	     std::cout<<__FILE__<<":"<<__LINE__<<std::endl;
	 }


	ThreadingStruct(BiopolymerClass  biopolymerClass0, ResidueID resStart0, ResidueID resEnd0,
		     BiopolymerClass  biopolymerClass1, ResidueID resStart1, ResidueID resEnd1,
		     double forceConstant, bool backboneOnly
		     ) //:

	    //residueStart0(resStart0), residueEnd0(resEnd0),
			 //residueStart1(resStart1), residueEnd1(resEnd1),
			 //forceConstant(forceConstant), backboneOnly(backboneOnly)
	    {   
		updThreadingPartner(0).biopolymerClass = std::move(biopolymerClass0);
		updThreadingPartner(1).biopolymerClass = std::move(biopolymerClass1);
		updThreadingPartner(0).includedResidues.clear();
		//chain1ResiduesIncluded.clear(); 
		alignHasBeenComputed = 0; 
		threadingPartners[0].sequence = ""; threadingPartners[1].sequence = "";}

		// This constructor has to change because we are getting rido of chainID's. Also, usage in MMB has to change.
	ThreadingStruct() 
		     {
	        alignHasBeenComputed = 0;
	        threadingPartners[0].includedResidues.clear(); threadingPartners[1].includedResidues.clear();
                threadingPartners[0].sequence = ""; threadingPartners[1].sequence = "";}
}; // of class