File: translateseqscommand.hpp

package info (click to toggle)
mothur 1.48.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 13,684 kB
  • sloc: cpp: 161,854; makefile: 122; sh: 31
file content (140 lines) | stat: -rw-r--r-- 4,419 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
//
//  translateseqscommand.hpp
//  Mothur
//
//  Created by Sarah Westcott on 11/8/21.
//  Copyright © 2021 Schloss Lab. All rights reserved.
//

#ifndef translateseqscommand_hpp
#define translateseqscommand_hpp

#include "command.hpp"
#include "sequence.hpp"
#include "protein.hpp"
#include "needlemanoverlap.hpp"


/*
 This command would take...

  * DNA sequences and translate them to amino acid sequences

 By default it would use the first frame
 The user could also specify the frame (1, 2, 3, -1, -2, -3) or possibly use all 6 frames
 Another option would be stop=T/F. If T, then if the translation hits a stop codon, it stops before that codon. If F, it returns the full translation with a * as the stop codon
 Output as *.aa#.fasta where # is the frame
 
  * Amino acid sequences and translate them to DNA sequences

 Because of degeneracies there will be non-ATGC IUPAC codes in the output sequence
 Output as *.dna.fasta
 
  * Unaligned DNA and unaligned/aligned Amino acid sequences

 Back translate the amino acid sequence to the DNA sequence so that the DNA is aligned. This should result in the DNA bases being clustered in groups of 3 corresponding to each amino acid codon
 Hopefully the DNA sequence and the amino acid sequence will be in the same frame
 Output alignment as *.dna.align
 
 */

/**************************************************************************************************/

/*
 Command object for mothur's translate.seqs command.

 Only the trivial accessors are defined inline; the constructor, setParameters(),
 getHelpString(), getCommonQuestions(), getOutputPattern(), execute() and the private
 helpers are defined outside this header.
 */
class TranslateSeqsCommand : public Command {
    
public:
    TranslateSeqsCommand(string);
    ~TranslateSeqsCommand(){}
    
    vector<string> setParameters();
    // Command name; spelled "translate.seqs" so it agrees with the wiki page returned by getCitation().
    string getCommandName()            { return "translate.seqs";             }
    string getCommandCategory()        { return "Sequence Processing";        }
    
    string getHelpString();
    string getCommonQuestions();
    string getOutputPattern(string);
    string getCitation() { return "http://www.mothur.org/wiki/translate.seqs"; }
    // One-line, user-facing summary of what the command does.
    string getDescription()        { return "translate dna to amino acids or align dna to amino acids"; }
    
    int execute();
    // Prints the text produced by getHelpString() through the MothurOut instance `m`.
    void help() { m->mothurOut(getHelpString()); }
    
private:
    bool abort, stop;               // NOTE(review): `stop` presumably mirrors the stop=T/F option - confirm in the .cpp
    string fastafile, aminofile;    // input file names; presumably the DNA fasta and amino acid files - confirm
    int processors;
    vector<string> outputNames;
    vector<int> frames;             // presumably the reading frames requested by the user - confirm
    vector<linePair> lines;         // NOTE(review): looks like file positions for fastafile, filled by setLines() - confirm
    vector<linePair> aLines;        // NOTE(review): looks like file positions for aminofile, filled by setLines() - confirm
    
    bool setLines(); //returns true if error free
    void translateDNAtoAmino();
    void alignDNAAmino();
    double createProcessesTranslateDNAtoAminoAcids(string, vector<linePair>, int);
    double createProcessesAlign(string);
};

//**********************************************************************************************************************
// Parameter bundle for translating one section of a DNA file.
// NOTE(review): presumably one instance is handed to each worker started by
// createProcessesTranslateDNAtoAminoAcids - confirm in the .cpp.
struct translateSeqsStruct {
    OutputWriter* outputWriter;   // stored as passed in; this struct never deletes it
    string inputFilename;
    bool stop;
    int frame;
    double numSeqs;               // starts at 0
    
    linePair filePos;             // start/end copied from the constructor argument
    MothurOut* m; Utils util;

    // Copies the caller-supplied values into the members and zeroes the sequence counter.
    translateSeqsStruct (linePair fP, OutputWriter* oFName, string fname, bool st, int f)
        : outputWriter(oFName), inputFilename(fname), stop(st), frame(f), numSeqs(0) {

        //only the start and end fields of the position are copied
        filePos.start = fP.start;
        filePos.end = fP.end;

        m = MothurOut::getInstance();
    }
    ~translateSeqsStruct() = default;
};
//**********************************************************************************************************************
// Parameter bundle for aligning one section of a DNA file against one section of an
// amino acid file.
// NOTE(review): presumably one instance is handed to each worker started by
// createProcessesAlign - confirm in the .cpp.
struct alignAminoStruct {
    OutputWriter* outputWriter;   // stored as passed in; this struct never deletes it
    string fastaFilename, aminoFilename;
    bool stop;
    double numSeqs;               // starts at 0
        
    linePair fastaPos;            // start/end copied from the first constructor argument
    linePair aminoPos;            // start/end copied from the second constructor argument
    MothurOut* m; Utils util;
    // NOTE(review): allocated with new in the constructor while the destructor is defaulted,
    // so it is not freed here - confirm whether the code using this struct deletes it.
    Alignment* alignment;
        
    // Copies the caller-supplied values into the members, allocates the aligner and
    // zeroes the sequence counter.
    alignAminoStruct (linePair fP, linePair aP, OutputWriter* oFName, string fname, string aname, bool st)
        : outputWriter(oFName), fastaFilename(fname), aminoFilename(aname), stop(st), numSeqs(0) {

        //only the start and end fields of each position are copied
        fastaPos.start = fP.start;
        fastaPos.end = fP.end;
        aminoPos.start = aP.start;
        aminoPos.end = aP.end;

        alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, 5000);

        m = MothurOut::getInstance();
    }
    ~alignAminoStruct() = default;
};
//**********************************************************************************************************************


#endif /* translateseqscommand_hpp */