File: sequenceparser.cpp

package info (click to toggle)
mothur 1.48.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,692 kB
  • sloc: cpp: 161,866; makefile: 122; sh: 31
file content (169 lines) | stat: -rwxr-xr-x 6,970 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
/*
 *  sequenceParser.cpp
 *  Mothur
 *
 *  Created by westcott on 9/9/11.
 *  Copyright 2011 Schloss Lab. All rights reserved.
 *
 */

#include "sequenceparser.h"

/************************************************************/
/*
 * Builds a parser from a group file, a fasta file and a name file.
 *
 * Steps performed:
 *   1. read groupFile through GroupMap, restricted to groupsSelected, and
 *      remember the resulting group names in namesOfGroups;
 *   2. run split.groups (with processors=1) on the three input files;
 *   3. store the per-group output files in groupToFiles as
 *      { fasta, name, group }, keyed by group name;
 *   4. point CurrentFile back at the original name/fasta/group files.
 *
 * groupFile      - group file to read and to hand to split.groups
 * fastaFile      - fasta file to hand to split.groups
 * nameFile       - name file to hand to split.groups
 * groupsSelected - groups to process; when empty, every group found in
 *                  groupFile is used
 *
 * If readMap reports an error, or split.groups does not return exactly one
 * fasta, name and group file per group, the control-pressed flag is set on
 * MothurOut. In the second case groupToFiles is left unfilled, because the
 * output vectors cannot be indexed safely.
 * Any std::exception is reported via m->errorOut and the process exits with
 * status 1.
 */
SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFile, vector<string> groupsSelected) {
    try {

        m = MothurOut::getInstance();
        hasName = true;

        //read group file
        GroupMap groupMap;
        int error = groupMap.readMap(groupFile, groupsSelected); //only store info for groups selected

        if (error == 1) { m->setControl_pressed(true); }

        //initialize maps
        namesOfGroups = groupMap.getNamesOfGroups();

        //no explicit selection means "all groups in the group file"
        if (groupsSelected.size() != 0) { sort(groupsSelected.begin(), groupsSelected.end());   }
        else                            { groupsSelected = namesOfGroups;                       }

        for (size_t i = 0; i < groupsSelected.size(); i++) { util.checkGroupName(groupsSelected[i]); }

        //run splitGroups command to parse files
        string inputString = "";
        inputString += "processors=1, groups=" + util.getStringFromVector(groupsSelected, "-"); //split.groups is parallelized, we don't want the thread spinning up threads.
        inputString += ", fasta=" + fastaFile;
        inputString += ", name=" + nameFile;
        inputString += ", group=" + groupFile;

        m->mothurOut("\n/******************************************/\n");
        m->mothurOut("Running command: split.groups(" + inputString + ")\n");

        Command* splitCommand = new SplitGroupCommand(inputString);
        splitCommand->execute();

        //type -> files in groups order. fasta -> vector<string>. fastaFileForGroup1 stored in filenames["fasta"][1]
        map<string, vector<string> > filenames = splitCommand->getOutputFiles();

        delete splitCommand;
        m->mothurOut("/******************************************/\n");

        vector<string> parsedFastaFiles = filenames["fasta"]; //sorted in groups order
        vector<string> parsedNameFiles = filenames["name"]; //sorted in groups order
        vector<string> parsedGroupFiles = filenames["group"]; //sorted in groups order

        //every output vector is indexed by group position below, so all three must line up with groupsSelected
        const size_t numGroups = groupsSelected.size();
        const bool outputsMatchGroups = (parsedFastaFiles.size() == numGroups)
                                     && (parsedNameFiles.size()  == numGroups)
                                     && (parsedGroupFiles.size() == numGroups);

        if (!outputsMatchGroups) {
            m->mothurOut("should never get here, quitting\n\n"); m->setControl_pressed(true);
        }else {
            for (size_t i = 0; i < numGroups; i++) {
                vector<string> thisSamplesFiles;
                thisSamplesFiles.push_back(parsedFastaFiles[i]);
                thisSamplesFiles.push_back(parsedNameFiles[i]);
                thisSamplesFiles.push_back(parsedGroupFiles[i]);
                groupToFiles[groupsSelected[i]] = thisSamplesFiles;
            }
        }

        //reset current files changed by split.groups
        CurrentFile* current = CurrentFile::getInstance();
        current->setNameFile(nameFile);
        current->setFastaFile(fastaFile);
        current->setGroupFile(groupFile);
    }
    catch(exception& e) {
        m->errorOut(e, "SequenceParser", "SequenceParser");
        exit(1);
    }
}
/************************************************************/
/*
 * Builds a parser from a group file and a fasta file (no name file).
 * Original note on this constructor: "leaves all seqs map blank to be filled
 * when asked for".
 *
 * Steps performed:
 *   1. read groupFile through GroupMap, restricted to groupsSelected, and
 *      remember the resulting group names in namesOfGroups;
 *   2. run split.groups on the two input files, passing "groups=" only when
 *      the caller selected specific groups;
 *   3. store the per-group output files in groupToFiles as { fasta, group },
 *      keyed by group name;
 *   4. point CurrentFile back at the original fasta/group files.
 *
 * groupFile      - group file to read and to hand to split.groups
 * fastaFile      - fasta file to hand to split.groups
 * groupsSelected - groups to process; when empty, every group found in
 *                  groupFile is used
 *
 * If readMap reports an error, or split.groups does not return exactly one
 * fasta and group file per group, the control-pressed flag is set on
 * MothurOut. In the second case groupToFiles is left unfilled, because the
 * output vectors cannot be indexed safely.
 * Any std::exception is reported via m->errorOut and the process exits with
 * status 1.
 */
SequenceParser::SequenceParser(string groupFile, string fastaFile, vector<string> groupsSelected) {
    try {

        m = MothurOut::getInstance();
        hasName = false;

        //read group file
        GroupMap groupMap;
        int error = groupMap.readMap(groupFile, groupsSelected); //only store info for groups selected

        if (error == 1) { m->setControl_pressed(true); }

        //initialize maps
        namesOfGroups = groupMap.getNamesOfGroups();

        //no explicit selection means "all groups in the group file"
        const bool groupsWereSelected = (groupsSelected.size() != 0);
        if (groupsWereSelected) { sort(groupsSelected.begin(), groupsSelected.end());   }
        else                    { groupsSelected = namesOfGroups;                       }

        for (size_t i = 0; i < groupsSelected.size(); i++) { util.checkGroupName(groupsSelected[i]); }

        //run splitGroups command to parse files
        //NOTE(review): the name-file constructor also passes processors=1 here; confirm whether this one should too.
        string inputString = "";
        if (groupsWereSelected) {
            //separator is appended with the option so the string never starts with a stray ", "
            inputString += "groups=" + util.getStringFromVector(groupsSelected, "-") + ", ";
        }
        inputString += "fasta=" + fastaFile;
        inputString += ", group=" + groupFile;

        m->mothurOut("\n/******************************************/\n");
        m->mothurOut("Running command: split.groups(" + inputString + ")\n");

        Command* splitCommand = new SplitGroupCommand(inputString);
        splitCommand->execute();

        //type -> files in groups order. fasta -> vector<string>. fastaFileForGroup1 stored in filenames["fasta"][1]
        map<string, vector<string> > filenames = splitCommand->getOutputFiles();

        delete splitCommand;
        m->mothurOut("/******************************************/\n");

        vector<string> parsedFastaFiles = filenames["fasta"]; //sorted in groups order
        vector<string> parsedGroupFiles = filenames["group"]; //sorted in groups order

        //both output vectors are indexed by group position below, so both must line up with groupsSelected
        const size_t numGroups = groupsSelected.size();
        const bool outputsMatchGroups = (parsedFastaFiles.size() == numGroups)
                                     && (parsedGroupFiles.size() == numGroups);

        if (!outputsMatchGroups) {
            m->mothurOut("should never get here, quitting\n\n"); m->setControl_pressed(true);
        }else {
            for (size_t i = 0; i < numGroups; i++) {
                vector<string> thisSamplesFiles;
                thisSamplesFiles.push_back(parsedFastaFiles[i]);
                thisSamplesFiles.push_back(parsedGroupFiles[i]);
                groupToFiles[groupsSelected[i]] = thisSamplesFiles;
            }
        }

        //reset current files changed by split.groups
        CurrentFile* current = CurrentFile::getInstance();
        current->setFastaFile(fastaFile);
        current->setGroupFile(groupFile);

    }
    catch(exception& e) {
        m->errorOut(e, "SequenceParser", "SequenceParser");
        exit(1);
    }
}
/************************************************************/
//destructor: no explicit cleanup is performed here
SequenceParser::~SequenceParser()
{
}
/************************************************************/
//returns the number of entries in namesOfGroups (set by the constructors from the group file)
int SequenceParser::getNumGroups()
{
    const int groupCount = namesOfGroups.size();
    return groupCount;
}
/************************************************************/
//returns a copy of namesOfGroups (set by the constructors from the group file)
vector<string> SequenceParser::getNamesOfGroups()
{
    vector<string> groupNames = namesOfGroups;
    return groupNames;
}
/************************************************************/
//Looks up the files recorded for a group by the constructor.
//  group - key to search for in groupToFiles
//Returns the stored file list when the group is known. Otherwise logs an
//error, sets the control-pressed flag on MothurOut and returns nullVector.
//Any std::exception is reported via m->errorOut and the process exits with status 1.
vector<string> SequenceParser::getFiles(string group){
    try {
        map<string, vector<string> >::iterator match = groupToFiles.find(group);

        //unknown group: report it and flag the run before handing back nullVector
        if (match == groupToFiles.end()) {
            m->mothurOut("[ERROR]: cannot find files for group " + group + ", quitting.\n");
            m->setControl_pressed(true);
            return nullVector;
        }

        return match->second;
    }
    catch(exception& e) {
        m->errorOut(e, "SequenceParser", "getFiles");
        exit(1);
    }
}
/************************************************************/