File: mergesfffilecommand.cpp

package info (click to toggle)
mothur 1.48.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,692 kB
  • sloc: cpp: 161,866; makefile: 122; sh: 31
file content (351 lines) | stat: -rwxr-xr-x 16,959 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
//
//  mergesfffilecommand.cpp
//  Mothur
//
//  Created by Sarah Westcott on 1/31/14.
//  Copyright (c) 2014 Schloss Lab. All rights reserved.
//

#include "mergesfffilecommand.h"
#include "endiannessmacros.h"

//********************************************************************************
// Destructor: frees every common header that mergeSffInfo stored in
// commonHeaders, then empties the vector.
MergeSfffilesCommand::~MergeSfffilesCommand(){
    for (auto* savedHeader : commonHeaders) {
        delete savedHeader;
    }
    commonHeaders.clear();
}
//********************************************************************************
// Registers the parameters this command accepts (sff, file, keytrim, output,
// seed, inputdir, outputdir), resets the abort/calledHelp flags, registers the
// "sff" output type with an empty file list, and returns the names of every
// entry currently held in the parameters member.
vector<string> MergeSfffilesCommand::setParameters(){
	try {
		CommandParameter psff("sff", "InputTypes", "", "", "sffFile", "sffFile", "none","sff",false,false);
		parameters.push_back(psff);

		CommandParameter pfile("file", "InputTypes", "", "", "sffFile", "sffFile", "none","sff",false,false);
		parameters.push_back(pfile);

		CommandParameter pkeytrim("keytrim", "Boolean", "", "F", "", "", "","",false,false);
		parameters.push_back(pkeytrim);

		CommandParameter poutput("output", "String", "", "", "", "", "","",false,true,true);
		parameters.push_back(poutput);

		CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false);
		parameters.push_back(pseed);

		CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false);
		parameters.push_back(pinputdir);

		CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false);
		parameters.push_back(poutputdir);

		abort = false;
		calledHelp = false;

		// start the "sff" output type off with no file names
		outputTypes["sff"] = vector<string>();

		// collect the name of each registered parameter, in registration order
		vector<string> paramNames;
		for (const auto& param : parameters) {
			paramNames.push_back(param.name);
		}
		return paramNames;
	}
	catch(exception& e) {
		m->errorOut(e, "MergeSfffilesCommand", "setParameters");
		exit(1);
	}
}
//**********************************************************************************
// Builds the help text shown for the merge.sfffiles command.
// Returns the text as a single newline-separated string.
string MergeSfffilesCommand::getHelpString(){
	try {
		string helpString = "";
		helpString += "The merge.sfffiles command reads a sff file or a file containing a list of sff files and merges the individual files into a single sff file. \n";
		// keytrim is a registered parameter and output is mandatory (the constructor
		// aborts without it), so both are listed here.
		helpString += "The merge.sfffiles command parameters are sff, file, keytrim and output. sff or file is required, and output is required. \n";
		helpString += "The sff parameter allows you to enter a list of sff files separated by -'s.\n";
		helpString += "The file parameter allows you to provide a file containing a list of sff files to merge.  \n";
		helpString += "The keytrim parameter allows you to merge sff files with different key sequences by trimming them to the first 4 characters, provided the first 4 match. The default is F.  \n";
		helpString += "The output parameter allows you to provide an output filename.  \n";
		// the example must name this command (not sffinfo) and include the required output parameter
		helpString += "Example merge.sfffiles(sff=mySffFile.sff-mySecond.sff, output=myMerged.sff).\n";
		return helpString;
	}
	catch(exception& e) {
		m->errorOut(e, "MergeSfffilesCommand", "getHelpString");
		exit(1);
	}
}
//*******************************************************************************
// Returns the output file name pattern for the given output type.
// Only "sff" is defined; for any other type an error is reported, the
// control-pressed flag is set and an empty pattern is returned.
string MergeSfffilesCommand::getOutputPattern(string type) {
    try {
        if (type == "sff") {
            return "[filename],";
        }

        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
        m->setControl_pressed(true);
        return "";
    }
    catch(exception& e) {
        m->errorOut(e, "MergeSfffilesCommand", "getOutputPattern");
        exit(1);
    }
}
//**********************************************************************************
// Constructs the command from its option string.
// "help", "citation" and "category" only set the abort/calledHelp flags
// (after printing help or the citation for the first two). Any other option
// string is parsed for the sff, file, output, keytrim and inputdir parameters;
// abort is set when the inputs are missing or contradictory.
MergeSfffilesCommand::MergeSfffilesCommand(string option) : Command()  {
	try {
		// informational requests: nothing to parse
		if (option == "help")     { help();     abort = true; calledHelp = true; return; }
		if (option == "citation") { citation(); abort = true; calledHelp = true; return; }
		if (option == "category") {             abort = true; calledHelp = true; return; }

		OptionParser parser(option, setParameters());
		map<string, string> parameters = parser.getParameters();

		ValidParameters validParameter;

		string inputDir = validParameter.validPath(parameters, "inputdir");
		if (inputDir == "not found") { inputDir = ""; }

		sffFilename = validParameter.validPath(parameters, "sff");
		if (sffFilename == "not found") {
			sffFilename = "";
		}
		else {
			util.splitAtDash(sffFilename, filenames);

			// validate each name; names that cannot be used are erased from the list,
			// so the index is stepped back whenever an entry is removed
			for (int i = 0; i < filenames.size(); i++) {

				if (filenames[i] == "current") {
					filenames[i] = current->getSFFFile();
					if (filenames[i] == "") {
						m->mothurOut("You have no current sfffile, ignoring current.\n");
						filenames.erase(filenames.begin()+i);
						i--;
						continue;
					}
					m->mothurOut("Using " + filenames[i] + " as input file for the sff parameter where you had given current.\n");
				}

				// prepend inputdir only when the user supplied a bare file name
				if ((inputDir != "") && (util.hasPath(filenames[i]) == "")) {
					filenames[i] = inputDir + filenames[i];
				}

				if (util.checkLocations(filenames[i], current->getLocations())) {
					current->setSFFFile(filenames[i]);
				}
				else {
					m->mothurOut("Unable to open " + filenames[i] + ". It will be disregarded.\n");
					filenames.erase(filenames.begin()+i);
					i--;
				}
			}
		}

		file = validParameter.validFile(parameters, "file");
		if (file == "not open")       { abort = true; }
		else if (file == "not found") { file = "";    }

		// exactly one of the two input styles must have produced something
		bool noListFile = (file == "");
		bool noSffNames = (filenames.size() == 0);

		if (noListFile && noSffNames) {
			m->mothurOut("[ERROR]: no valid files.\n");
			abort = true;
		}

		if (!noListFile && !noSffNames) {
			m->mothurOut("[ERROR]: cannot use file option and sff option at the same time, choose one.\n");
			abort = true;
		}

		outputFile = validParameter.validPath(parameters, "output");
		if (outputFile == "not found") {
			m->mothurOut("you must enter an output file name\n");
			abort = true;
		}
		if (outputdir != "") {
			outputFile = outputdir + util.getSimpleName(outputFile);
		}

		string keyTrimSetting = validParameter.valid(parameters, "keytrim");
		if (keyTrimSetting == "not found") { keyTrimSetting = "F"; }
		keyTrim = util.isTrue(keyTrimSetting);
	}
	catch(exception& e) {
		m->errorOut(e, "MergeSfffilesCommand", "MergeSfffilesCommand");
		exit(1);
	}
}
//*****************************************************************************
int MergeSfffilesCommand::execute(){
	try {
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
        
        if (file != "") {
            readFile();
            if (outputdir == "") { outputdir = util.hasPath(file); }
        }
        ofstream out;
        map<string, string> variables;
        string thisOutputDir = outputdir;
		if (outputdir == "") {  thisOutputDir += util.hasPath(outputFile);  }
        variables["[filename]"] = thisOutputDir + util.getSimpleName(outputFile);
		outputFile = getOutputFileName("sff",variables);
        util.openOutputFileBinary(outputFile, out);
        outputNames.push_back(outputFile); outputTypes["sff"].push_back(outputFile);
        outputFileHeader = outputFile + ".headers";
        numTotalReads = 0;
        
		for (int s = 0; s < filenames.size(); s++) {
			
			if (m->getControl_pressed()) {  for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]); 	} return 0; }
			
			long start = time(nullptr);
			
            filenames[s] = util.getFullPathName(filenames[s]);
			m->mothurOut("\nMerging info from " + filenames[s] + " ..." ); m->mothurOutEndLine();
            
			int numReads = mergeSffInfo(filenames[s], out);
            
			m->mothurOut("It took " + toString(time(nullptr) - start) + " secs to merge " + toString(numReads) + ".\n");
		}
        out.close();
        
        //create new common header and add to merged file
        adjustCommonHeader();

		if (m->getControl_pressed()) {  for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]); 	} return 0; }
		
		//set sff file as new current sff file
		string currentName = "";
		itTypes = outputTypes.find("sff");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setSFFFile(currentName); }
		}
		
		//report output filenames
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();
        
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "MergeSfffilesCommand", "execute");
		exit(1);
	}
}
//*****************************************************************************
// Copies the reads of one sff file into the merged output stream.
//
// input : path of the sff file to read.
// out   : already-open output stream that each read is printed to.
//
// The file's common header is read first and, when valid, stored in
// commonHeaders (the destructor frees it) for the later consistency check in
// adjustCommonHeader. Reads are then copied until the stream is exhausted, a
// read fails, the run is interrupted, or the number of reads given by the
// header has been reached. numTotalReads is incremented for every copied read.
//
// Returns the number of reads copied from this file; 0 when the header could
// not be read or the run was interrupted.
int MergeSfffilesCommand::mergeSffInfo(string input, ofstream& out){
	try {
		currentFileName = input;

		ifstream in; util.openInputFileBinary(input, in);

		SffCommonHeader* header = new SffCommonHeader();
		bool goodHeader = header->read(in);

		if (!goodHeader) {
			// the header is not handed to commonHeaders, so release it here
			delete header;
			in.close();
			return 0;
		}

		commonHeaders.push_back(header); //save for adjustHeader sanity check

		//read through the sff file
		int count = 0; int numFlows = header->getNumFlows();
		while (!in.eof()) {

			//read data - automatic storage so the read is released on every exit path
			SffRead read(numFlows);

			bool okay = read.readSff(in);

			if (!okay) { break; }

			read.printSff(out); numTotalReads++; count++;

			//report progress - count already includes the read just written
			if (count % 10000 == 0) {	m->mothurOut(toString(count)); m->mothurOutEndLine();		}

			if (m->getControl_pressed()) { count = 0; break;   }

			if (count >= header->getNumReads()) { break; }
		}

		//report the final total unless the loop just printed it
		if (!m->getControl_pressed()) {   if((count) % 10000 != 0){	m->mothurOut(toString(count)); m->mothurOutEndLine();		}  }

		in.close();

		return count;
	}
	catch(exception& e) {
		m->errorOut(e, "MergeSfffilesCommand", "mergeSffInfo");
		exit(1);
	}
}
//****************************************************************************
void MergeSfffilesCommand::adjustCommonHeader(){
	try {
        //sanity check
        bool okayMagic = true;
        bool okayVersion = true;
        bool okayHeader = true;
        bool okayKeyLength = true;
        bool okayNumFlows = true;
        bool okayformatCode = true;
        bool okayflowChar = true;
        bool okayKeySequence = true;
        if (commonHeaders.size() != 0) {
            unsigned int magicN = commonHeaders[0]->getMagicNumber();
            string version = commonHeaders[0]->getVersion();
            unsigned short headerLength = commonHeaders[0]->getHeaderLength();
            unsigned short keyLength = commonHeaders[0]->getKeyLength();
            unsigned short numFlows = commonHeaders[0]->getNumFlows();
            int flowCode = commonHeaders[0]->getFlowgramFormat();
            string flowChars = commonHeaders[0]->getFlows();
            string keySeq = commonHeaders[0]->getKeySequence();
            
            for (int i = 1; i < commonHeaders.size(); i++) {
                if (commonHeaders[i]->getMagicNumber() != magicN)             { okayMagic = false;  m->mothurOut("[ERROR]: merge issue with common headers. Magic numbers do not match. " + filenames[0] + " magic number is " + toString(magicN) + ", but " + filenames[i] + " magic number is " + toString(commonHeaders[i]->getMagicNumber()) + ".\n");  }
                if (commonHeaders[i]->getVersion() != version)                { okayVersion = false;   m->mothurOut("[ERROR]: merge issue with common headers. Versions do not match. " + filenames[0] + " version is " + version + ", but " + filenames[i] + " version is " + commonHeaders[i]->getVersion() + ".\n");     }
                if (commonHeaders[i]->getHeaderLength() != headerLength)      { okayHeader = false;    m->mothurOut("[ERROR]: merge issue with common headers. Header lengths do not match. " + filenames[0] + " header length is " + toString(headerLength) + ", but " + filenames[i] + " header length is " + toString(commonHeaders[i]->getHeaderLength()) + ".\n");    }
                if (commonHeaders[i]->getKeyLength() != keyLength)            { okayKeyLength = false;  m->mothurOut("[ERROR]: merge issue with common headers. Key Lengths do not match. " + filenames[0] + " Key length is " + toString(keyLength) + ", but " + filenames[i] + " key length is " + toString(commonHeaders[i]->getKeyLength()) + ".\n");    }
                if (commonHeaders[i]->getNumFlows() != numFlows)       { okayNumFlows = false;   m->mothurOut("[ERROR]: merge issue with common headers. Number of flows per read do not match. " + filenames[0] + " number of flows is " + toString(numFlows) + ", but " + filenames[i] + " number of flows is " + toString(commonHeaders[i]->getNumFlows()) + ".\n");     }
                if (commonHeaders[i]->getFlowgramFormat() != flowCode)     { okayformatCode = false;    m->mothurOut("[ERROR]: merge issue with common headers. Flow format codes do not match. " + filenames[0] + " Flow format code is " + toString(flowCode) + ", but " + filenames[i] + " flow format code is " + toString(commonHeaders[i]->getFlowgramFormat()) + ".\n");    }
                if (commonHeaders[i]->getFlows() != flowChars)            { okayflowChar = false;   m->mothurOut("[ERROR]: merge issue with common headers. Flow characters do not match. " + filenames[0] + " Flow characters are " + flowChars + ", but " + filenames[i] + " flow characters are " + commonHeaders[i]->getFlows() + ".\n");    }
                if (commonHeaders[i]->getKeySequence() != keySeq)             { okayKeySequence = false;
                    if (keyTrim) {
                        m->mothurOut("[WARNING]: merge issue with common headers. Key sequences do not match. " + filenames[0] + " Key sequence is " + keySeq + ", but " + filenames[i] + " key sequence is " + commonHeaders[i]->getKeySequence() + ". We will attempt to trim them.\n");
                    }else { m->mothurOut("[ERROR]: merge issue with common headers. Key sequences do not match. " + filenames[0] + " Key sequence is " + keySeq + ", but " + filenames[i] + " key sequence is " + commonHeaders[i]->getKeySequence() + ".\n");
                    }
                }
            }
        }else { m->setControl_pressed(true); return; } //should never get here
        
        bool modify = false;
        if (!okayMagic || !okayVersion || !okayHeader || !okayKeyLength || !okayNumFlows || !okayformatCode || !okayflowChar) { m->setControl_pressed(true); return; }
        if (!okayKeySequence) {
            bool okayKeySequence2 = true;
            string keySeq = commonHeaders[0]->getKeySequence().substr(0,4);
            for (int i = 1; i < commonHeaders.size(); i++) {
                if ((commonHeaders[i]->getKeySequence().substr(0,4)) != keySeq)          { okayKeySequence2 = false;   }
            }
            if (okayKeySequence2 && keyTrim) {  modify = true;
                m->mothurOut("We are able to trim the key sequences. Merged key seqeunce will be " + keySeq + ".\n");
            }
        }
        
        ofstream out;
        util.openOutputFileBinaryAppend(outputFileHeader, out);
        commonHeaders[0]->printSampleCommonHeader(out, numTotalReads);
        out.close();
        
        util.appendSFFFiles(outputFile, outputFileHeader);
        util.renameFile(outputFileHeader, outputFile);
        util.mothurRemove(outputFileHeader);
	}
	catch(exception& e) {
		m->errorOut(e, "MergeSfffilesCommand", "adjustCommonHeader");
		exit(1);
	}
}
//*************************************************************************************
// Reads the list file named by the file member and appends every listed sff
// file that util.checkLocations can find to filenames. Names are read as
// whitespace-separated tokens; names that cannot be found are reported with a
// warning and skipped. Reading stops early when the run is interrupted.
void MergeSfffilesCommand::readFile(){
	try {
        ifstream in; util.openInputFile(file, in);

        string filename;
        while(!in.eof()) {

            if (m->getControl_pressed()) { break; }

            // stop when nothing could be extracted: a stream that failed without
            // reaching eof would otherwise loop forever, and a failed read would
            // be processed as if it were a file name
            if (!(in >> filename)) { break; }
            gobble(in);

            if (m->getDebug()) { m->mothurOut("[DEBUG]: filename = " + filename + ".\n"); }

            bool ableToOpen = util.checkLocations(filename, current->getLocations());

            if (!ableToOpen) { //can't find it
                m->mothurOut("[WARNING]: can't find " + filename + ", ignoring.\n");
            }else{  filenames.push_back(filename); }

        }
        // reached on every exit path, including an interrupt
        in.close();
    }
    catch(exception& e) {
        m->errorOut(e, "MergeSfffilesCommand", "readFile");
        exit(1);
    }
}
//******************************************************************************************