File: ExtractProteinSequence.C

package info (click to toggle)
ball 1.5.0%2Bgit20180813.37fc53c-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 239,848 kB
  • sloc: cpp: 326,149; ansic: 4,208; python: 2,303; yacc: 1,778; lex: 1,099; xml: 958; sh: 322; makefile: 93
file content (75 lines) | stat: -rw-r--r-- 1,971 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//

#include <BALL/FORMAT/PDBFile.h>
#include <BALL/FORMAT/lineBasedFile.h>
#include <BALL/FORMAT/commandlineParser.h>
#include <BALL/KERNEL/system.h>
#include <BALL/STRUCTURE/peptides.h>

#include "version.h"

using namespace BALL;

int main(int argc, char* argv[])
{
	// instantiate CommandlineParser object
	CommandlineParser parpars("ExtractProteinSequence", "extracts fasta sequence", VERSION, String(__DATE__), "Get Data");
	parpars.registerMandatoryInputFile("i",  "input pdb file from which to extract ");
	parpars.registerMandatoryOutputFile("o",  "output fasta file");
	parpars.registerOptionalStringParameter("c",  "chain specifier");

	// the manual
	String man = String("This tool extracts the fasta sequence from a given pdb file.");

	parpars.setToolManual(man);

	parpars.setSupportedFormats("i", "pdb");
	parpars.setSupportedFormats("o", "fasta");

	// parse the command line
	parpars.parse(argc, argv);

	PDBFile in_file;
	in_file.open(parpars.get("i"), std::ios::in);

	if (!in_file)
	{
		// if file does not exist: complain and abort
		Log.error() << "error opening " << parpars.get("i") << " for input." << std::endl;
		exit(2);
	}

	//TODO switch to the BALL FastaFile format if available
	LineBasedFile out_file;
	out_file.open(parpars.get("o"), std::ios::out);

	if (!out_file)
	{
		// if file does not exist: complain and abort
		Log.error() << "error opening " << parpars.get("o") << " for output." << std::endl;
		exit(2);
	}

	System system;
	in_file >> system;
	in_file.close();

	for (ProteinIterator it = system.beginProtein(); +it; ++it)
	{
		ChainConstIterator c_it = it->beginChain();
		for (; +c_it ; ++c_it)
		{
			// get the chain's sequence
			String seq = Peptides::GetSequence(*c_it);
			out_file << "> " << it->getName() + ":" + c_it->getName() << std::endl << seq << std::endl;
		}
	}

	out_file.close();
	Log << "wrote file " << parpars.get("o") << std::endl;

	return 0;

}