File: inputPartitioner.C

package info (click to toggle)
ball 1.4.3~beta1-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 318,984 kB
  • sloc: cpp: 346,579; ansic: 4,097; python: 2,664; yacc: 1,778; lex: 1,099; xml: 964; sh: 688; sql: 316; awk: 118; makefile: 108
file content (93 lines) | stat: -rw-r--r-- 2,240 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <fstream>
#include <BALL/QSAR/registry.h>
#include <BALL/QSAR/featureSelection.h>
#include <BALL/QSAR/configIO.h>

using namespace BALL::QSAR;
using namespace BALL;

void startInputPartitioning(std::ifstream& in, QSARData* q, String* data_filename)
{
	InputPartitioningConfiguration conf = ConfigIO::readInputPartitioningConfiguration(&in);
		
	if(conf.done) return; // stop processing this section
		
	bool created_data_object=0;
	if(q==NULL || data_filename==NULL || conf.input_file!=*data_filename)
	{
		if(q==NULL)
		{
			q = new QSARData;
			created_data_object=1;
		}
		q->readFromFile(conf.input_file);
		if(data_filename) *data_filename = conf.input_file;
	}
		
	// -- create prefix for output-files ---
	String output_prefix = conf.input_file;
	int index = output_prefix.find_first_of(".");
	if(index!=(int)string::npos)
	{
		output_prefix=output_prefix.substr(0,index);
	}
	if(conf.ID>0)
	{
		output_prefix+="_"+String(conf.ID);
	}
	// -----------
		
	for(int i=0; i<conf.no_folds; i++)
	{	
		vector<QSARData*> sets = q->generateExternalSet(conf.validation_fraction); // length==2
			
		if(q->isDataCentered())
		{
			bool center_y = q->isResponseCentered();
			sets[0]->centerData(center_y); // train-partition
			sets[1]->centerData(center_y); // test-partition
		}
	
		String train_name = output_prefix+"_TRAIN"+String(i)+".dat";
		String test_name = output_prefix+"_TEST"+String(i)+".dat";
		sets[0]->saveToFile(train_name);
		sets[1]->saveToFile(test_name);
	}
	
	if(created_data_object) delete q;
}
		

#ifndef EXT_MAIN
/**
 * Command-line entry point: runs every [InputPartitioner] section found in
 * the configuration file given as first argument.
 *
 * @param argc number of command-line arguments
 * @param argv argv[1] is the path of the configuration file
 * @return 0 on success, 1 if no configuration file was given or it could not be opened
 */
int main(int argc, char* argv[])
{
	if(argc<2)
	{
		cout<<"Please specify configuration file!"<<endl;
		return 1; // report the failure to the calling process
	}

	ifstream in(argv[1]);
	if(!in)
	{
		cout<<"Configuration file '"<<argv[1]<<"' not found!"<<endl;
		return 1;
	}

	String line;

	while(true) // process ALL InputPartitioner sections
	{
		// Skip everything until the beginning of the next InputPartitioner section.
		// The loop is driven by the result of getline(), so it also terminates when
		// the stream fails without reaching end-of-file.
		bool found_section=false;
		while(getline(in,line))
		{
			if(line.hasPrefix("[InputPartitioner]"))
			{
				found_section=true;
				break;
			}
		}
		if(!found_section) break; // there are no (more) input-sections!

		// hand the section header back to the stream before the section is parsed
		ConfigIO::putbackLine(&in,line);

		startInputPartitioning(in,NULL,NULL);
	}

	return 0;
}
#endif