File: ExtractClustersFromWardTree.C

package info (click to toggle)
ball 1.5.0%2Bgit20180813.37fc53c-11.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 239,928 kB
  • sloc: cpp: 326,149; ansic: 4,208; python: 2,303; yacc: 1,778; lex: 1,099; xml: 958; sh: 322; javascript: 164; makefile: 88
file content (183 lines) | stat: -rw-r--r-- 5,769 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
// -*- Mode: C++; tab-width: 2; -*-
// vi: set ts=2:
//

#include <BALL/DOCKING/COMMON/poseClustering.h>
#include <BALL/FORMAT/DCDFile.h>
#include <BALL/FORMAT/PDBFile.h>
#include <BALL/FORMAT/lineBasedFile.h>
#include <BALL/DOCKING/COMMON/conformationSet.h>

#include <BALL/FORMAT/commandlineParser.h>
#include <iostream>
#include "version.h"

using namespace std;
using namespace BALL;

int main (int argc, char **argv)
{
	// instantiate CommandlineParser object supplying
	// - tool name
	// - short description
	// - version string
	// - build date
	// - category
	CommandlineParser parpars("ExtractClustersFromWardTree", "extracts docking clusters ", VERSION, String(__DATE__), "Docking");

	// we register an input file parameter 
	// - CLI switch
	// - description
	// - parameter type
	// - required
	parpars.registerMandatoryInputFile("i", "input serialized cluster file");
	parpars.registerOptionalStringParameter("i_type", "input type (binary, text)", "binary");

	// we register an output file parameter 
	// - CLI switch
	// - description
	// - parameter type
	// - required
	// - default value
	// - hidden in galaxy
	parpars.registerMandatoryOutputFile("o_out", "output file name ");
	parpars.setParameterAsHidden("o_out");

	// we register the output type
	parpars.registerOptionalStringParameter("o_type", "output type (gv, json, index_list) ", "index_list");
	list<String> output_types;
	output_types.push_back("gv");
	output_types.push_back("json");
	output_types.push_back("index_list");
	parpars.setParameterRestrictions("o_type", output_types);

	// we register the cutoff type
	parpars.registerOptionalStringParameter("cutoff_type", "cutoff type (ward_distance, num_clusters) ", "ward_distance");
	list<String> cutoff_types;
	cutoff_types.push_back("ward_distance");
	cutoff_types.push_back("num_clusters");
	parpars.setParameterRestrictions("cutoff_type", cutoff_types);

	// we register the cutoff value, either the minimal ward distance between the clusters
	//                               or the number of clusters to split into
	parpars.registerOptionalDoubleParameter("cut_value", "cut value for splitting the given WART tree using the cutoff-type (default 5.0) ", 5.0);
	parpars.setParameterRestrictions("cut_value", 0.0, 10000.0);

	// we register a parameter defining the minimal size of clusters - e.g. for filtering out single outlieers
	parpars.registerOptionalIntegerParameter("min_size", "minimal size of clusters (default 1) ", 1);
	parpars.setParameterRestrictions("min_size", 1, 10000);

	// the manual
	String man = "This tool extracts clusters of docking poses given a dat file.\n\nParameters are the filename (-i) of the serialized cluster tree, the output filename (-o_out), the output type (-o_type). The optional parameter -i_type allows to switch between binary (default) and text file for the cluster tree input, parameter -min_size allows to filter for cluster of a minimal size, parameter -cutoff_type defines the way to cut the cluster tree (either by ward distance or by a target number of clusters) using paramter -cut_value.\n\nOutput of this tool is the extracted cluster tree, either as index list, as graph visualization (gv) input, or as json";

	parpars.setToolManual(man);

	// here we set the types of I/O files
	parpars.setSupportedFormats("i", "dat");
	parpars.setSupportedFormats("o_out", "txt,gv,json");

	parpars.parse(argc, argv);

	//////////////////////////////////////////////////


	// read the input	
	File tree;
	tree.open(parpars.get("i"));

	PoseClustering pc;
	// this only works for Nearest neighbor chain ward trees...
	pc.options.set(PoseClustering::Option::CLUSTER_METHOD, PoseClustering::NEAREST_NEIGHBOR_CHAIN_WARD);

	// import a binary file
	bool binary = parpars.get("i_type") == "binary";
	pc.deserializeWardClusterTree(tree, binary);

	int min_cluster_size = 0;
	if (parpars.has("min_size"))
	{
		min_cluster_size = parpars.get("min_size").toInt();
	}
	Log << "  Use min_size = " << min_cluster_size << endl;


	float cut_value = 5;
	if (parpars.has("cut_value"))
	{
		cut_value = parpars.get("cut_value").toFloat();
	}
	Log << "  Use cut_value = " << cut_value << endl;


	float num_clusters_to_extract = 5.;
	int   max_ward_dist = 5;

	//std::vector<std::set<Index> > clusters;

	if (parpars.has("cutoff_type"))
	{
		String type = parpars.get("cutoff_type");
		Log << "  Use cutoff_type = " << type << endl;

		if (type == "ward_distance")
		{
			max_ward_dist = cut_value;

			//clusters = 
			pc.extractClustersForThreshold(max_ward_dist, min_cluster_size);
		}
		else if (type == "num_clusters")
		{
			num_clusters_to_extract = cut_value;

			//clusters = 
			pc.extractNBestClusters(num_clusters_to_extract);
			pc.filterClusters(min_cluster_size);
		}
		else
		{
			Log.info() << "Unknown value " << type  << " for option cutoff_type." << endl;
			return 1;
		}
	}

	Log << endl << "Extracted " << pc.getNumberOfClusters()  << " clusters, start writing... ";

	String outfile_name = String(parpars.get("o_out"));

	if (parpars.get("o_type") == "index_list")
	{
		File cluster_outfile(outfile_name, std::ios::out);
		pc.printClusters(cluster_outfile);

		Log << outfile_name << endl;
	}
	else if (parpars.get("o_type") == "gv")
	{
		File gv_outfile(outfile_name, std::ios::out);

		pc.exportWardClusterTreeToGraphViz(gv_outfile);
		gv_outfile.close();

		Log << outfile_name << endl;
		Log << "For drawing the graph use, e.g. \n\tdot -Tps -o tree.ps " << outfile_name << endl;
	}
	else if (parpars.get("o_type") == "json")
	{
		File json_outfile(outfile_name, std::ios::out);

		pc.exportClusterTreeToJSON(json_outfile);
		json_outfile.close();

		Log << outfile_name << endl;
	}
	else
	{
		Log << "Unspecified output!!" << endl;
	}

	Log << "done." << endl;

return 0;
}