File: wagon_main.cc

package info (click to toggle)
speech-tools 1.1.0-1
  • links: PTS
  • area: non-free
  • in suites: slink
  • size: 5,280 kB
  • ctags: 6,800
  • sloc: cpp: 53,460; ansic: 2,936; java: 1,405; makefile: 830; perl: 728; sh: 538; awk: 49; pascal: 14
file content (203 lines) | stat: -rw-r--r-- 8,015 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                      Copyright (c) 1996,1997                          */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify and distribute this software and its */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                     Author :  Alan W Black                            */
/*                     Date   :  May 1996                                */
/*-----------------------------------------------------------------------*/
/*  A Classification and Regression Tree (CART) Program                  */
/*  A basic implementation of many of the techniques in                  */
/*  Briemen et al. 1984                                                  */
/*                                                                       */
/*  Added decision list support, Feb 1997                                */
/*                                                                       */
/*=======================================================================*/
#include <stdlib.h>
#include <iostream.h>
#include <fstream.h>
#include <string.h>
#include "EST_Wagon.h"
#include "EST_cmd_line.h"

enum wn_strategy_type {wn_decision_list, wn_decision_tree};

static wn_strategy_type wagon_type = wn_decision_tree;

static int wagon_main(int argc, char **argv);

int main(int argc, char **argv)
{

    wagon_main(argc,argv);

    exit(0);
    return 0;
}

static int wagon_main(int argc, char **argv)
{
    // Top level function sets up data and creates a tree
    EST_Option al;
    EST_StrList files;
    ostream *wgn_coutput = 0;
    float stepwise_limit = 0;

    parse_command_line(argc, argv,
       EST_String("Usage:\n")+
       "wagon  <options>\n"+
       "Wagon CART building program (defaults in {})\n"+
       "-desc <ifile>     Field description file\n"+
       "-data <ifile>     Datafile, one vector per line\n"+
       "-stop <int> {50}  Minimum number of examples for leaf nodes\n"+
       "-test <ifile>     Datafile to test tree on\n"+
       "-frs <float> {10} Float range split, number of partitions to\n"+
       "                  split a float feature range into\n"+
       "-distmatrix <ifile>\n"+
       "                  A distance matrix for clustering\n"+
       "-dlist            Build a decision list (rather than tree)\n"+
       "-dtree            Build a decision tree (rather than list) default\n"+
       "-output <ofile>   File to save output tree in\n"+
       "-v                Print version number and exit\n"+
       "-quiet            No questions printed during building\n"+
       "-verbose          Lost of information printing during build\n"+
       "The following are *provisional* and may not work\n"+
       "-stepwise         Incrementally find best features\n"+
       "-swlimit <float> {0.0}\n"+
       "                  Percentage necessary improvement for stepwise\n"+
       "-balance <float>  For derived stop size, if dataset at node, divided\n"+
       "                  by balance is greater than stop it is used as stop\n"+
       "                  if balance is 0 (default) always use stop as is.\n"+
       "-held_out <int>   Percent to hold out for pruning\n"+
       "-heap <int> {210000}\n"+
       "              Set size of Lisp heap, should not normally need\n"+
       "              to be changed from its default, only with *very*\n"+
       "              large description files (> 1M)\n"+
       "-noprune          No (same class) pruning required\n",
		       files, al);

    if (al.present("-v"))
    {
	printf("%s: %s\n",argv[0],wagon_version);
	exit(0);
    }

    if (al.present("-subset"))
	wgn_csubset = TRUE;
    if (al.present("-held_out"))
	wgn_held_out = al.ival("-held_out");
    if (al.present("-balance"))
	wgn_balance = al.fval("-balance");
    if ((!al.present("-desc")) || ((!al.present("-data"))))
    {
	cerr << argv[0] << ": missing description and/or datafile" << endl;
	cerr << "use -h for description of arguments" << endl;
    }

    if (al.present("-quiet"))
	wgn_quiet = TRUE;
    if (al.present("-verbose"))
	wgn_verbose = TRUE;

    if (al.present("-test"))
    {
	wgn_desc_file = al.val("-desc");
	wgn_test_file = al.val("-test");
    }
    if (al.present("-stop"))
	wgn_min_cluster_size = atoi(al.val("-stop"));
    if (al.present("-noprune"))
	wgn_prune = FALSE;
    if (al.present("-swlimit"))
	stepwise_limit = al.fval("-swlimit");
    if (al.present("-frs"))   // number of partitions to try in floats
	wgn_float_range_split = atof(al.val("-frs"));
    if (al.present("-output"))
    {
	wgn_coutput = new ofstream(al.val("-output"));
	if (!(*wgn_coutput))
	{
	    cerr << "Wagon: can't open file \"" << al.val("-output") <<
		"\" for output " << endl;
	    exit(-1);
	}
    }
    else
	wgn_coutput = &cout;
    if (al.present("-distmatrix"))
    {
	if (wgn_DistMatrix.load(al.val("-distmatrix")) != 0)
	{
	    cerr << "Wagon: failed to load Distance Matrix from \"" <<
		al.val("-distmatrix") << "\"\n" << endl;
	    exit(-1);
	}
    }
    if (al.present("-dlist"))
	wagon_type = wn_decision_list;

    WNode *tree;
    float score;

    siod_init(al.ival("-heap"));

    // Load in the data
    wgn_load_datadescription(al.val("-desc"));
    wgn_load_dataset(wgn_dataset,al.val("-data"));
    if (al.present("-distmatrix") &&
	(wgn_DistMatrix.num_rows() < wgn_dataset.length()))
    {
	cerr << "wagon: distance matrix is smaller than number of training elements\n";
	exit(-1);
    }

    if (al.present("-test"))
	wgn_load_dataset(wgn_test_dataset,al.val("-test"));

    // Build and test the model 
    if (al.present("-stepwise"))
	tree = wagon_stepwise(stepwise_limit);
    else if (wagon_type == wn_decision_tree)
	tree = wgn_build_tree(score);  // default operation
    else if (wagon_type == wn_decision_list)
	// dlist is printed with build_dlist rather than returned
	tree = wgn_build_dlist(score,wgn_coutput);
    else
    {
	cerr << "Wagon: unknown operation, not tree or list" << endl;
	exit(-1);
    }

    if (tree != 0)
    {
	*wgn_coutput << *tree;
	summary_results(*tree,wgn_coutput);
    }

    if (wgn_coutput != &cout)
	delete wgn_coutput;
    return 0;
}