File: predictions.c

package info (click to toggle)
autoclass 3.3.4-6
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 3,844 kB
  • ctags: 994
  • sloc: ansic: 16,674; makefile: 123; sh: 98; cpp: 95; csh: 77
file content (132 lines) | stat: -rw-r--r-- 4,623 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <time.h> 
#ifndef _MSC_VER
#include <sys/param.h>
#endif
#include "autoclass.h"
#include "globals.h"


/* AUTOCLASS_PREDICT
   18may95 wmt: adapted from ac-x::predict-class
   10apr97 wmt: add database->n_data to copy_class_DS call
   30jun00 wmt: allocate separate storage for test_clsf->reports->class_wt_ordering

   Use an autoclass "training" classification to predict
   class membership of cases in a "test" data base.

   data_file_ptr  - test data file name, handed to generate_clsf
   training_clsf  - training classification; supplies the header and model
                    file names, the class count and the class weight ordering
   test_clsf      - only tested against NULL on entry: a non-NULL value
                    suppresses the input checking output and the log file.
                    The incoming value is then replaced by the classification
                    that generate_clsf builds.
   log_file_fp, log_file_ptr
                  - log stream and log file name handed to generate_clsf
                    (both forced to NULL when test_clsf is non-NULL)

   Returns the generated test classification, grown to
   training_clsf->n_classes classes, after update_wts has been applied.
   Calls exit(1) if the class array cannot be grown, or if the test data
   and the training classification differ in model and/or attributes.

   Side effects: assigns G_training_clsf, G_prediction_p and G_stream.
   */
clsf_DS autoclass_predict( char *data_file_ptr, clsf_DS training_clsf,
                           clsf_DS test_clsf, FILE *log_file_fp,
                           char *log_file_ptr)
{
  FILE *stream = stdout;
  FILE *header_file_fp = NULL, *model_file_fp = NULL;
  char *header_file_ptr, *model_file_ptr;
  shortstr start_fn_type = "block";
  int want_wts_p = TRUE, n_classes = training_clsf->n_classes;
  int num_classes = 1, reread_p = FALSE, regenerate_p = FALSE, n_class;
  int restart_p = FALSE, initial_cycles_p = FALSE, n_data = 0;
  int start_j_list_from_s_params = FALSE;
  class_DS *grown_classes;

  /* ------------------------------------------------------------*/

  G_training_clsf = training_clsf;
  G_prediction_p = TRUE;
  if (test_clsf != NULL) {      /* do not print out input checking again */
    stream = NULL;
    G_stream = NULL;
    log_file_fp = NULL;
    log_file_ptr = NULL;
  }
  /* get test database */
  header_file_ptr = training_clsf->database->header_file;
  model_file_ptr = training_clsf->models[0]->model_file;
  /* an empty file name leaves the corresponding stream NULL */
  if (eqstring( header_file_ptr, "") != TRUE)
    header_file_fp = fopen( header_file_ptr, "r");
  if (eqstring( model_file_ptr, "") != TRUE)
    model_file_fp = fopen( model_file_ptr, "r");
  test_clsf = generate_clsf( num_classes, header_file_fp, model_file_fp,
                            log_file_fp, stream, reread_p, regenerate_p,
                            data_file_ptr, header_file_ptr, model_file_ptr,
                            log_file_ptr, restart_p, start_fn_type,
                            initial_cycles_p, n_data, start_j_list_from_s_params);
  if (header_file_fp != NULL)
    fclose( header_file_fp);
  if (model_file_fp != NULL)
    fclose( model_file_fp);

  init_clsf_for_reports( test_clsf, G_prediction_p);

  /* NOTE(review): test_clsf was reassigned by generate_clsf above, so this
     tests the generated classification rather than the caller's argument;
     test_clsf is dereferenced unconditionally just below -- confirm intent */
  if (test_clsf != NULL)
    G_stream = stdout;

  /* use weight ordering from training clsf */
  test_clsf->reports->n_class_wt_ordering = training_clsf->reports->n_class_wt_ordering;
  /* allocate separate storage rather than sharing the training clsf's array */
  test_clsf->reports->class_wt_ordering =  get_class_weight_ordering( training_clsf);
  /* create training classes in test_clsf in order to store the predicted weights */
  grown_classes =
    (class_DS *) realloc( test_clsf->classes, n_classes * sizeof( class_DS));
  /* a failed realloc returns NULL and leaves the old array allocated; storing
     that NULL would crash on classes[0] below, so stop with a diagnostic */
  if ((grown_classes == NULL) && (n_classes > 0)) {
    fprintf( stderr, "ERROR: autoclass_predict: unable to allocate storage "
            "for %d classes\n", n_classes);
    exit (1);
  }
  test_clsf->classes = grown_classes;
  test_clsf->n_classes = n_classes;
  /* classes[0 .. num_classes-1] came from generate_clsf; fill the remainder
     with copies of class 0 */
  for (n_class=num_classes; n_class<n_classes; n_class++)
    test_clsf->classes[n_class] = copy_class_DS( test_clsf->classes[0],
                                                 test_clsf->database->n_data,
                                                 want_wts_p);

  if (same_model_and_attributes( test_clsf, training_clsf) == FALSE) {
    fprintf( stdout, "ERROR: training classification & test data have different "
            "models and/or different attributes \n");
    exit (1);
  }

  update_wts( training_clsf, test_clsf);

  return (test_clsf);
}


/* SAME_MODEL_AND_ATTRIBUTES
   20may95 wmt: new

   Check whether two clsfs have the same model and attributes --
   used by autoclass_predict.

   Returns TRUE when both classifications name the same model file with the
   same file index, their databases hold the same number of attributes, and
   every attribute pair agrees in type, sub_type, dscrp and n_props;
   returns FALSE otherwise.  Calls exit(1) when either classification does
   not have exactly one model.
   */
int same_model_and_attributes( clsf_DS clsf1, clsf_DS clsf2)
{
  int n_att;
  model_DS first_model, second_model;
  database_DS first_db, second_db;
  att_DS first_att, second_att;

  /* prediction is only defined for single-model classifications */
  if (!((clsf1->num_models == 1) && (clsf2->num_models == 1))) {
    fprintf( stderr, "ERROR: -predict assumes only one model\n");
    exit (1);
  }

  first_model = clsf1->models[0];
  second_model = clsf2->models[0];
  first_db = clsf1->database;
  second_db = clsf2->database;

  /* model-level mismatches are settled before any attribute is inspected */
  if (!(eqstring( first_model->model_file, second_model->model_file)))
    return (FALSE);
  if (first_model->file_index != second_model->file_index)
    return (FALSE);
  if (first_db->n_atts != second_db->n_atts)
    return (FALSE);

  /* attribute-by-attribute comparison; the first difference decides */
  for (n_att = 0; n_att < first_db->n_atts; n_att++) {
    first_att = first_db->att_info[n_att];
    second_att = second_db->att_info[n_att];
    if ((eqstring( first_att->type, second_att->type) == FALSE) ||
        (eqstring( first_att->sub_type, second_att->sub_type) == FALSE) ||
        (eqstring( first_att->dscrp, second_att->dscrp) == FALSE) ||
        (first_att->n_props != second_att->n_props))
      return (FALSE);
  }

  return (TRUE);
}