File: knncoremodule.hpp

package info (click to toggle)
gamera 1%3A3.4.2%2Bgit20160808.1725654-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 22,312 kB
  • ctags: 24,991
  • sloc: xml: 122,324; ansic: 52,869; cpp: 50,664; python: 35,034; makefile: 118; sh: 101
file content (182 lines) | stat: -rw-r--r-- 6,435 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/*
 * Copyright (C) 2001-2009 Ichiro Fujinaga, Michael Droettboom,
 *                         Karl MacMillan, and Christoph Dalitz
 *               2012      David Kolanus, Tobias Bolten
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#ifndef KnnObject201206
#define KnnObject201206

#include <Python.h>
#include <vector>
#include "gameramodule.hpp"
#include "knn.hpp"
#include "knnmodule.hpp"

namespace Gamera { namespace kNN {
#if 0
  static PyTypeObject KnnType = {
    PyObject_HEAD_INIT(NULL)
    0,
  };
#endif
  /*
    The KnnObject holds all of the information needed by knn. Unlike
    many of the parts of Gamera, there is a significant amount of
    functionality implemented in this module rather than just a
    wrapper around a C++ objects/code.
  */
  struct KnnObject {
    PyObject_HEAD
    // the number of features in each feature vector
    size_t num_features;

    /*
      The feature vectors.
      A vector to double arrays of size num_features is used to store the
      feature vectors. It is only used for non-interactive classification).
    */
    std::vector<double*> *feature_vectors;

    // The id_names for the feature vectors
    char** id_names;
    // confidence types to be computed during classification
    std::vector<int> *confidence_types;
    // The current selected features
    int *selection_vector;
    // The current weights applied to the distance calculation
    double* weight_vector;
    // a histogram of the id_names for use in leave-one-out
    int* id_name_histogram;
    /*
      The normalization applied to the feature vectors prior to distance
      calculation.
    */
    Normalize* normalize;
    /*
      Temporary storage for the unknown feature vector. This is simply to avoid
      allocating memory for each call to classify (and could potentially
      increase our cache hit rate, but who really knows).
    */
    double* unknown;
    // k - this is k-NN after all
    size_t num_k;
    // the distance type currently being used.
    DistanceType distance_type;
  };

  /*
    String comparison functors used by the kNearestNeighbors object
  */
  struct ltstr {
    bool operator()(const char* s1, const char* s2) const {
      return strcmp(s1, s2) < 0;
    }
  };
  struct eqstr {
    bool operator()(const char* s1, const char* s2) const {
      return strcmp(s1, s2) == 0;
    }
  };

  static std::pair<int,int> leave_one_out(KnnObject* o, int stop_threshold,
                                          int* selection_vector = 0,
                                          double* weight_vector = 0,
                                          std::vector<long>* indexes = 0) {
    int* selections = selection_vector;
    if (selections == 0) {
      selections = o->selection_vector;
    }

    double* weights = weight_vector;
    if (weights == 0) {
      weights = o->weight_vector;
    }

    assert(o->feature_vectors != 0);
    kNearestNeighbors<char*, ltstr, eqstr> knn(o->num_k);

    int total_correct = 0;
    int total_queries = 0;
    if (indexes == 0) {
      for (size_t i = 0; i < o->feature_vectors->size(); ++i/*, cur += o->num_features*/) {
        // We don't want to do the calculation if there is no
        // hope that kNN will return the correct answer (because
        // there aren't enough examples in the database).
        if (o->id_name_histogram[i] < int((o->num_k + 0.5) / 2)) {
          continue;
        }
        double* current_known;
        double* unknown = (*o->feature_vectors)[i];
        for (size_t j = 0; j < o->feature_vectors->size(); ++j) {
          current_known = (*o->feature_vectors)[j];
          if (i == j)
            continue;
          double distance;
          compute_distance(o->distance_type, current_known, o->num_features,
                           unknown, &distance, selections, weights);
          knn.add(o->id_names[j], distance);
        }
        knn.majority();
        if (strcmp(knn.answer[0].first, o->id_names[i]) == 0) {
          total_correct++;
        }
        knn.reset();
        total_queries++;
        if (total_queries - total_correct > stop_threshold)
          return std::make_pair(total_correct, total_queries);
      }
    } else {
      for (size_t i = 0; i < o->feature_vectors->size(); ++i) {
        if (o->id_name_histogram[i] < int((o->num_k + 0.5) / 2))
          continue;
        double* current_known;
        double* unknown = (*o->feature_vectors)[i];
        for (size_t j = 0; j < o->feature_vectors->size(); ++j) {
          current_known = (*o->feature_vectors)[j];
          if (i == j)
            continue;
          double distance;
          if (o->distance_type == CITY_BLOCK) {
            distance = city_block_distance_skip(current_known, unknown, selections, weights,
                                                indexes->begin(), indexes->end());
          } else if (o->distance_type == FAST_EUCLIDEAN) {
            distance = fast_euclidean_distance_skip(current_known, unknown, selections, weights,
                                                    indexes->begin(), indexes->end());
          } else {
            distance = euclidean_distance_skip(current_known, unknown, selections, weights,
                                               indexes->begin(), indexes->end());
          }

          knn.add(o->id_names[j], distance);
        }
        knn.majority();
        if (strcmp(knn.answer[0].first, o->id_names[i]) == 0) {
          total_correct++;
        }
        knn.reset();
        total_queries++;
        if (total_queries - total_correct > stop_threshold)
          return std::make_pair(total_correct, total_queries);
      }
    }
    return std::make_pair(total_correct, total_queries);
  }

}} // end of namespaces

#endif