File: linearly_independent_subset_finder_abstract.h

package info (click to toggle)
mldemos 0.5.1-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 32,224 kB
  • ctags: 46,525
  • sloc: cpp: 306,887; ansic: 167,718; ml: 126; sh: 109; makefile: 2
file content (311 lines) | stat: -rw-r--r-- 11,357 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
// Copyright (C) 2008  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_LISf_ABSTRACT_
#ifdef DLIB_LISf_ABSTRACT_

#include "../algs.h"
#include "../serialize.h"
#include "kernel_abstract.h"

namespace dlib
{

    template <
        typename kernel_type
        >
    class linearly_independent_subset_finder
    {
        /*!
            REQUIREMENTS ON kernel_type
                is a kernel function object as defined in dlib/svm/kernel_abstract.h 

            INITIAL VALUE
                - size() == 0

            WHAT THIS OBJECT REPRESENTS
                This is an implementation of an online algorithm for recursively finding a
                set (aka dictionary) of linearly independent vectors in a kernel induced 
                feature space.  To use it you decide how large you would like the dictionary 
                to be and then you feed it sample points.  

                The implementation uses the Approximately Linearly Dependent metric described 
                in the paper The Kernel Recursive Least Squares Algorithm by Yaakov Engel to 
                decide which points are more linearly independent than others.  The metric is 
                simply the squared distance between a test point and the subspace spanned by 
                the set of dictionary vectors.

                Each time you present this object with a new sample point (via this->add()) 
                it calculates the projection distance and if it is sufficiently large then this 
                new point is included into the dictionary.  Note that this object can be configured 
                to have a maximum size.  Once the max dictionary size is reached each new point 
                kicks out a previous point.  This is done by removing the dictionary vector that 
                has the smallest projection distance onto the others.  That is, the "least linearly 
                independent" vector is removed to make room for the new one.
        !*/

    public:
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::sample_type sample_type;
        typedef typename kernel_type::sample_type type;
        typedef typename kernel_type::mem_manager_type mem_manager_type;

        linearly_independent_subset_finder (
        );
        /*!
            ensures
                - #minimum_tolerance() == 0.001 
                - this object is properly initialized
                - #get_kernel() == kernel_type()  (i.e. whatever the default is for the supplied kernel) 
                - #max_dictionary_size() == 100 
        !*/

        linearly_independent_subset_finder (
            const kernel_type& kernel_, 
            unsigned long max_dictionary_size_,
            scalar_type min_tolerance = 0.001
        );
        /*!
            requires
                - min_tolerance > 0
                - max_dictionary_size > 1
            ensures
                - #minimum_tolerance() == min_tolerance
                - this object is properly initialized
                - #get_kernel() == kernel_
                - #max_dictionary_size() == max_dictionary_size_
        !*/

        const kernel_type& get_kernel (
        ) const;
        /*!
            ensures
                - returns a const reference to the kernel used by this object
        !*/

        unsigned long max_dictionary_size(
        ) const;
        /*!
            ensures
                - returns the maximum number of dictionary vectors this object
                  will accumulate.  That is, size() will never be
                  greater than max_dictionary_size().
        !*/

        scalar_type minimum_tolerance(
        ) const;
        /*!
            ensures
                - returns the minimum projection error necessary to include a sample point
                  into the dictionary.   
        !*/

        void set_minimum_tolerance (
            scalar_type min_tolerance 
        );
        /*!
            requires
                - min_tolerance > 0
            ensures
                - #minimum_tolerance() == min_tolerance
        !*/

        void clear_dictionary (
        );
        /*!
            ensures
                - clears out all the data (e.g. #size() == 0)
        !*/

        bool add (
            const sample_type& x
        );
        /*!
            ensures
                - if (size() < max_dictionary_size() then
                    - if (projection_error(x) > minimum_tolerance()) then 
                        - adds x into the dictionary
                        - (*this)[#size()-1] == x
                        - #size() == size() + 1
                        - returns true
                    - else
                        - the dictionary is not changed
                        - returns false
                - else
                    - #size() == size() 
                      (i.e. the number of vectors in this object doesn't change)
                    - since the dictionary is full adding a new element means we have to 
                      remove one of the current ones.  So let proj_error[i] be equal to the 
                      projection error obtained when projecting dictionary vector (*this)[i] 
                      onto the other elements of the dictionary.  Then let min_proj_error 
                      be equal to the minimum value in proj_error.  The dictionary element
                      with the minimum projection error is the "least linearly independent"
                      vector in the dictionary and is the one which will be removed to make
                      room for a new element.
                    - if (projection_error(x) > minimum_tolerance() && projection_error(x) > min_proj_error)
                        - the least linearly independent vector in this object is removed
                        - adds x into the dictionary
                        - (*this)[#size()-1] == x
                        - returns true
                    - else
                        - the dictionary is not changed
                        - returns false
        !*/

        scalar_type projection_error (
            const sample_type& x
        ) const;
        /*!
            ensures
                - returns the squared distance between x and the subspace spanned by 
                  the set of dictionary vectors.  (e.g. this is the same number that
                  gets returned by the empirical_kernel_map::project() function's 
                  projection_error argument when the ekm is loaded with the dictionary
                  vectors.)
                - Note that if the dictionary is empty then the return value is
                  equal to get_kernel()(x,x).
        !*/

        void swap (
            linearly_independent_subset_finder& item
        );
        /*!
            ensures
                - swaps *this with item
        !*/

        unsigned long size (
        ) const;
        /*!
            ensures
                - returns the number of vectors in the dictionary.  
        !*/

        const sample_type& operator[] (
            unsigned long index
        ) const;
        /*!
            requires
                - index < size()
            ensures
                - returns the index'th element in the set of linearly independent 
                  vectors contained in this object.
        !*/

        const matrix<sample_type,0,1,mem_manager_type> get_dictionary (
        ) const;
        /*!
            ensures
                - returns a column vector that contains all the dictionary
                  vectors in this object.
        !*/

        const matrix<scalar_type,0,0,mem_manager_type>& get_kernel_matrix (
        ) const;
        /*!
            ensures
                - returns a matrix K such that:
                    - K.nr() == K.nc() == size()
                    - K == kernel_matrix(get_kernel(), get_dictionary())
                      i.e. K == the kernel matrix for the dictionary vectors
        !*/

        const matrix<scalar_type,0,0,mem_manager_type>& get_inv_kernel_marix (
        ) const;
        /*!
            ensures
                - if (size() != 0)
                    - returns inv(get_kernel_matrix())
                - else
                    - returns an empty matrix
        !*/
    };

// ----------------------------------------------------------------------------------------

    template <
        typename kernel_type
        >
    void swap(
        linearly_independent_subset_finder<kernel_type>& a, 
        linearly_independent_subset_finder<kernel_type>& b
    ) { a.swap(b); }
    /*!
        provides a global swap function
    !*/

    template <
        typename kernel_type
        >
    void serialize (
        const linearly_independent_subset_finder<kernel_type>& item,
        std::ostream& out
    );
    /*!
        provides serialization support for linearly_independent_subset_finder objects
    !*/

    template <
        typename kernel_type 
        >
    void deserialize (
        linearly_independent_subset_finder<kernel_type>& item,
        std::istream& in 
    );
    /*!
        provides serialization support for linearly_independent_subset_finder objects
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename kernel_type,
        typename vector_type,
        typename rand_type
        >
    void fill_lisf (
        linearly_independent_subset_finder<kernel_type>& lisf,
        const vector_type& samples,
        rand_type& rnd,
        int sampling_size = 2000
    );
    /*!
        requires
            - vector_type == a dlib::matrix or something convertible to one via 
              vector_to_matrix()
            - is_vector(vector_to_matrix(samples)) == true
            - rand_type == an implementation of rand/rand_kernel_abstract.h or a type
              convertible to a string via cast_to_string()
            - sampling_size > 0
        ensures
            - The purpose of this function is to fill lisf with points from samples.  It does
              this by randomly sampling elements of samples until no more can be added.  The
              precise stopping condition is when sampling_size additions to lisf have failed
              or the max dictionary size has been reached.
            - This function employs a random number generator.  If rand_type is a random 
              number generator then it uses the instance given.  Otherwise it uses cast_to_string(rnd)
              to seed a new random number generator.
    !*/

    template <
        typename kernel_type,
        typename vector_type
        >
    void fill_lisf (
        linearly_independent_subset_finder<kernel_type>& lisf,
        const vector_type& samples
    );
    /*!
        requires
            - vector_type == a dlib::matrix or something convertible to one via 
              vector_to_matrix()
            - is_vector(vector_to_matrix(samples)) == true
        ensures
            - performs fill_lisf(lisf, samples, default_rand_generator, 2000)
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_LISf_ABSTRACT_