File: cross_validate_sequence_labeler.h

package info (click to toggle)
mldemos 0.5.1-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 32,224 kB
  • ctags: 46,525
  • sloc: cpp: 306,887; ansic: 167,718; ml: 126; sh: 109; makefile: 2
file content (152 lines) | stat: -rw-r--r-- 4,993 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_H__
#define DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_H__

#include "cross_validate_sequence_labeler_abstract.h"
#include <vector>
#include "../matrix.h"
#include "svm.h"


namespace dlib
{

// ----------------------------------------------------------------------------------------

    // Runs labeler over every sequence in samples and tallies how its predictions
    // compare against the given labels.
    //
    // The returned matrix is labeler.num_labels() by labeler.num_labels().  Element
    // (r,c) counts the sequence elements whose true label is r and whose predicted
    // label is c.  Elements whose true label is not a valid row index of that matrix
    // are not counted at all.
    template <
        typename sequence_labeler_type,
        typename sequence_type 
        >
    const matrix<double> test_sequence_labeler (
        const sequence_labeler_type& labeler,
        const std::vector<sequence_type>& samples,
        const std::vector<std::vector<unsigned long> >& labels
    )
    {
        // check that the caller respected this function's preconditions
        DLIB_ASSERT( is_sequence_labeling_problem(samples, labels) == true,
                    "\tmatrix test_sequence_labeler()"
                    << "\n\t invalid inputs were given to this function"
                    << "\n\t is_sequence_labeling_problem(samples, labels): " 
                    << is_sequence_labeling_problem(samples, labels));

        // confusion counts, all starting at zero
        matrix<double> confusion(labeler.num_labels(), labeler.num_labels());
        confusion = 0;

        // true labels at or above this value have no row in the matrix
        const unsigned long num_rows = static_cast<unsigned long>(confusion.nr());

        // output buffer handed to label_sequence() for every sample
        std::vector<unsigned long> predicted;
        for (unsigned long seq = 0; seq < samples.size(); ++seq)
        {
            labeler.label_sequence(samples[seq], predicted);

            for (unsigned long pos = 0; pos < predicted.size(); ++pos)
            {
                const unsigned long actual = labels[seq][pos];

                // only count labels the labeler knows about
                if (actual < num_rows)
                {
                    confusion(actual, predicted[pos]) += 1;
                }
            }
        }

        return confusion;
    }

// ----------------------------------------------------------------------------------------

    // Performs folds-fold cross validation of trainer on the given sequence
    // labeling problem and returns the sum of the per-fold matrices produced by
    // test_sequence_labeler().
    //
    // Each fold tests on samples.size()/folds consecutive sequences, starting where
    // the previous fold's test block ended, and trains on the
    // samples.size() - samples.size()/folds sequences that follow the test block
    // (wrapping around to the beginning of samples when the end is reached).
    template <
        typename trainer_type,
        typename sequence_type
        >
    const matrix<double> cross_validate_sequence_labeler (
        const trainer_type& trainer,
        const std::vector<sequence_type>& samples,
        const std::vector<std::vector<unsigned long> >& labels,
        const long folds
    )
    {
        // check that the caller respected this function's preconditions
        DLIB_ASSERT(is_sequence_labeling_problem(samples,labels) == true &&
                    1 < folds && folds <= static_cast<long>(samples.size()),
            "\tmatrix cross_validate_sequence_labeler()"
            << "\n\t invalid inputs were given to this function"
            << "\n\t samples.size(): " << samples.size() 
            << "\n\t folds:  " << folds 
            << "\n\t is_sequence_labeling_problem(samples,labels): " << is_sequence_labeling_problem(samples,labels)
            );

#ifdef ENABLE_ASSERTS
        // every individual label must be smaller than trainer.num_labels()
        for (unsigned long i = 0; i < labels.size(); ++i)
        {
            for (unsigned long j = 0; j < labels[i].size(); ++j)
            {
                DLIB_ASSERT(labels[i][j] < trainer.num_labels(),
                            "\t matrix cross_validate_sequence_labeler()"
                            << "\n\t The labels are invalid."
                            << "\n\t labels[i][j]: " << labels[i][j] 
                            << "\n\t trainer.num_labels(): " << trainer.num_labels()
                            << "\n\t i: " << i 
                            << "\n\t j: " << j 
                );
            }
        }
#endif

        // sizes of the two partitions used in every fold
        const long test_count = samples.size()/folds;
        const long train_count = samples.size() - test_count;

        // per-fold buffers, emptied and refilled on each iteration
        std::vector<sequence_type> x_train, x_test;
        std::vector<std::vector<unsigned long> > y_train, y_test;

        // running sum of the per-fold results
        matrix<double> totals;

        // index of the first test sample of the upcoming fold
        long test_begin = 0;

        for (long fold = 0; fold < folds; ++fold)
        {
            x_train.clear();
            y_train.clear();
            x_test.clear();
            y_test.clear();

            // walks through samples circularly: first over this fold's test
            // block, then over the training samples that come right after it
            long cursor = test_begin;

            // collect the test samples
            for (long left = test_count; left > 0; --left)
            {
                x_test.push_back(samples[cursor]);
                y_test.push_back(labels[cursor]);
                cursor = (cursor + 1)%samples.size();
            }

            // the next fold's test block starts where this one stopped
            test_begin = cursor;

            // collect the training samples
            for (long left = train_count; left > 0; --left)
            {
                x_train.push_back(samples[cursor]);
                y_train.push_back(labels[cursor]);
                cursor = (cursor + 1)%samples.size();
            }

            // train on this fold's training set and accumulate its test results
            totals += test_sequence_labeler(trainer.train(x_train,y_train), x_test, y_test);
        }

        return totals;
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_H__