File: regression.py

Package: python-scipy 0.5.2-0.1
from ctypes import POINTER, c_double
import numpy as N

from model import LibSvmModel
import libsvm

__all__ = [
    'LibSvmEpsilonRegressionModel',
    'LibSvmNuRegressionModel',
    'LibSvmRegressionResults'
    ]

# XXX document why get_svr_probability could be useful

class LibSvmRegressionResults:
    def __init__(self, model, traindataset, kernel, PredictorType):
        modelc = model.contents
        if modelc.param.svm_type not in [libsvm.EPSILON_SVR, libsvm.NU_SVR]:
            raise TypeError('%s is for regression problems' %
                            str(self.__class__))
        self.rho = modelc.rho[0]
        self.sv_coef = modelc.sv_coef[0][:modelc.l]
        # probA is only present when the model was trained with probability
        # estimates enabled; for SVR it holds the Laplace scale parameter sigma.
        if modelc.probA:
            self.sigma = modelc.probA[0]
        self.predictor = PredictorType(model, traindataset, kernel)

    def predict(self, dataset):
        """
        This function performs regression on each test vector in
        dataset and returns the predicted function values as an array.
        """
        z = [self.predictor.predict(x) for x in dataset]
        return N.asarray(z).squeeze()

    def get_svr_probability(self):
        """
        This function returns a value sigma > 0, which is a parameter
        in the probability model for the test data.

        For test data, we consider the probability model:

        target value = predicted value + z

        where z follows a Laplace distribution with density
        e^(-|z|/sigma)/(2*sigma).
        """
        return self.sigma

    def compact(self):
        self.predictor.compact()

class LibSvmRegressionModel(LibSvmModel):
    ResultsType = LibSvmRegressionResults

    def __init__(self, kernel, **kwargs):
        LibSvmModel.__init__(self, kernel, **kwargs)

    def cross_validate(self, dataset, nr_fold):
        """
        Perform cross-validation to assess the suitability of the
        chosen model parameters.

        The data are split into nr_fold folds. Each fold is validated
        against a model trained on the data from the remaining
        (nr_fold-1) folds.

        This function returns a 2-tuple containing the mean squared
        error and the squared correlation coefficient.
        """

        problem = dataset._create_svm_problem()
        target = N.empty((len(dataset.data),), dtype=N.float64)
        tp = target.ctypes.data_as(POINTER(c_double))
        libsvm.svm_cross_validation(problem, self.param, nr_fold, tp)

        total_error = sumv = sumy = sumvv = sumyy = sumvy = 0.
        for i in range(len(dataset.data)):
            v = target[i]
            y = dataset.data[i][0]
            sumv = sumv + v
            sumy = sumy + y
            sumvv = sumvv + v * v
            sumyy = sumyy + y * y
            sumvy = sumvy + v * y
            total_error = total_error + (v - y) * (v - y)

        # mean squared error
        mse = total_error / len(dataset.data)
        # squared correlation coefficient
        l = len(dataset.data)
        scc = ((l * sumvy - sumv * sumy) * (l * sumvy - sumv * sumy)) / \
            ((l * sumvv - sumv*sumv) * (l * sumyy - sumy * sumy))
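
        # Hedged sketch (an addition, not in the original code): scc above is
        # the squared Pearson correlation between the cross-validation
        # predictions and the true targets, so the same statistics could be
        # computed directly with NumPy:
        #
        #     v = N.asarray(target)
        #     y = N.array([d[0] for d in dataset.data])
        #     mse = N.mean((v - y) ** 2)
        #     scc = N.corrcoef(v, y)[0, 1] ** 2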

        return mse, scc

class LibSvmEpsilonRegressionModel(LibSvmRegressionModel):
    """
    A model for epsilon-SV regression.

    See also:

    - Smola, Schoelkopf. A Tutorial on Support Vector Regression.
    - Gunn. Support Vector Machines for Classification and Regression.
    - Mueller, Vapnik. Using Support Vector Machines for Time Series
      Prediction.
    """

    def __init__(self, kernel, epsilon=0.1, cost=1.0,
                 probability=False, **kwargs):
        LibSvmRegressionModel.__init__(self, kernel, **kwargs)
        self.epsilon = epsilon
        self.cost = cost
        self.param.svm_type = libsvm.EPSILON_SVR
        self.param.p = epsilon
        self.param.C = cost
        self.param.probability = probability
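
# Hypothetical usage sketch (an assumption, not part of this module): the
# kernel object and the train/test datasets are constructed elsewhere in the
# package, and fit() is assumed to be provided by the LibSvmModel base class.
#
#     model = LibSvmEpsilonRegressionModel(kernel, epsilon=0.1, cost=10.0)
#     results = model.fit(traindata)
#     predictions = results.predict(testdata)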

class LibSvmNuRegressionModel(LibSvmRegressionModel):
    """
    A model for nu-SV regression.

    See also: Schoelkopf, et al. New Support Vector Algorithms.
    """

    def __init__(self, kernel,
                 nu=0.5, cost=1.0, probability=False, **kwargs):
        LibSvmRegressionModel.__init__(self, kernel, **kwargs)
        self.nu = nu
        self.cost = cost
        self.param.svm_type = libsvm.NU_SVR
        self.param.nu = nu
        self.param.C = cost
        self.param.probability = probability
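
# Note (an addition, not in the original source): in nu-SVR the parameter nu
# replaces epsilon as the directly tuned quantity; it is an upper bound on the
# fraction of training points lying outside the epsilon-tube and a lower bound
# on the fraction of support vectors (Schoelkopf et al., New Support Vector
# Algorithms). A hypothetical construction mirroring the sketch above:
#
#     model = LibSvmNuRegressionModel(kernel, nu=0.5, cost=10.0)
#     results = model.fit(traindata)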