File: example_lm.py

package info (click to toggle)
patsy 0.4.1%2Bgit34-ga5b54c2-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,444 kB
  • ctags: 884
  • sloc: python: 8,797; makefile: 130; sh: 15
file content (38 lines) | stat: -rw-r--r-- 1,719 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import numpy as np
from patsy import dmatrices, build_design_matrices

class LM(object):
    """An example ordinary least squares linear model class, analogous to R's
    lm() function. Don't use this in real life, it isn't properly tested."""
    def __init__(self, formula_like, data={}):
        y, x = dmatrices(formula_like, data, 1)
        self.nobs = x.shape[0]
        self.betas, self.rss, _, _ = np.linalg.lstsq(x, y)
        self._y_design_info = y.design_info
        self._x_design_info = x.design_info

    def __repr__(self):
        summary = ("Ordinary least-squares regression\n"
                   "  Model: %s ~ %s\n"
                   "  Regression (beta) coefficients:\n"
                   % (self._y_design_info.describe(),
                      self._x_design_info.describe()))
        for name, value in zip(self._x_design_info.column_names, self.betas):
            summary += "    %s:  %0.3g\n" % (name, value[0])
        return summary

    def predict(self, new_data):
        (new_x,) = build_design_matrices([self._x_design_info],
                                         new_data)
        return np.dot(new_x, self.betas)

    def loglik(self, new_data):
        (new_y, new_x) = build_design_matrices([self._y_design_info,
                                                self._x_design_info],
                                               new_data)
        new_pred = np.dot(new_x, self.betas)
        sigma2 = self.rss / self.nobs
        # It'd be more elegant to use scipy.stats.norm.logpdf here, but adding
        # a dependency on scipy makes the docs build more complicated:
        Z = -0.5 * np.log(2 * np.pi * sigma2)
        return Z + -0.5 * (new_y - new_x) ** 2/sigma2