File: test_multivariate.py

"""
Test functions for multivariate normal distributions.

"""
from __future__ import division, print_function, absolute_import

from numpy.testing import (assert_almost_equal,
        run_module_suite, assert_allclose, assert_equal, assert_raises)

import numpy
import numpy as np

import scipy.linalg
import scipy.stats._multivariate
from scipy.stats import multivariate_normal
from scipy.stats import norm

from scipy.stats._multivariate import _psd_pinv_decomposed_log_pdet
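# (As exercised in test_pseudodet_pinv below, this private helper returns
# a factor U of the pseudo-inverse, with pinv(cov) == np.dot(U, U.T),
# together with the log of the pseudo-determinant of cov.)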

from scipy.integrate import romb


def test_scalar_values():
    np.random.seed(1234)

    # When evaluated on scalar data, the pdf should return a scalar
    x, mean, cov = 1.5, 1.7, 2.5
    pdf = multivariate_normal.pdf(x, mean, cov)
    assert_equal(pdf.ndim, 0)

    # When evaluated on a single vector, the pdf should return a scalar
    x = np.random.randn(5)
    mean = np.random.randn(5)
    cov = np.abs(np.random.randn(5))  # Diagonal values for cov. matrix
    pdf = multivariate_normal.pdf(x, mean, cov)
    assert_equal(pdf.ndim, 0)


def test_logpdf():
    # Check that the log of the pdf is in fact the logpdf
    np.random.seed(1234)
    x = np.random.randn(5)
    mean = np.random.randn(5)
    cov = np.abs(np.random.randn(5))
    d1 = multivariate_normal.logpdf(x, mean, cov)
    d2 = multivariate_normal.pdf(x, mean, cov)
    assert_allclose(d1, np.log(d2))


def test_large_pseudo_determinant():
    # Check that large pseudo-determinants are handled appropriately.

    # Construct a singular diagonal covariance matrix
    # whose pseudo-determinant overflows double precision.
    large_total_log = 1000.0
    npos = 100
    nzero = 2
    large_entry = np.exp(large_total_log / npos)
    n = npos + nzero
    cov = np.zeros((n, n), dtype=float)
    np.fill_diagonal(cov, large_entry)
    cov[-nzero:, -nzero:] = 0
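
    # Each of the npos nonzero diagonal entries is exp(10), so the
    # pseudo-determinant exp(1000) overflows float64, whose largest
    # finite value is about 1.8e308 ~= exp(709.78).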

    # Check some determinants.
    assert_equal(scipy.linalg.det(cov), 0)
    assert_equal(scipy.linalg.det(cov[:npos, :npos]), np.inf)

    # np.linalg.slogdet is only available in numpy 1.6+
    # but scipy currently supports numpy 1.5.1.
    #assert_allclose(np.linalg.slogdet(cov[:npos, :npos]), (1, large_total_log))

    # Check the pseudo-determinant.
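    # Its log is the sum of the logs of the nonzero eigenvalues:
    # npos * (large_total_log / npos) = large_total_log.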
    U, log_pdet = scipy.stats._multivariate._psd_pinv_decomposed_log_pdet(cov)
    assert_allclose(log_pdet, large_total_log)


def test_broadcasting():
    np.random.seed(1234)
    n = 4

    # Construct a random covariance matrix.
    data = np.random.randn(n, n)
    cov = np.dot(data, data.T)
    mean = np.random.randn(n)

    # Construct an ndarray which can be interpreted as
    # a 2x3 array whose elements are random data vectors.
    X = np.random.randn(2, 3, n)
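
    # Evaluating the pdf on all of X at once should return a (2, 3)
    # array of densities, one per length-n data vector.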

    # Check that multiple data points can be evaluated at once.
    for i in range(2):
        for j in range(3):
            actual = multivariate_normal.pdf(X[i, j], mean, cov)
            desired = multivariate_normal.pdf(X, mean, cov)[i, j]
            assert_allclose(actual, desired)


def test_normal_1D():
    # The probability density function for a 1D normal variable should
    # agree with the univariate normal distribution in scipy.stats
    x = np.linspace(0, 2, 10)
    mean, cov = 1.2, 0.9
    scale = cov**0.5
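    # norm is parametrized by the standard deviation (scale),
    # multivariate_normal by the variance (cov)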
    d1 = norm.pdf(x, mean, scale)
    d2 = multivariate_normal.pdf(x, mean, cov)
    assert_allclose(d1, d2)


def test_marginalization():
    # Integrating out one of the variables of a 2D Gaussian should
    # yield a 1D Gaussian
    mean = np.array([2.5, 3.5])
    cov = np.array([[.5, 0.2], [0.2, .6]])
    n = 2**8 + 1  # Number of samples
    delta = 6 / (n - 1)  # Grid spacing
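    # scipy.integrate.romb requires 2**k + 1 equally spaced samples,
    # hence the choice of n above.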

    v = np.linspace(0, 6, n)
    xv, yv = np.meshgrid(v, v)
    pos = np.empty((n, n, 2))
    pos[:, :, 0] = xv
    pos[:, :, 1] = yv
    pdf = multivariate_normal.pdf(pos, mean, cov)

    # Integrate out y (axis 0) and x (axis 1) to get the marginals
    margin_x = romb(pdf, delta, axis=0)
    margin_y = romb(pdf, delta, axis=1)

    # Compare with standard normal distribution
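    # The marginal of a multivariate normal along one coordinate is a
    # 1D normal whose variance is the corresponding diagonal entry of
    # the covariance matrix, regardless of the off-diagonal terms.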
    gauss_x = norm.pdf(v, loc=mean[0], scale=cov[0, 0]**0.5)
    gauss_y = norm.pdf(v, loc=mean[1], scale=cov[1, 1]**0.5)
    assert_allclose(margin_x, gauss_x, rtol=1e-2, atol=1e-2)
    assert_allclose(margin_y, gauss_y, rtol=1e-2, atol=1e-2)


def test_frozen():
    # The frozen distribution should agree with the regular one
    np.random.seed(1234)
    x = np.random.randn(5)
    mean = np.random.randn(5)
    cov = np.abs(np.random.randn(5))
    norm_frozen = multivariate_normal(mean, cov)
    assert_allclose(norm_frozen.pdf(x), multivariate_normal.pdf(x, mean, cov))
    assert_allclose(norm_frozen.logpdf(x),
                    multivariate_normal.logpdf(x, mean, cov))


def test_pseudodet_pinv():
    # Make sure that the pseudo-inverse and the pseudo-determinant
    # agree on the eigenvalue cutoff

    # Assemble random covariance matrix with large and small eigenvalues
    np.random.seed(1234)
    n = 7
    x = np.random.randn(n, n)
    cov = np.dot(x, x.T)
    s, u = scipy.linalg.eigh(cov)
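    # Only the eigenvectors u are kept; the spectrum is replaced below
    # so that exactly one eigenvalue falls under the cutoff.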
    s = 0.5 * np.ones(n)
    s[0] = 1.0
    s[-1] = 1e-7
    cov = np.dot(u, np.dot(np.diag(s), u.T))

    # Set cond so that the lowest eigenvalue is below the cutoff
    cond = 1e-5
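    # The cutoff is relative to the largest eigenvalue (1.0 here), so
    # only s[-1] = 1e-7 should be treated as zero.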
    U, log_pdet = _psd_pinv_decomposed_log_pdet(cov, cond)
    pinv = np.dot(U, U.T)
    _, log_pdet_pinv = _psd_pinv_decomposed_log_pdet(pinv, cond)

    # Check that the log pseudo-determinant agrees with the sum
    # of the logs of all but the smallest eigenvalue
    assert_allclose(log_pdet, np.sum(np.log(s[:-1])))
    # Check that the pseudo-determinant of the pseudo-inverse
    # agrees with 1 / pseudo-determinant
    assert_allclose(-log_pdet, log_pdet_pinv)


def test_exception_nonsquare_cov():
    cov = [[1, 2, 3], [4, 5, 6]]
    assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov)


def test_exception_nonfinite_cov():
    cov_nan = [[1, 0], [0, np.nan]]
    assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov_nan)
    cov_inf = [[1, 0], [0, np.inf]]
    assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov_inf)


def test_exception_non_psd_cov():
    cov = [[1, 0], [0, -1]]
    assert_raises(ValueError, _psd_pinv_decomposed_log_pdet, cov)


def test_R_values():
    # Compare the multivariate pdf with some values precomputed
    # in R version 3.0.1 (2013-05-16) on Mac OS X 10.6.

    # The values below were generated by the following R-script:
    # > library(mnormt)
    # > x <- seq(0, 2, length=5)
    # > y <- 3*x - 2
    # > z <- x + cos(y)
    # > mu <- c(1, 3, 2)
    # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3)
    # > r_pdf <- dmnorm(cbind(x,y,z), mu, Sigma)
    r_pdf = np.array([0.0002214706, 0.0013819953, 0.0049138692,
                      0.0103803050, 0.0140250800])

    x = np.linspace(0, 2, 5)
    y = 3 * x - 2
    z = x + np.cos(y)
    r = np.array([x, y, z]).T

    mean = np.array([1, 3, 2], 'd')
    cov = np.array([[1, 2, 0], [2, 5, .5], [0, .5, 3]], 'd')

    pdf = multivariate_normal.pdf(r, mean, cov)
    assert_allclose(pdf, r_pdf, atol=1e-10)


def test_rvs_shape():
    # Check that rvs parses the mean and covariance correctly, and returns
    # an array of the right shape
    N = 300
    d = 4
    sample = multivariate_normal.rvs(mean=np.zeros(d), cov=1, size=N)
    assert_equal(sample.shape, (N, d))

    sample = multivariate_normal.rvs(mean=None,
                                     cov=np.array([[2, .1], [.1, 1]]),
                                     size=N)
    assert_equal(sample.shape, (N, 2))

    u = multivariate_normal(mean=0, cov=1)
    sample = u.rvs(N)
    assert_equal(sample.shape, (N, ))


def test_large_sample():
    # Generate large sample and compare sample mean and sample covariance
    # with mean and covariance matrix.

    np.random.seed(2846)

    n = 3
    mean = np.random.randn(n)
    M = np.random.randn(n, n)
    cov = np.dot(M, M.T)
    size = 5000

    sample = multivariate_normal.rvs(mean, cov, size)

    assert_allclose(numpy.cov(sample.T), cov, rtol=1e-1)
    assert_allclose(sample.mean(0), mean, rtol=1e-1)


def test_entropy():
    np.random.seed(2846)

    n = 3
    mean = np.random.randn(n)
    M = np.random.randn(n, n)
    cov = np.dot(M, M.T)

    rv = multivariate_normal(mean, cov)

    # Check that frozen distribution agrees with entropy function
    assert_almost_equal(rv.entropy(), multivariate_normal.entropy(mean, cov))
    # Compare entropy with manually computed expression involving
    # the sum of the logs of the eigenvalues of the covariance matrix
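    # (H = 1/2 * log((2*pi*e)**n * det(cov)), where log(det(cov)) is
    # the sum of the logs of the eigenvalues)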
    eigs = np.linalg.eig(cov)[0]
    desired = 1/2 * (n * (np.log(2*np.pi) + 1) + np.sum(np.log(eigs)))
    assert_almost_equal(desired, rv.entropy())


if __name__ == "__main__":
    run_module_suite()