File: test_sketches.py

package info (click to toggle)
python-scipy 1.1.0-7
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 93,828 kB
  • sloc: python: 156,854; ansic: 82,925; fortran: 80,777; cpp: 7,505; makefile: 427; sh: 294
file content (61 lines) | stat: -rw-r--r-- 2,000 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""Tests for _sketches.py."""

from __future__ import division, print_function, absolute_import
import numpy as np
from scipy.linalg import clarkson_woodruff_transform

from numpy.testing import assert_


def make_random_dense_gaussian_matrix(n_rows, n_columns, mu=0, sigma=0.01):
    """
    Build a reproducible dense matrix of Gaussian distributed values.

    NumPy's global random generator is reseeded with a fixed value on
    every call, so two calls with the same arguments return equal data.
    ``n_rows * n_columns`` samples are drawn from a normal distribution
    with mean ``mu`` and standard deviation ``sigma`` and handed back as
    an array of shape ``(n_rows, n_columns)``.
    """
    # Fixed seed: keeps the generated test data identical between runs.
    np.random.seed(142352345)
    n_samples = n_rows * n_columns
    flat_samples = np.random.normal(mu, sigma, n_samples)
    return flat_samples.reshape((n_rows, n_columns))


class TestClarksonWoodruffTransform(object):
    """
    Testing the Clarkson Woodruff Transform

    A dense Gaussian matrix is generated once, when the class body is
    executed, and shared by all tests.  The tests check the shape of the
    sketch and that its norm stays close to the norm of the input.
    """
    # Big dense matrix dimensions
    n_matrix_rows = 2000
    n_matrix_columns = 100

    # Sketch matrix dimensions
    n_sketch_rows = 100

    # Error threshold: largest accepted absolute difference between the
    # norm of the input matrix and the norm of its sketch
    threshold = 0.1

    # Shared input data, built once at class-definition time
    dense_big_matrix = make_random_dense_gaussian_matrix(n_matrix_rows,
                                                         n_matrix_columns)

    def test_sketch_dimensions(self):
        """The sketch has ``n_sketch_rows`` rows and keeps the columns."""
        sketch = clarkson_woodruff_transform(self.dense_big_matrix,
                                             self.n_sketch_rows)

        assert_(sketch.shape == (self.n_sketch_rows,
                                 self.dense_big_matrix.shape[1]))

    def test_sketch_rows_norm(self):
        """The norm of the sketch stays within ``threshold`` of the input."""
        # Given the probabilistic nature of the sketches
        # we run the 'test' multiple times and check that
        # we pass all/almost all the tries
        n_errors = 0

        seeds = [1755490010, 934377150, 1391612830, 1752708722, 2008891431,
                 1302443994, 1521083269, 1501189312, 1126232505, 1533465685]

        # The input does not change between tries, so its norm is computed
        # only once.  We could use other norms (like L2); np.linalg.norm
        # defaults to the Frobenius norm for 2-D input.
        expected_norm = np.linalg.norm(self.dense_big_matrix)

        for seed_ in seeds:
            sketch = clarkson_woodruff_transform(self.dense_big_matrix,
                                                 self.n_sketch_rows, seed_)

            err = expected_norm - np.linalg.norm(sketch)
            # Compare the magnitude of the error: a sketch whose norm is
            # too large is just as wrong as one whose norm is too small,
            # and a signed comparison would silently accept the former.
            if np.abs(err) > self.threshold:
                n_errors += 1

        assert_(n_errors == 0,
                "%d of %d sketches deviate from the input norm by more "
                "than %s" % (n_errors, len(seeds), self.threshold))