File: bench_plot_svd.py

package info (click to toggle)
scikit-learn 0.11.0-2%2Bdeb7u1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 13,900 kB
  • sloc: python: 34,740; ansic: 8,860; cpp: 8,849; pascal: 230; makefile: 211; sh: 14
file content (81 lines) | stat: -rw-r--r-- 2,866 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""Benchmarks of Singular Values Decomposition (Exact and Approximate)

The data is mostly low rank but is a fat infinite tail.
"""
import gc
from time import time
import numpy as np
from collections import defaultdict

from scipy.linalg import svd
from sklearn.utils.extmath import randomized_svd
from sklearn.datasets.samples_generator import make_low_rank_matrix


def compute_bench(samples_range, features_range, n_iterations=3, rank=50):

    it = 0

    results = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print '===================='
            print 'Iteration %03d of %03d' % (it, max_it)
            print '===================='
            X = make_low_rank_matrix(n_samples, n_features,
                                  effective_rank=rank,
                                  tail_strength=0.2)

            gc.collect()
            print "benching scipy svd: "
            tstart = time()
            svd(X, full_matrices=False)
            results['scipy svd'].append(time() - tstart)

            gc.collect()
            print "benching scikit-learn randomized_svd: n_iterations=0"
            tstart = time()
            randomized_svd(X, rank, n_iterations=0)
            results['scikit-learn randomized_svd (n_iterations=0)'].append(
                time() - tstart)

            gc.collect()
            print ("benching scikit-learn randomized_svd: n_iterations=%d "
                   % n_iterations)
            tstart = time()
            randomized_svd(X, rank, n_iterations=n_iterations)
            results['scikit-learn randomized_svd (n_iterations=%d)'
                    % n_iterations].append(time() - tstart)

    return results


if __name__ == '__main__':
    from mpl_toolkits.mplot3d import axes3d  # register the 3d projection
    import matplotlib.pyplot as plt

    samples_range = np.linspace(2, 1000, 4).astype(np.int)
    features_range = np.linspace(2, 1000, 4).astype(np.int)
    results = compute_bench(samples_range, features_range)

    fig = plt.figure()
    ax = fig.gca(projection='3d')
    for c, (label, timings) in zip('rbg', sorted(results.iteritems())):
        X, Y = np.meshgrid(samples_range, features_range)
        Z = np.asarray(timings).reshape(samples_range.shape[0],
                                        features_range.shape[0])
        # plot the actual surface
        ax.plot_surface(X, Y, Z, rstride=8, cstride=8, alpha=0.3,
                        color=c)
        # dummy point plot to stick the legend to since surface plot do not
        # support legends (yet?)
        ax.plot([1], [1], [1], color=c, label=label)

    ax.set_xlabel('n_samples')
    ax.set_ylabel('n_features')
    ax.set_zlabel('time (s)')
    ax.legend()
    plt.show()