File: bench_plot_lasso_path.py

package info (click to toggle)
scikit-learn 0.18-5
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 71,040 kB
  • ctags: 91,142
  • sloc: python: 97,257; ansic: 8,360; cpp: 5,649; makefile: 242; sh: 238
file content (117 lines) | stat: -rw-r--r-- 4,005 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Benchmarks of Lasso regularization path computation using Lars and CD

The input data is mostly low rank but is a fat infinite tail.
"""
from __future__ import print_function

from collections import defaultdict
import gc
import sys
from time import time

import numpy as np

from sklearn.linear_model import lars_path
from sklearn.linear_model import lasso_path
from sklearn.datasets.samples_generator import make_regression


def compute_bench(samples_range, features_range):

    it = 0

    results = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            dataset_kwargs = {
                'n_samples': n_samples,
                'n_features': n_features,
                'n_informative': n_features / 10,
                'effective_rank': min(n_samples, n_features) / 10,
                #'effective_rank': None,
                'bias': 0.0,
            }
            print("n_samples: %d" % n_samples)
            print("n_features: %d" % n_features)
            X, y = make_regression(**dataset_kwargs)

            gc.collect()
            print("benchmarking lars_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, method='lasso')
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lars_path (with Gram)'].append(delta)

            gc.collect()
            print("benchmarking lars_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, method='lasso')
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lars_path (without Gram)'].append(delta)

            gc.collect()
            print("benchmarking lasso_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=True)
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lasso_path (with Gram)'].append(delta)

            gc.collect()
            print("benchmarking lasso_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=False)
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lasso_path (without Gram)'].append(delta)

    return results


if __name__ == '__main__':
    from mpl_toolkits.mplot3d import axes3d  # register the 3d projection
    import matplotlib.pyplot as plt

    samples_range = np.linspace(10, 2000, 5).astype(np.int)
    features_range = np.linspace(10, 2000, 5).astype(np.int)
    results = compute_bench(samples_range, features_range)

    max_time = max(max(t) for t in results.values())

    fig = plt.figure('scikit-learn Lasso path benchmark results')
    i = 1
    for c, (label, timings) in zip('bcry', sorted(results.items())):
        ax = fig.add_subplot(2, 2, i, projection='3d')
        X, Y = np.meshgrid(samples_range, features_range)
        Z = np.asarray(timings).reshape(samples_range.shape[0],
                                        features_range.shape[0])

        # plot the actual surface
        ax.plot_surface(X, Y, Z.T, cstride=1, rstride=1, color=c, alpha=0.8)

        # dummy point plot to stick the legend to since surface plot do not
        # support legends (yet?)
        # ax.plot([1], [1], [1], color=c, label=label)

        ax.set_xlabel('n_samples')
        ax.set_ylabel('n_features')
        ax.set_zlabel('Time (s)')
        ax.set_zlim3d(0.0, max_time * 1.1)
        ax.set_title(label)
        # ax.legend()
        i += 1
    plt.show()