File: memory_basic_usage.py

package info (click to toggle)
joblib 1.5.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,284 kB
  • sloc: python: 15,669; sh: 124; makefile: 39
file content (139 lines) | stat: -rw-r--r-- 4,698 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""
========================
How to use joblib.Memory
========================

This example illustrates the usage of :class:`joblib.Memory` with both
functions and methods.

"""

###############################################################################
# Without :class:`joblib.Memory`
###############################################################################
#
# ``costly_compute`` emulates a computationally expensive process which later
# will benefit from caching using :class:`joblib.Memory`.

import time

import numpy as np


def costly_compute(data, column_index=0):
    """Simulate an expensive computation"""
    time.sleep(5)
    return data[column_index]


###############################################################################
# Be sure to set the random seed to generate deterministic data. Indeed, if the
# data is not deterministic, the :class:`joblib.Memory` instance will not be
# able to reuse the cache from one run to another.

rng = np.random.RandomState(42)
data = rng.randn(int(1e5), 10)
start = time.time()
data_trans = costly_compute(data)
end = time.time()

print("\nThe function took {:.2f} s to compute.".format(end - start))
print("\nThe transformed data are:\n {}".format(data_trans))

###############################################################################
# Caching the result of a function to avoid recomputing
###############################################################################
#
# If we need to call our function several times with the same input data, it is
# beneficial to avoid recomputing the same results over and over since it is
# expensive. :class:`joblib.Memory` enables to cache results from a function
# into a specific location.

from joblib import Memory

location = "./cachedir"
memory = Memory(location, verbose=0)


def costly_compute_cached(data, column_index=0):
    """Simulate an expensive computation"""
    time.sleep(5)
    return data[column_index]


costly_compute_cached = memory.cache(costly_compute_cached)
start = time.time()
data_trans = costly_compute_cached(data)
end = time.time()

print("\nThe function took {:.2f} s to compute.".format(end - start))
print("\nThe transformed data are:\n {}".format(data_trans))

###############################################################################
# At the first call, the results will be cached. Therefore, the computation
# time corresponds to the time to compute the results plus the time to dump the
# results into the disk.

start = time.time()
data_trans = costly_compute_cached(data)
end = time.time()

print("\nThe function took {:.2f} s to compute.".format(end - start))
print("\nThe transformed data are:\n {}".format(data_trans))

###############################################################################
# At the second call, the computation time is largely reduced since the results
# are obtained by loading the data previously dumped to the disk instead of
# recomputing the results.

###############################################################################
# Using :class:`joblib.Memory` with a method
###############################################################################
#
# :class:`joblib.Memory` is designed to work with functions with no side
# effects. When dealing with class, the computationally expensive part of a
# method has to be moved to a function and decorated in the class method.


def _costly_compute_cached(data, column):
    time.sleep(5)
    return data[column]


class Algorithm(object):
    """A class which is using the previous function."""

    def __init__(self, column=0):
        self.column = column

    def transform(self, data):
        costly_compute = memory.cache(_costly_compute_cached)
        return costly_compute(data, self.column)


transformer = Algorithm()
start = time.time()
data_trans = transformer.transform(data)
end = time.time()

print("\nThe function took {:.2f} s to compute.".format(end - start))
print("\nThe transformed data are:\n {}".format(data_trans))

###############################################################################

start = time.time()
data_trans = transformer.transform(data)
end = time.time()

print("\nThe function took {:.2f} s to compute.".format(end - start))
print("\nThe transformed data are:\n {}".format(data_trans))

###############################################################################
# As expected, the second call to the ``transform`` method load the results
# which have been cached.

###############################################################################
# Clean up cache directory
###############################################################################

memory.clear(warn=False)