1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
|
"""
========================
How to use joblib.Memory
========================
This example illustrates the usage of :class:`joblib.Memory` with both
functions and methods.
"""
###############################################################################
# Without :class:`joblib.Memory`
###############################################################################
#
# ``costly_compute`` emulates a computationally expensive process which later
# will benefit from caching using :class:`joblib.Memory`.
import time
import numpy as np
def costly_compute(data, column_index=0):
    """Emulate a slow computation, then look up one item of ``data``.

    The function pauses for five seconds to stand in for real work and
    then returns ``data[column_index]``. Note that for a 2-D NumPy array
    this plain indexing selects along the first axis.

    Parameters
    ----------
    data : indexable
        Any object supporting ``data[column_index]``.
    column_index : optional
        Index handed to ``data[...]``. Defaults to ``0``.

    Returns
    -------
    The item ``data[column_index]``.
    """
    pause_seconds = 5  # artificial cost of the "computation"
    time.sleep(pause_seconds)
    selected = data[column_index]
    return selected
###############################################################################
# Be sure to set the random seed to generate deterministic data. Indeed, if the
# data is not deterministic, the :class:`joblib.Memory` instance will not be
# able to reuse the cache from one run to another.
rng = np.random.RandomState(42)
# Fixed seed above: the same 2-D array is drawn on every run.
n_rows, n_cols = int(1e5), 10
data = rng.randn(n_rows, n_cols)

# Time a single call to the uncached function.
start = time.time()
data_trans = costly_compute(data)
end = time.time()
elapsed = end - start

print("\nThe function took {:.2f} s to compute.".format(elapsed))
print("\nThe transformed data are:\n {}".format(data_trans))
###############################################################################
# Caching the result of a function to avoid recomputing
###############################################################################
#
# If we need to call our function several times with the same input data, it is
# beneficial to avoid recomputing the same results over and over since it is
# expensive. :class:`joblib.Memory` makes it possible to cache the results
# of a function in a specific location.
from joblib import Memory
location = "./cachedir"  # path handed to Memory as its cache location
memory = Memory(location=location, verbose=0)
@memory.cache
def costly_compute_cached(data, column_index=0):
    """Emulate a slow computation whose result is cached by ``memory``.

    The body pauses for five seconds and then returns
    ``data[column_index]``. The function is wrapped with ``memory.cache``,
    so the module-level name refers to the wrapped callable.

    Parameters
    ----------
    data : indexable
        Any object supporting ``data[column_index]``.
    column_index : optional
        Index handed to ``data[...]``. Defaults to ``0``.

    Returns
    -------
    The item ``data[column_index]``.
    """
    time.sleep(5)
    selected = data[column_index]
    return selected
start = time.time()
# First timed call to the function wrapped by ``memory.cache``.
data_trans = costly_compute_cached(data)
end = time.time()
elapsed = end - start

print("\nThe function took {:.2f} s to compute.".format(elapsed))
print("\nThe transformed data are:\n {}".format(data_trans))
###############################################################################
# At the first call, the results will be cached. Therefore, the computation
# time corresponds to the time to compute the results plus the time to dump
# them to disk.
start = time.time()
# Same call, same input: timed again to compare with the previous run.
data_trans = costly_compute_cached(data)
end = time.time()
elapsed = end - start

print("\nThe function took {:.2f} s to compute.".format(elapsed))
print("\nThe transformed data are:\n {}".format(data_trans))
###############################################################################
# At the second call, the computation time is greatly reduced since the results
# are obtained by loading the data previously dumped to the disk instead of
# recomputing the results.
###############################################################################
# Using :class:`joblib.Memory` with a method
###############################################################################
#
# :class:`joblib.Memory` is designed to work with functions with no side
# effects. When dealing with a class, the computationally expensive part of a
# method has to be moved to a function, which is then cached from within the
# method.
def _costly_compute_cached(data, column):
    """Pause for five seconds, then return ``data[column]``.

    Module-level helper holding the slow part of ``Algorithm.transform``,
    which wraps it with ``memory.cache`` before calling it.

    Parameters
    ----------
    data : indexable
        Any object supporting ``data[column]``.
    column
        Index handed to ``data[...]``.

    Returns
    -------
    The item ``data[column]``.
    """
    time.sleep(5)
    picked = data[column]
    return picked
class Algorithm:
    """Example class whose ``transform`` relies on a cached function.

    Parameters
    ----------
    column : optional
        Index stored on the instance and later passed to
        ``_costly_compute_cached``. Defaults to ``0``.
    """

    def __init__(self, column=0):
        self.column = column

    def transform(self, data):
        """Return the result of the cached helper for ``self.column``.

        The module-level ``_costly_compute_cached`` function is wrapped
        with ``memory.cache`` on each call and then invoked with ``data``
        and ``self.column``.
        """
        cached_compute = memory.cache(_costly_compute_cached)
        result = cached_compute(data, self.column)
        return result
transformer = Algorithm()

# First timed call to ``transform`` on this instance.
start = time.time()
data_trans = transformer.transform(data)
end = time.time()
elapsed = end - start

print("\nThe function took {:.2f} s to compute.".format(elapsed))
print("\nThe transformed data are:\n {}".format(data_trans))
###############################################################################
start = time.time()
# Same instance, same input: timed again to compare with the previous run.
data_trans = transformer.transform(data)
end = time.time()
elapsed = end - start

print("\nThe function took {:.2f} s to compute.".format(elapsed))
print("\nThe transformed data are:\n {}".format(data_trans))
###############################################################################
# As expected, the second call to the ``transform`` method loads the results
# which have been cached.
###############################################################################
# Clean up cache directory
###############################################################################
memory.clear(warn=False)
# NOTE(review): ``warn=False`` presumably suppresses the warning ``clear``
# would otherwise emit; confirm against the joblib documentation.
|