1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
|
"""
==============================================================
Restricted Boltzmann Machine features for digit classification
==============================================================
For greyscale image data where pixel values can be interpreted as degrees of
blackness on a white background, like handwritten digit recognition, the
Bernoulli Restricted Boltzmann machine model (:class:`BernoulliRBM
<sklearn.neural_network.BernoulliRBM>`) can perform effective non-linear
feature extraction.
"""
# Authors: Yann N. Dauphin, Vlad Niculae, Gabriel Synnaeve
# License: BSD
# %%
# Generate data
# -------------
#
# In order to learn good latent representations from a small dataset, we
# artificially generate more labeled data by perturbing the training data with
# linear shifts of 1 pixel in each direction.
import numpy as np
from scipy.ndimage import convolve
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
def nudge_dataset(X, Y):
"""
This produces a dataset 5 times bigger than the original one,
by moving the 8x8 images in X around by 1px to left, right, down, up
"""
direction_vectors = [
[[0, 1, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [1, 0, 0], [0, 0, 0]],
[[0, 0, 0], [0, 0, 1], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 1, 0]],
]
def shift(x, w):
return convolve(x.reshape((8, 8)), mode="constant", weights=w).ravel()
X = np.concatenate(
[X] + [np.apply_along_axis(shift, 1, X, vector) for vector in direction_vectors]
)
Y = np.concatenate([Y for _ in range(5)], axis=0)
return X, Y
X, y = datasets.load_digits(return_X_y=True)
X = np.asarray(X, "float32")
X, Y = nudge_dataset(X, y)
X = minmax_scale(X, feature_range=(0, 1)) # 0-1 scaling
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
# %%
# Models definition
# -----------------
#
# We build a classification pipeline with a BernoulliRBM feature extractor and
# a :class:`LogisticRegression <sklearn.linear_model.LogisticRegression>`
# classifier.
from sklearn import linear_model
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
logistic = linear_model.LogisticRegression(solver="newton-cg", tol=1)
rbm = BernoulliRBM(random_state=0, verbose=True)
rbm_features_classifier = Pipeline(steps=[("rbm", rbm), ("logistic", logistic)])
# %%
# Training
# --------
#
# The hyperparameters of the entire model (learning rate, hidden layer size,
# regularization) were optimized by grid search, but the search is not
# reproduced here because of runtime constraints.
from sklearn.base import clone
# Hyper-parameters. These were set by cross-validation,
# using a GridSearchCV. Here we are not performing cross-validation to
# save time.
rbm.learning_rate = 0.06
rbm.n_iter = 10
# More components tend to give better prediction performance, but larger
# fitting time
rbm.n_components = 100
logistic.C = 6000
# Training RBM-Logistic Pipeline
rbm_features_classifier.fit(X_train, Y_train)
# Training the Logistic regression classifier directly on the pixel
raw_pixel_classifier = clone(logistic)
raw_pixel_classifier.C = 100.0
raw_pixel_classifier.fit(X_train, Y_train)
# %%
# Evaluation
# ----------
from sklearn import metrics
Y_pred = rbm_features_classifier.predict(X_test)
print(
"Logistic regression using RBM features:\n%s\n"
% (metrics.classification_report(Y_test, Y_pred))
)
# %%
Y_pred = raw_pixel_classifier.predict(X_test)
print(
"Logistic regression using raw pixel features:\n%s\n"
% (metrics.classification_report(Y_test, Y_pred))
)
# %%
# The features extracted by the BernoulliRBM help improve the classification
# accuracy with respect to the logistic regression on raw pixels.
# %%
# Plotting
# --------
import matplotlib.pyplot as plt
plt.figure(figsize=(4.2, 4))
for i, comp in enumerate(rbm.components_):
plt.subplot(10, 10, i + 1)
plt.imshow(comp.reshape((8, 8)), cmap=plt.cm.gray_r, interpolation="nearest")
plt.xticks(())
plt.yticks(())
plt.suptitle("100 components extracted by RBM", fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
plt.show()
|