1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
|
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.kernel_approximation import RBFSampler
from sklearn.kernel_approximation import AdditiveChi2Sampler
from sklearn.kernel_approximation import SkewedChi2Sampler
from sklearn.metrics.pairwise import rbf_kernel
# generate data
rng = np.random.RandomState(0)
X = rng.random_sample(size=(300, 50))
Y = rng.random_sample(size=(300, 50))
X /= X.sum(axis=1)[:, np.newaxis]
Y /= Y.sum(axis=1)[:, np.newaxis]
def test_additive_chi2_sampler():
"""test that AdditiveChi2Sampler approximates kernel on random data"""
# compute exact kernel
# appreviations for easier formular
X_ = X[:, np.newaxis, :]
Y_ = Y[np.newaxis, :, :]
large_kernel = 2 * X_ * Y_ / (X_ + Y_)
# reduce to n_samples_x x n_samples_y by summing over features
kernel = (large_kernel.sum(axis=2))
# appoximate kernel mapping
transform = AdditiveChi2Sampler(sample_steps=3)
X_trans = transform.fit_transform(X)
Y_trans = transform.transform(Y)
kernel_approx = np.dot(X_trans, Y_trans.T)
np.testing.assert_array_almost_equal(kernel, kernel_approx, 1)
def test_skewed_chi2_sampler():
"""test that RBFSampler approximates kernel on random data"""
# compute exact kernel
c = 0.03
# appreviations for easier formular
X_c = (X + c)[:, np.newaxis, :]
Y_c = (Y + c)[np.newaxis, :, :]
# we do it in log-space in the hope that it's more stable
# this array is n_samples_x x n_samples_y big x n_features
log_kernel = ((np.log(X_c) / 2.) + (np.log(Y_c) / 2.) +
np.log(2.) - np.log(X_c + Y_c))
# reduce to n_samples_x x n_samples_y by summing over features in log-space
kernel = np.exp(log_kernel.sum(axis=2))
# appoximate kernel mapping
transform = SkewedChi2Sampler(skewedness=c, n_components=1000,
random_state=42)
X_trans = transform.fit_transform(X)
Y_trans = transform.transform(Y)
kernel_approx = np.dot(X_trans, Y_trans.T)
np.testing.assert_array_almost_equal(kernel, kernel_approx, 1)
def test_rbf_sampler():
"""test that RBFSampler approximates kernel on random data"""
# compute exact kernel
gamma = 10.
kernel = rbf_kernel(X, Y, gamma=gamma)
# appoximate kernel mapping
rbf_transform = RBFSampler(gamma=gamma, n_components=1000, random_state=42)
X_trans = rbf_transform.fit_transform(X)
Y_trans = rbf_transform.transform(Y)
kernel_approx = np.dot(X_trans, Y_trans.T)
np.testing.assert_array_almost_equal(kernel, kernel_approx, 1)
def test_input_validation():
"""Regression test: kernel approx. transformers should work on lists
No assertions; the old versions would simply crash
"""
X = [[1, 2], [3, 4], [5, 6]]
AdditiveChi2Sampler().fit(X).transform(X)
SkewedChi2Sampler().fit(X).transform(X)
RBFSampler().fit(X).transform(X)
X = csr_matrix(X)
RBFSampler().fit(X).transform(X)
if __name__ == "__main__":
test_additive_chi2_sampler()
test_input_validation()
test_skewed_chi2_sampler()
test_rbf_sampler()
|