1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
|
"""
Testing for the nearest centroid module.
"""
import numpy as np
from scipy import sparse as sp
from numpy.testing import assert_array_equal
from numpy.testing import assert_equal
from sklearn.neighbors import NearestCentroid
from sklearn import datasets
from sklearn.metrics.pairwise import pairwise_distances
# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
X_csr = sp.csr_matrix(X) # Sparse matrix
y = [-1, -1, -1, 1, 1, 1]
T = [[-1, -1], [2, 2], [3, 2]]
T_csr = sp.csr_matrix(T)
true_result = [-1, 1, 1]
# also load the iris dataset
# and randomly permute it
iris = datasets.load_iris()
rng = np.random.RandomState(1)
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
def test_classification_toy():
"""Check classification on a toy dataset, including sparse versions."""
clf = NearestCentroid()
clf.fit(X, y)
assert_array_equal(clf.predict(T), true_result)
# Same test, but with a sparse matrix to fit and test.
clf = NearestCentroid()
clf.fit(X_csr, y)
assert_array_equal(clf.predict(T_csr), true_result)
# Fit with sparse, test with non-sparse
clf = NearestCentroid()
clf.fit(X_csr, y)
assert_array_equal(clf.predict(T), true_result)
# Fit with non-sparse, test with sparse
clf = NearestCentroid()
clf.fit(X, y)
assert_array_equal(clf.predict(T_csr), true_result)
def test_precomputed():
clf = NearestCentroid(metric="precomputed")
clf.fit(X, y)
S = pairwise_distances(T, clf.centroids_)
assert_array_equal(clf.predict(S), true_result)
def test_iris():
"""Check consistency on dataset iris."""
for metric in ('euclidean', 'cosine'):
clf = NearestCentroid(metric=metric).fit(iris.data, iris.target)
score = np.mean(clf.predict(iris.data) == iris.target)
assert score > 0.9, "Failed with score = " + str(score)
def test_iris_shrinkage():
"""Check consistency on dataset iris, when using shrinkage."""
for metric in ('euclidean', 'cosine'):
for shrink_threshold in [None, 0.1, 0.5]:
clf = NearestCentroid(metric=metric,
shrink_threshold=shrink_threshold)
clf = clf.fit(iris.data, iris.target)
score = np.mean(clf.predict(iris.data) == iris.target)
assert score > 0.8, "Failed with score = " + str(score)
def test_pickle():
import pickle
# classification
obj = NearestCentroid()
obj.fit(iris.data, iris.target)
score = obj.score(iris.data, iris.target)
s = pickle.dumps(obj)
obj2 = pickle.loads(s)
assert_equal(type(obj2), obj.__class__)
score2 = obj2.score(iris.data, iris.target)
assert_array_equal(score, score2,
"Failed to generate same score"
" after pickling (classification).")
if __name__ == "__main__":
import nose
nose.runmodule()
|