1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
|
from umap import AlignedUMAP
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans
import numpy as np
from sklearn.metrics import adjusted_rand_score
# ===============================
# Test AlignedUMAP on sliced iris
# ===============================
def nn_accuracy(true_nn, embd_nn):
    """Return the fraction of true neighbors that also appear in the embedding.

    For every row ``i``, count how many entries of ``true_nn[i]`` occur
    anywhere in ``embd_nn[i]`` (position within the row is ignored), sum
    those counts over all rows, and divide by ``true_nn.size``.

    Parameters
    ----------
    true_nn : ndarray
        Neighbor indices computed in the original data space, one row per
        point. Callers in this file pass ``(n_points, 10)`` arrays.
    embd_nn : ndarray
        Neighbor indices computed in the embedded space, indexed by the
        same row numbers as ``true_nn``.

    Returns
    -------
    float
        Total number of matching entries divided by ``true_nn.size``.

    Raises
    ------
    ZeroDivisionError
        If ``true_nn`` has no elements.
    """
    num_correct = 0.0
    for i, true_row in enumerate(true_nn):
        # np.isin supersedes np.in1d, which is deprecated as of NumPy 2.0;
        # for 1-D rows the two produce the same boolean mask.
        num_correct += np.sum(np.isin(true_row, embd_nn[i]))
    return num_correct / true_nn.size
def test_neighbor_local_neighbor_accuracy(aligned_iris, aligned_iris_model):
    """Check every slice's embedding retains enough of its nearest neighbors.

    For each data slice, the 10 nearest-neighbor indices are computed from
    pairwise distances in the original space and again in the matching
    entry of ``aligned_iris_model.embeddings_``; the ``nn_accuracy`` of the
    two must be at least 0.65.
    """

    def top_neighbors(points):
        # Per row: column indices of the 10 smallest pairwise distances.
        return np.argsort(pairwise_distances(points), axis=1)[:, :10]

    slices, _ = aligned_iris
    for index, block in enumerate(slices):
        true_nn = top_neighbors(block)
        embd_nn = top_neighbors(aligned_iris_model.embeddings_[index])
        assert nn_accuracy(true_nn, embd_nn) >= 0.65
def test_local_clustering(aligned_iris, aligned_iris_model):
    """Check that 2-cluster KMeans on selected embeddings matches the labels.

    The embeddings for slices 1 and 3 are each clustered with
    ``KMeans(n_clusters=2)`` and scored against that slice's targets using
    the adjusted Rand index, which must reach 0.75 and 0.40 respectively.
    """
    _, target = aligned_iris
    # (slice index, minimum adjusted Rand index), checked in this order.
    for index, min_ari in ((1, 0.75), (3, 0.40)):
        embd = aligned_iris_model.embeddings_[index]
        clusters = KMeans(n_clusters=2).fit_predict(embd)
        assert adjusted_rand_score(target[index], clusters) >= min_ari
def test_aligned_update(aligned_iris, aligned_iris_relations):
    """Check neighbor preservation after extending a model via ``update``.

    A default ``AlignedUMAP`` is fitted on the first three slices with the
    first two relations, then updated with the fourth slice and the third
    relation. For each of the four slices, the 10 nearest-neighbor indices
    in the original space and in the corresponding embedding must give an
    ``nn_accuracy`` of at least 0.45.
    """

    def top_neighbors(points):
        # Per row: column indices of the 10 smallest pairwise distances.
        return np.argsort(pairwise_distances(points), axis=1)[:, :10]

    slices, _ = aligned_iris
    small_aligned_model = AlignedUMAP()
    small_aligned_model.fit(slices[:3], relations=aligned_iris_relations[:2])
    small_aligned_model.update(slices[3], relations=aligned_iris_relations[2])

    for index, block in enumerate(slices[:4]):
        true_nn = top_neighbors(block)
        embd_nn = top_neighbors(small_aligned_model.embeddings_[index])
        assert nn_accuracy(true_nn, embd_nn) >= 0.45
|