File: test_aligned_umap.py

package info (click to toggle)
umap-learn 0.5.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 2,468 kB
  • sloc: python: 9,458; sh: 87; makefile: 20
file content (53 lines) | stat: -rw-r--r-- 1,972 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from umap import AlignedUMAP
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans
import numpy as np
from sklearn.metrics import adjusted_rand_score

# ===============================
# Test AlignedUMAP on sliced iris
# ===============================


def nn_accuracy(true_nn, embd_nn):
    """Return the fraction of true neighbors that also appear in the embedding.

    Parameters
    ----------
    true_nn : ndarray
        Reference neighbor indices, one row per point. The callers in this
        file pass the ten nearest indices computed in the input space.
    embd_nn : ndarray
        Neighbor indices for the same points computed in the embedding;
        row ``i`` is compared against row ``i`` of ``true_nn``.

    Returns
    -------
    float
        Number of entries of ``true_nn`` whose value occurs in the matching
        row of ``embd_nn``, divided by ``true_nn.size``.
    """
    num_correct = 0.0
    for i, true_row in enumerate(true_nn):
        # np.isin replaces np.in1d, which NumPy 2.0 deprecated; summing the
        # boolean membership mask gives the same per-row hit count.
        num_correct += np.sum(np.isin(true_row, embd_nn[i]))
    return num_correct / true_nn.size


def test_neighbor_local_neighbor_accuracy(aligned_iris, aligned_iris_model):
    """Check that every aligned embedding keeps most local neighborhoods.

    For each data slice, the ten lowest-distance indices per point are
    computed both in the input space and in the matching entry of
    ``aligned_iris_model.embeddings_``; at least 65% of them must agree.

    NOTE(review): ``aligned_iris`` and ``aligned_iris_model`` are presumably
    pytest fixtures defined outside this file -- confirm in conftest.
    """
    slices, _ = aligned_iris

    def ten_nearest(points):
        # Per-row indices of the ten smallest pairwise distances.
        return np.argsort(pairwise_distances(points), axis=1)[:, :10]

    for idx, points in enumerate(slices):
        expected = ten_nearest(points)
        observed = ten_nearest(aligned_iris_model.embeddings_[idx])
        assert nn_accuracy(expected, observed) >= 0.65


def test_local_clustering(aligned_iris, aligned_iris_model):
    """Check that two-cluster KMeans on selected embeddings matches the labels.

    Embeddings 1 and 3 are each clustered with ``KMeans(n_clusters=2)`` and
    scored against the corresponding target slice with the adjusted Rand
    index; each slice has its own minimum acceptable score.

    NOTE(review): ``aligned_iris`` and ``aligned_iris_model`` are presumably
    pytest fixtures defined outside this file -- confirm in conftest.
    """
    _, labels = aligned_iris

    # (slice index, minimum acceptable adjusted Rand index)
    for slice_idx, min_ari in ((1, 0.75), (3, 0.40)):
        embedding = aligned_iris_model.embeddings_[slice_idx]
        predicted = KMeans(n_clusters=2).fit_predict(embedding)
        assert adjusted_rand_score(labels[slice_idx], predicted) >= min_ari


def test_aligned_update(aligned_iris, aligned_iris_relations):
    """Check neighbor preservation after extending a fitted model via update().

    A default ``AlignedUMAP`` is fitted on the first three slices (with the
    first two relations), then extended with the fourth slice and the third
    relation. Each of the four resulting embeddings must share at least 45%
    of its ten lowest-distance indices per point with the input space.

    NOTE(review): ``aligned_iris`` and ``aligned_iris_relations`` are
    presumably pytest fixtures defined outside this file -- confirm in
    conftest.
    """
    slices, _ = aligned_iris

    model = AlignedUMAP()
    model.fit(slices[:3], relations=aligned_iris_relations[:2])
    model.update(slices[3], relations=aligned_iris_relations[2])

    def ten_nearest(points):
        # Per-row indices of the ten smallest pairwise distances.
        return np.argsort(pairwise_distances(points), axis=1)[:, :10]

    for idx, points in enumerate(slices[:4]):
        expected = ten_nearest(points)
        observed = ten_nearest(model.embeddings_[idx])
        assert nn_accuracy(expected, observed) >= 0.45