File: test_aligned_umap.py

package info (click to toggle)
umap-learn 0.5.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,284 kB
  • sloc: python: 9,863; sh: 87; makefile: 20
file content (82 lines) | stat: -rw-r--r-- 3,323 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pytest
from umap import AlignedUMAP
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans
import numpy as np
from sklearn.metrics import adjusted_rand_score

# ===============================
# Test AlignedUMAP on sliced iris
# ===============================


def nn_accuracy(true_nn, embd_nn):
    """Return the fraction of true neighbours recovered in the embedding.

    Parameters
    ----------
    true_nn : numpy.ndarray
        Neighbour indices computed from the original data, one row per point.
    embd_nn : indexable
        Neighbour indices computed from the embedding; ``embd_nn[i]`` is
        compared against ``true_nn[i]``.

    Returns
    -------
    float
        Number of entries of ``true_nn`` that also occur in the matching row
        of ``embd_nn``, divided by ``true_nn.size``.
    """
    num_correct = 0.0
    for i, true_row in enumerate(true_nn):
        # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
        # Indexing embd_nn[i] (rather than zipping) keeps the IndexError
        # when embd_nn has fewer rows than true_nn.
        num_correct += np.sum(np.isin(true_row, embd_nn[i]))
    return num_correct / true_nn.size


def test_neighbor_local_neighbor_accuracy(aligned_iris, aligned_iris_model):
    """Check neighbour preservation for every slice of the fitted model.

    For each slice, the 10 lowest-distance columns per row are taken from the
    pairwise-distance matrix of the input data and of the matching entry in
    ``aligned_iris_model.embeddings_``; ``nn_accuracy`` of the two must be at
    least 0.65.
    """
    slices, _ = aligned_iris
    for idx, window in enumerate(slices):
        # Neighbour indices in the original feature space.
        expected_nn = np.argsort(pairwise_distances(window), axis=1)[:, :10]
        # Neighbour indices in the corresponding embedding.
        embedding_dists = pairwise_distances(aligned_iris_model.embeddings_[idx])
        observed_nn = np.argsort(embedding_dists, axis=1)[:, :10]
        assert nn_accuracy(expected_nn, observed_nn) >= 0.65


def test_local_clustering(aligned_iris, aligned_iris_model):
    """Check that 2-cluster KMeans on selected embeddings matches the labels.

    Embeddings 1 and 3 are clustered in that order, and the adjusted Rand
    index against the corresponding targets must reach 0.75 and 0.40
    respectively.
    """
    _, labels = aligned_iris

    # (embedding index, minimum adjusted Rand index), evaluated in order.
    expectations = ((1, 0.75), (3, 0.40))
    for slice_idx, min_ari in expectations:
        embedding = aligned_iris_model.embeddings_[slice_idx]
        assignments = KMeans(n_clusters=2).fit_predict(embedding)
        score = adjusted_rand_score(labels[slice_idx], assignments)
        assert score >= min_ari


def test_aligned_update(aligned_iris, aligned_iris_relations):
    """Fit on three slices, update with a fourth, and check neighbour accuracy.

    After ``fit`` on the first three slices and ``update`` with the fourth,
    each of the four embeddings must reach an ``nn_accuracy`` of at least
    0.45 against the 10 lowest-distance columns of its input slice.
    """
    slices, _ = aligned_iris

    model = AlignedUMAP()
    model.fit(slices[:3], relations=aligned_iris_relations[:2])
    model.update(slices[3], relations=aligned_iris_relations[2])

    for idx, window in enumerate(slices[:4]):
        # Neighbour indices in the original feature space.
        expected_nn = np.argsort(pairwise_distances(window), axis=1)[:, :10]
        # Neighbour indices in the corresponding embedding.
        embedding_dists = pairwise_distances(model.embeddings_[idx])
        observed_nn = np.argsort(embedding_dists, axis=1)[:, :10]
        assert nn_accuracy(expected_nn, observed_nn) >= 0.45


def test_aligned_update_params(aligned_iris, aligned_iris_relations):
    """Same as the plain update test, but with explicit ``n_neighbors`` values.

    The model is constructed with a list of per-slice ``n_neighbors`` and the
    ``update`` call passes a single ``n_neighbors`` for the added slice. Each
    of the four embeddings must then reach an ``nn_accuracy`` of at least 0.45.
    """
    slices, _ = aligned_iris
    neighbor_counts = [15] * 5

    model = AlignedUMAP(n_neighbors=neighbor_counts[:3])
    model.fit(slices[:3], relations=aligned_iris_relations[:2])
    model.update(
        slices[3],
        relations=aligned_iris_relations[2],
        n_neighbors=neighbor_counts[3],
    )

    for idx, window in enumerate(slices[:4]):
        # Neighbour indices in the original feature space.
        expected_nn = np.argsort(pairwise_distances(window), axis=1)[:, :10]
        # Neighbour indices in the corresponding embedding.
        embedding_dists = pairwise_distances(model.embeddings_[idx])
        observed_nn = np.argsort(embedding_dists, axis=1)[:, :10]
        assert nn_accuracy(expected_nn, observed_nn) >= 0.45

@pytest.mark.skip(reason="Temporarily disable")
def test_aligned_update_array_error(aligned_iris, aligned_iris_relations):
    """Expect ``update`` to raise ``ValueError`` when given sliced sequences.

    The model is fitted on the first three slices; ``update`` is then called
    with the remaining data, relations and ``n_neighbors`` passed as slices
    (``[3:]`` / ``[2:]``) instead of single items. Currently skipped.
    """
    slices, _ = aligned_iris
    neighbor_counts = [15] * 5

    model = AlignedUMAP(n_neighbors=neighbor_counts[:3])
    model.fit(slices[:3], relations=aligned_iris_relations[:2])

    with pytest.raises(ValueError):
        model.update(
            slices[3:],
            relations=aligned_iris_relations[2:],
            n_neighbors=neighbor_counts[3:],
        )