1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
|
import numpy as np
from umap import UMAP
# ===================================================
# Spatial Data Test cases
# ===================================================
# Use force_approximation_algorithm in order to test
# the region of the code that is called for n>4096
# ---------------------------------------------------
def test_repeated_points_large_sparse_spatial(sparse_spatial_data_repeats):
    """Fit sparse spatial data with repeats via the forced approximate path.

    ``force_approximation_algorithm=True`` is set in order to exercise the
    region of the code that is called for n>4096. The first two embedding
    rows must collapse to one distinct row (presumably because the fixture's
    first two samples are duplicates -- confirm against the fixture).
    """
    umap_options = dict(
        n_neighbors=3,
        unique=True,
        force_approximation_algorithm=True,
        n_epochs=20,
        verbose=True,
    )
    fitted = UMAP(**umap_options).fit(sparse_spatial_data_repeats)

    # Deduplicate the first two embedding rows; exactly one should remain.
    leading_pair = fitted.embedding_[0:2]
    distinct_rows = np.unique(leading_pair, axis=0)
    assert distinct_rows.shape[0] == 1
def test_repeated_points_small_sparse_spatial(sparse_spatial_data_repeats):
    """Fit sparse spatial data with repeats using the default algorithm choice.

    The first two embedding rows must collapse to one distinct row
    (presumably the fixture's first two samples are duplicates -- confirm
    against the fixture).
    """
    reducer = UMAP(n_neighbors=3, unique=True, n_epochs=20)
    fitted = reducer.fit(sparse_spatial_data_repeats)

    # Deduplicate the first two embedding rows; exactly one should remain.
    leading_pair = fitted.embedding_[0:2]
    distinct_rows = np.unique(leading_pair, axis=0)
    assert distinct_rows.shape[0] == 1
# Use force_approximation_algorithm in order to test the region
# of the code that is called for n>4096
def test_repeated_points_large_dense_spatial(spatial_repeats):
    """Fit dense spatial data with repeats via the forced approximate path.

    ``force_approximation_algorithm=True`` is set in order to exercise the
    region of the code that is called for n>4096. The first two embedding
    rows must collapse to one distinct row (presumably the fixture's first
    two samples are duplicates -- confirm against the fixture).
    """
    umap_options = dict(
        n_neighbors=3,
        unique=True,
        force_approximation_algorithm=True,
        n_epochs=50,
    )
    fitted = UMAP(**umap_options).fit(spatial_repeats)

    # Deduplicate the first two embedding rows; exactly one should remain.
    leading_pair = fitted.embedding_[0:2]
    distinct_rows = np.unique(leading_pair, axis=0)
    assert distinct_rows.shape[0] == 1
def test_repeated_points_small_dense_spatial(spatial_repeats):
    """Fit dense spatial data with repeats using the default algorithm choice.

    The first two embedding rows must collapse to one distinct row
    (presumably the fixture's first two samples are duplicates -- confirm
    against the fixture).
    """
    reducer = UMAP(n_neighbors=3, unique=True, n_epochs=20)
    fitted = reducer.fit(spatial_repeats)

    # Deduplicate the first two embedding rows; exactly one should remain.
    leading_pair = fitted.embedding_[0:2]
    distinct_rows = np.unique(leading_pair, axis=0)
    assert distinct_rows.shape[0] == 1
# ===================================================
# Binary Data Test cases
# ===================================================
# Use force_approximation_algorithm in order to test
# the region of the code that is called for n>4096
# ---------------------------------------------------
def test_repeated_points_large_sparse_binary(sparse_binary_data_repeats):
    """Fit sparse binary data with repeats via the forced approximate path.

    ``force_approximation_algorithm=True`` is set in order to exercise the
    region of the code that is called for n>4096. The first two embedding
    rows must collapse to one distinct row (presumably the fixture's first
    two samples are duplicates -- confirm against the fixture).
    """
    umap_options = dict(
        n_neighbors=3,
        unique=True,
        force_approximation_algorithm=True,
        n_epochs=50,
    )
    fitted = UMAP(**umap_options).fit(sparse_binary_data_repeats)

    # Deduplicate the first two embedding rows; exactly one should remain.
    leading_pair = fitted.embedding_[0:2]
    distinct_rows = np.unique(leading_pair, axis=0)
    assert distinct_rows.shape[0] == 1
def test_repeated_points_small_sparse_binary(sparse_binary_data_repeats):
    """Fit sparse binary data with repeats using the default algorithm choice.

    The first two embedding rows must collapse to one distinct row
    (presumably the fixture's first two samples are duplicates -- confirm
    against the fixture).
    """
    reducer = UMAP(n_neighbors=3, unique=True, n_epochs=20)
    fitted = reducer.fit(sparse_binary_data_repeats)

    # Deduplicate the first two embedding rows; exactly one should remain.
    leading_pair = fitted.embedding_[0:2]
    distinct_rows = np.unique(leading_pair, axis=0)
    assert distinct_rows.shape[0] == 1
# Use force_approximation_algorithm in order to test
# the region of the code that is called for n>4096
def test_repeated_points_large_dense_binary(binary_repeats):
    """Fit dense binary data with repeats via the forced approximate path.

    ``force_approximation_algorithm=True`` is set in order to exercise the
    region of the code that is called for n>4096. The first two embedding
    rows must collapse to one distinct row (presumably the fixture's first
    two samples are duplicates -- confirm against the fixture).
    """
    umap_options = dict(
        n_neighbors=3,
        unique=True,
        force_approximation_algorithm=True,
        n_epochs=20,
    )
    fitted = UMAP(**umap_options).fit(binary_repeats)

    # Deduplicate the first two embedding rows; exactly one should remain.
    leading_pair = fitted.embedding_[0:2]
    distinct_rows = np.unique(leading_pair, axis=0)
    assert distinct_rows.shape[0] == 1
def test_repeated_points_small_dense_binary(binary_repeats):
    """Fit dense binary data with repeats using the default algorithm choice.

    Two checks are made after fitting: the first two input rows of the
    fixture are identical, and the first two embedding rows are identical.
    """
    reducer = UMAP(n_neighbors=3, unique=True, n_epochs=20)
    fitted = reducer.fit(binary_repeats)

    # Precondition on the fixture: its first two rows are duplicates.
    distinct_inputs = np.unique(binary_repeats[0:2], axis=0)
    assert distinct_inputs.shape[0] == 1

    # The duplicated inputs must map to a single distinct embedding row.
    distinct_embedded = np.unique(fitted.embedding_[0:2], axis=0)
    assert distinct_embedded.shape[0] == 1
# ===================================================
# Repeated Data Test cases
# ===================================================
# ----------------------------------------------------
# Checks that n_neighbors is reduced correctly when the
# requested n_neighbors is larger than the number of
# unique points in the data set
# ----------------------------------------------------
def test_repeated_points_large_n(repetition_dense):
    """Check the neighbor count stored on the model after fitting.

    The model is built with ``n_neighbors=5`` and ``unique=True``; after
    fitting, its ``_n_neighbors`` attribute is expected to be 3
    (presumably because the fixture has too few unique rows to support
    5 neighbors -- confirm against the fixture).
    """
    reducer = UMAP(n_neighbors=5, unique=True, n_epochs=20)
    fitted = reducer.fit(repetition_dense)

    effective_neighbors = fitted._n_neighbors
    assert effective_neighbors == 3
|