1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
|
from umap import UMAP
import pytest
try:
# works for sklearn>=0.22
from sklearn.manifold import trustworthiness
except ImportError:
# this is to comply with requirements (scikit-learn>=0.20)
# More recent versions of sklearn have exposed trustworthiness
# in top level module API
# see: https://github.com/scikit-learn/scikit-learn/pull/15337
from sklearn.manifold.t_sne import trustworthiness
def test_composite_trustworthiness(nn_data, iris_model):
    """Check trustworthiness of composed UMAP models on the nn dataset.

    Two models are fit on the first 50 rows of ``nn_data``; the second one
    is initialised with the embedding learned by the first.  The models are
    then combined with the ``*`` and ``+`` operators and each combined
    embedding must reach a trustworthiness of at least 0.80.

    Finally, combining ``model1`` with the ``iris_model`` fixture through
    ``+``, ``*`` or ``-`` is expected to raise ``ValueError``.
    """
    data = nn_data[:50]
    model1 = UMAP(n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=50).fit(data)
    model2 = UMAP(
        n_neighbors=30,
        min_dist=0.01,
        random_state=42,
        n_epochs=50,
        init=model1.embedding_,
    ).fit(data)

    model3 = model1 * model2
    trust = trustworthiness(data, model3.embedding_, n_neighbors=10)
    # The message is a single literal so that the words are separated by a
    # space (adjacent literals previously rendered as "fornn dataset").
    assert (
        trust >= 0.80
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)

    model4 = model1 + model2
    trust = trustworthiness(data, model4.embedding_, n_neighbors=10)
    assert (
        trust >= 0.80
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)

    # NOTE(review): presumably these fail because iris_model was fit on a
    # different dataset than model1 -- confirm against the fixture.
    with pytest.raises(ValueError):
        _ = model1 + iris_model

    with pytest.raises(ValueError):
        _ = model1 * iris_model

    with pytest.raises(ValueError):
        _ = model1 - iris_model
@pytest.mark.skip(reason="Marked as Skipped test")
def test_composite_trustworthiness_random_init(nn_data):  # pragma: no cover
    """Check composed UMAP models that both start from a random layout.

    Both models are fit on the first 50 rows of ``nn_data`` with
    ``init="random"`` and differ only in ``n_neighbors`` (10 versus 30).
    The ``*`` and ``+`` combinations must each reach a trustworthiness of
    at least 0.82.  The test is currently skipped unconditionally.
    """
    data = nn_data[:50]
    model1 = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=50,
        init="random",
    ).fit(data)
    model2 = UMAP(
        n_neighbors=30,
        min_dist=0.01,
        random_state=42,
        n_epochs=50,
        init="random",
    ).fit(data)

    model3 = model1 * model2
    trust = trustworthiness(data, model3.embedding_, n_neighbors=10)
    # Single literal: adjacent literals used to render as "fornn dataset".
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)

    model4 = model1 + model2
    trust = trustworthiness(data, model4.embedding_, n_neighbors=10)
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for nn dataset: {}".format(trust)
def test_composite_trustworthiness_on_iris(iris):
    """Check composed UMAP models fit on disjoint iris feature subsets.

    One model is fit on the first two columns of ``iris.data`` and the
    other on the remaining columns.  The embeddings of their ``+`` and
    ``*`` combinations are scored against the full ``iris.data`` and must
    each reach a trustworthiness of at least 0.82.
    """
    iris_model1 = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
    ).fit(iris.data[:, :2])
    iris_model2 = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
    ).fit(iris.data[:, 2:])

    embedding = (iris_model1 + iris_model2).embedding_
    trust = trustworthiness(iris.data, embedding, n_neighbors=10)
    # Single literal: adjacent literals used to render as "foriris dataset".
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(trust)

    embedding = (iris_model1 * iris_model2).embedding_
    trust = trustworthiness(iris.data, embedding, n_neighbors=10)
    assert (
        trust >= 0.82
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(trust)
def test_contrastive_trustworthiness_on_iris(iris):
    """Check the ``-`` combination of two UMAP models on iris.

    One model is fit on the first two columns of ``iris.data`` and the
    other on the remaining columns.  The embedding obtained from
    ``iris_model1 - iris_model2`` is scored against the full ``iris.data``
    and must reach a trustworthiness of at least 0.75.
    """
    iris_model1 = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
    ).fit(iris.data[:, :2])
    iris_model2 = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        random_state=42,
        n_epochs=100,
    ).fit(iris.data[:, 2:])

    embedding = (iris_model1 - iris_model2).embedding_
    trust = trustworthiness(iris.data, embedding, n_neighbors=10)
    # Single literal: adjacent literals used to render as "foriris dataset".
    assert (
        trust >= 0.75
    ), "Insufficiently trustworthy embedding for iris dataset: {}".format(trust)
|