File: test_umap_validation_params.py

package info (click to toggle)
umap-learn 0.4.5%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,668 kB
  • sloc: python: 7,504; sh: 77; makefile: 17
file content (161 lines) | stat: -rw-r--r-- 4,279 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# ===============================
#  UMAP fit Parameters Validation
# ===============================

import warnings
import numpy as np
from nose.tools import assert_equal, assert_raises
from umap import UMAP

warnings.filterwarnings("ignore", category=UserWarning)


def test_umap_negative_op(nn_data):
    u = UMAP(set_op_mix_ratio=-1.0)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_too_large_op(nn_data):
    u = UMAP(set_op_mix_ratio=1.5)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_bad_too_large_min_dist(nn_data):
    u = UMAP(min_dist=2.0)
    # a RuntimeWarning about division by zero in a,b curve fitting is expected
    # caught and ignored for this test
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        assert_raises(ValueError, u.fit, nn_data)


def test_umap_negative_min_dist(nn_data):
    u = UMAP(min_dist=-1)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_negative_n_components(nn_data):
    u = UMAP(n_components=-1)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_non_integer_n_components(nn_data):
    u = UMAP(n_components=1.5)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_too_small_n_neighbours(nn_data):
    u = UMAP(n_neighbors=0.5)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_negative_n_neighbours(nn_data):
    u = UMAP(n_neighbors=-1)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_bad_metric(nn_data):
    u = UMAP(metric=45)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_negative_learning_rate(nn_data):
    u = UMAP(learning_rate=-1.5)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_negative_repulsion(nn_data):
    u = UMAP(repulsion_strength=-0.5)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_negative_sample_rate(nn_data):
    u = UMAP(negative_sample_rate=-1)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_bad_init(nn_data):
    u = UMAP(init="foobar")
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_bad_numeric_init(nn_data):
    u = UMAP(init=42)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_bad_matrix_init(nn_data):
    u = UMAP(init=np.array([[0, 0, 0], [0, 0, 0]]))
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_negative_n_epochs(nn_data):
    u = UMAP(n_epochs=-2)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_negative_target_n_neighbours(nn_data):
    u = UMAP(target_n_neighbors=1)
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_bad_output_metric(nn_data):
    u = UMAP(output_metric="foobar")
    assert_raises(ValueError, u.fit, nn_data)
    u = UMAP(output_metric="precomputed")
    assert_raises(ValueError, u.fit, nn_data)
    u = UMAP(output_metric="hamming")
    assert_raises(ValueError, u.fit, nn_data)


def test_haversine_on_highd(nn_data):
    u = UMAP(metric="haversine")
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_haversine_embed_to_highd(nn_data):
    u = UMAP(n_components=3, output_metric="haversine")
    assert_raises(ValueError, u.fit, nn_data)


def test_umap_too_many_neighbors_warns(nn_data):
    u = UMAP(a=1.2, b=1.75, n_neighbors=2000, n_epochs=11, init="random")
    u.fit(
        nn_data[:100,]
    )
    assert_equal(u._a, 1.2)
    assert_equal(u._b, 1.75)


def test_umap_fit_data_and_targets_compliant():
    # x and y are required to be the same length
    u = UMAP()
    x = np.random.uniform(0, 1, (256, 10))
    y = np.random.randint(10, size=(257,))
    assert_raises(ValueError, u.fit, x, y)

    u = UMAP()
    x = np.random.uniform(0, 1, (256, 10))
    y = np.random.randint(10, size=(255,))
    assert_raises(ValueError, u.fit, x, y)

    u = UMAP()
    x = np.random.uniform(0, 1, (256, 10))
    assert_raises(ValueError, u.fit, x, [])


def test_umap_fit_instance_returned():
    # Test that fit returns a new UMAP instance

    # Passing both data and targets
    u = UMAP()
    x = np.random.uniform(0, 1, (256, 10))
    y = np.random.randint(10, size=(256,))
    res = u.fit(x, y)
    assert isinstance(res, UMAP)

    # Passing only data
    u = UMAP()
    x = np.random.uniform(0, 1, (256, 10))
    res = u.fit(x)
    assert isinstance(res, UMAP)