File: test_umap_get_feature_names_out.py

package info (click to toggle)
umap-learn 0.5.9.post2+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,528 kB
  • sloc: python: 10,021; sh: 87; makefile: 20
file content (83 lines) | stat: -rw-r--r-- 2,643 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import numpy as np
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline, FeatureUnion

from ..umap_ import UMAP


def test_get_feature_names_out():
    """Explicit input feature names do not alter the ``umap<i>`` output names."""
    umap_params = dict(
        n_neighbors=10,
        min_dist=0.01,
        n_epochs=200,
        random_state=42,
        n_components=3,
    )
    data, _ = make_classification(n_samples=30, n_features=10, random_state=42)
    reducer = UMAP(**umap_params).fit(data)

    # Ten caller-supplied names, one per input column; the assertion below
    # expects them to have no influence on the returned names.
    supplied_names = [f"feature{i}" for i in range(10)]
    names_out = reducer.get_feature_names_out(input_features=supplied_names)

    np.testing.assert_array_equal(names_out, ["umap0", "umap1", "umap2"])


def test_get_feature_names_out_default():
    """Calling ``get_feature_names_out()`` with no arguments yields ``umap<i>`` names."""
    samples, _ = make_classification(n_samples=30, n_features=10, random_state=42)

    unfitted = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        n_epochs=200,
        random_state=42,
        n_components=3,
    )
    # Keep working with whatever ``fit`` hands back rather than ``unfitted``.
    fitted = unfitted.fit(samples)

    # No input names are supplied, so the names must come from the default
    # naming scheme: one entry per requested component.
    names_out = fitted.get_feature_names_out()
    wanted = ["umap0", "umap1", "umap2"]
    np.testing.assert_array_equal(names_out, wanted)


def test_get_feature_names_out_multicomponent():
    """The number of output names equals the number of UMAP components."""
    n_dims = 9
    data, _ = make_classification(n_samples=30, n_features=10, random_state=42)
    reducer = UMAP(
        n_neighbors=10,
        min_dist=0.01,
        n_epochs=200,
        random_state=42,
        n_components=n_dims,
    ).fit(data)

    names_out = reducer.get_feature_names_out()
    wanted = [f"umap{i}" for i in range(n_dims)]

    # Check the count first, then the exact names and their order.
    assert len(names_out) == n_dims
    np.testing.assert_array_equal(names_out, wanted)



def test_get_feature_names_out_featureunion():
    """UMAP steps inside a FeatureUnion report names as ``<step>__umap<i>``."""
    data, _ = make_classification(n_samples=30, n_features=10, random_state=42)

    # Two UMAP transformers with different output widths (2 and 3), combined
    # side by side and wrapped as the single step of a pipeline.
    two_dim_step = ("umap1", UMAP(n_components=2))
    three_dim_step = ("umap2", UMAP(n_components=3))
    union = FeatureUnion([two_dim_step, three_dim_step])
    pipe = Pipeline([("umap_pipeline", union)])

    pipe.fit(data)
    names_out = pipe.get_feature_names_out()

    # Expected: each union step's name, a double underscore, then the
    # per-transformer name, in step order.
    wanted = np.array(
        [
            "umap1__umap0",
            "umap1__umap1",
            "umap2__umap0",
            "umap2__umap1",
            "umap2__umap2",
        ]
    )
    np.testing.assert_array_equal(names_out, wanted)