File: test_umap_get_feature_names_out.py

package info (click to toggle)
umap-learn 0.5.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,284 kB
  • sloc: python: 9,863; sh: 87; makefile: 20
file content (53 lines) | stat: -rw-r--r-- 1,975 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline, FeatureUnion

from ..umap_ import UMAP


def test_get_feature_names_out_passthrough():
    """Names handed in through ``feature_names_out`` must come back unchanged."""
    supplied_names = ["feature1", "feature2"]
    reducer = UMAP()
    returned_names = reducer.get_feature_names_out(feature_names_out=supplied_names)
    # Explicitly supplied names are expected to be passed straight through.
    assert supplied_names == returned_names


def test_get_feature_names_out_default():
    """With no names supplied, a default UMAP reports two generated names."""
    generated_names = UMAP().get_feature_names_out()
    # Generated names follow the "umap_component_<k>" pattern, counted from 1.
    assert generated_names == ["umap_component_1", "umap_component_2"]


def test_get_feature_names_out_multicomponent():
    """One generated name is expected per requested component (ten here)."""
    component_count = 10
    reducer = UMAP(n_components=component_count)
    names = reducer.get_feature_names_out()
    wanted = [f"umap_component_{k}" for k in range(1, component_count + 1)]
    # Check the length separately so a size mismatch is reported on its own.
    assert len(names) == component_count
    assert names == wanted


def test_get_feature_names_out_featureunion():
    """Check the prefixed names reported when two UMAPs sit in a FeatureUnion.

    A default UMAP ("umap1") and a three-component UMAP ("umap2") are combined
    in a FeatureUnion wrapped by a single-step Pipeline. After fitting, the
    pipeline is expected to report every generated component name prefixed
    with its transformer name and a double underscore.
    """
    # Seed the synthetic data so every run fits on the same input and any
    # failure can be replayed; the asserted names do not depend on the values.
    X, _ = make_classification(n_samples=10, random_state=42)
    pipeline = Pipeline(
        [
            (
                "umap_pipeline",
                FeatureUnion(
                    [
                        ("umap1", UMAP()),
                        ("umap2", UMAP(n_components=3)),
                    ]
                ),
            )
        ]
    )

    pipeline.fit(X)
    feature_names = pipeline.get_feature_names_out()
    expected_feature_names = np.array(
        [
            "umap1__umap_component_1",
            "umap1__umap_component_2",
            "umap2__umap_component_1",
            "umap2__umap_component_2",
            "umap2__umap_component_3",
        ]
    )
    np.testing.assert_array_equal(feature_names, expected_feature_names)