File: test_mds.py

package info (click to toggle)
scikit-learn 1.7.2%2Bdfsg-3
links: PTS, VCS
area: main
in suites: forky, sid
size: 25,752 kB
sloc: python: 219,120; cpp: 5,790; ansic: 846; makefile: 191; javascript: 110
file content (234 lines) | stat: -rw-r--r-- 7,197 bytes
from unittest.mock import Mock

import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_almost_equal, assert_equal

from sklearn.datasets import load_digits
from sklearn.manifold import _mds as mds
from sklearn.metrics import euclidean_distances


def test_smacof():
    # test metric smacof using the data of "Modern Multidimensional Scaling",
    # Borg & Groenen, p 154
    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
    Z = np.array([[-0.266, -0.539], [0.451, 0.252], [0.016, -0.238], [-0.200, 0.524]])
    X, _ = mds.smacof(sim, init=Z, n_components=2, max_iter=1, n_init=1)
    X_true = np.array(
        [[-1.415, -2.471], [1.633, 1.107], [0.249, -0.067], [-0.468, 1.431]]
    )
    assert_array_almost_equal(X, X_true, decimal=3)


def test_nonmetric_lower_normalized_stress():
    # Testing that nonmetric MDS results in lower normalized stress compared
    # compared to metric MDS (non-regression test for issue 27028)
    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
    Z = np.array([[-0.266, -0.539], [0.451, 0.252], [0.016, -0.238], [-0.200, 0.524]])

    _, stress1 = mds.smacof(
        sim, init=Z, n_components=2, max_iter=1000, n_init=1, normalized_stress=True
    )

    _, stress2 = mds.smacof(
        sim,
        init=Z,
        n_components=2,
        max_iter=1000,
        n_init=1,
        normalized_stress=True,
        metric=False,
    )
    assert stress1 > stress2


def test_nonmetric_mds_optimization():
    # Test that stress is decreasing during nonmetric MDS optimization
    # (non-regression test for issue 27028)
    X, _ = load_digits(return_X_y=True)
    rng = np.random.default_rng(seed=42)
    ind_subset = rng.choice(len(X), size=200, replace=False)
    X = X[ind_subset]

    mds_est = mds.MDS(
        n_components=2,
        n_init=1,
        max_iter=2,
        metric=False,
        random_state=42,
    ).fit(X)
    stress_after_2_iter = mds_est.stress_

    mds_est = mds.MDS(
        n_components=2,
        n_init=1,
        max_iter=3,
        metric=False,
        random_state=42,
    ).fit(X)
    stress_after_3_iter = mds_est.stress_

    assert stress_after_2_iter > stress_after_3_iter


@pytest.mark.parametrize("metric", [True, False])
def test_mds_recovers_true_data(metric):
    X = np.array([[1, 1], [1, 4], [1, 5], [3, 3]])
    mds_est = mds.MDS(
        n_components=2,
        n_init=1,
        eps=1e-15,
        max_iter=1000,
        metric=metric,
        random_state=42,
    ).fit(X)
    stress = mds_est.stress_
    assert_allclose(stress, 0, atol=1e-6)


def test_smacof_error():
    # Not symmetric similarity matrix:
    sim = np.array([[0, 5, 9, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])

    with pytest.raises(ValueError):
        mds.smacof(sim, n_init=1)

    # Not squared similarity matrix:
    sim = np.array([[0, 5, 9, 4], [5, 0, 2, 2], [4, 2, 1, 0]])

    with pytest.raises(ValueError):
        mds.smacof(sim, n_init=1)

    # init not None and not correct format:
    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])

    Z = np.array([[-0.266, -0.539], [0.016, -0.238], [-0.200, 0.524]])
    with pytest.raises(ValueError):
        mds.smacof(sim, init=Z, n_init=1)


def test_MDS():
    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])
    mds_clf = mds.MDS(
        metric=False,
        n_jobs=3,
        n_init=3,
        dissimilarity="precomputed",
    )
    mds_clf.fit(sim)


# TODO(1.9): remove warning filter
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("k", [0.5, 1.5, 2])
def test_normed_stress(k):
    """Test that non-metric MDS normalized stress is scale-invariant."""
    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])

    X1, stress1 = mds.smacof(sim, metric=False, max_iter=5, random_state=0)
    X2, stress2 = mds.smacof(k * sim, metric=False, max_iter=5, random_state=0)

    assert_allclose(stress1, stress2, rtol=1e-5)
    assert_allclose(X1, X2, rtol=1e-5)


# TODO(1.9): remove warning filter
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("metric", [True, False])
def test_normalized_stress_auto(metric, monkeypatch):
    rng = np.random.RandomState(0)
    X = rng.randn(4, 3)
    dist = euclidean_distances(X)

    mock = Mock(side_effect=mds._smacof_single)
    monkeypatch.setattr("sklearn.manifold._mds._smacof_single", mock)

    est = mds.MDS(metric=metric, normalized_stress="auto", random_state=rng)
    est.fit_transform(X)
    assert mock.call_args[1]["normalized_stress"] != metric

    mds.smacof(dist, metric=metric, normalized_stress="auto", random_state=rng)
    assert mock.call_args[1]["normalized_stress"] != metric


def test_isotonic_outofbounds():
    # This particular configuration can trigger out of bounds error
    # in the isotonic regression (non-regression test for issue 26999)
    dis = np.array(
        [
            [0.0, 1.732050807568877, 1.7320508075688772],
            [1.732050807568877, 0.0, 6.661338147750939e-16],
            [1.7320508075688772, 6.661338147750939e-16, 0.0],
        ]
    )
    init = np.array(
        [
            [0.08665881585055124, 0.7939114643387546],
            [0.9959834154297658, 0.7555546025640025],
            [0.8766008278401566, 0.4227358815811242],
        ]
    )
    mds.smacof(dis, init=init, metric=False, n_init=1)


# TODO(1.9): remove warning filter
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("normalized_stress", [True, False])
def test_returned_stress(normalized_stress):
    # Test that the final stress corresponds to the final embedding
    # (non-regression test for issue 16846)
    X = np.array([[1, 1], [1, 4], [1, 5], [3, 3]])
    D = euclidean_distances(X)

    mds_est = mds.MDS(
        n_components=2,
        random_state=42,
        normalized_stress=normalized_stress,
    ).fit(X)

    Z = mds_est.embedding_
    stress = mds_est.stress_

    D_mds = euclidean_distances(Z)
    stress_Z = ((D_mds.ravel() - D.ravel()) ** 2).sum() / 2

    if normalized_stress:
        stress_Z = np.sqrt(stress_Z / ((D_mds.ravel() ** 2).sum() / 2))

    assert_allclose(stress, stress_Z)


# TODO(1.9): remove warning filter
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("metric", [True, False])
def test_convergence_does_not_depend_on_scale(metric):
    # Test that the number of iterations until convergence does not depend on
    # the scale of the input data
    X = np.array([[1, 1], [1, 4], [1, 5], [3, 3]])

    mds_est = mds.MDS(
        n_components=2,
        random_state=42,
        metric=metric,
    )

    mds_est.fit(X * 100)
    n_iter1 = mds_est.n_iter_

    mds_est.fit(X / 100)
    n_iter2 = mds_est.n_iter_

    assert_equal(n_iter1, n_iter2)


# TODO(1.9): delete this test
def test_future_warning_n_init():
    X = np.array([[1, 1], [1, 4], [1, 5], [3, 3]])
    sim = np.array([[0, 5, 3, 4], [5, 0, 2, 2], [3, 2, 0, 1], [4, 2, 1, 0]])

    with pytest.warns(FutureWarning):
        mds.smacof(sim)

    with pytest.warns(FutureWarning):
        mds.MDS().fit(X)