File: test_optimizer.py

package info (click to toggle)
scikit-optimize 0.10.2-6
links: PTS, VCS
area: main
in suites: forky, sid
size: 7,736 kB
sloc: python: 10,668; javascript: 438; makefile: 139; sh: 6
file content (482 lines) | stat: -rw-r--r-- 14,119 bytes
parent folder | download | duplicates (2)
import numpy as np
import pytest
from numpy.testing import assert_array_equal, assert_equal, assert_raises
from scipy.optimize import OptimizeResult
from sklearn.multioutput import MultiOutputRegressor

from skopt import gp_minimize
from skopt.benchmarks import bench1, bench1_with_time, branin
from skopt.learning import (
    ExtraTreesRegressor,
    GradientBoostingQuantileRegressor,
    RandomForestRegressor,
)
from skopt.optimizer import Optimizer

# One instance of every tree-based regressor under test, each created with
# the same fixed random_state.
TREE_REGRESSORS = tuple(
    regressor_cls(random_state=2)
    for regressor_cls in (
        ExtraTreesRegressor,
        RandomForestRegressor,
        GradientBoostingQuantileRegressor,
    )
)
# Acquisition function names used to parametrize the tests below.
ACQ_FUNCS_PS = ["EIps", "PIps"]
ACQ_FUNCS_MIXED = ["EI", "EIps"]

# Estimator names accepted as ``base_estimator`` strings: the upper-case
# spellings first, followed by their lower-case counterparts in the same order.
ESTIMATOR_STRINGS = ["GP", "RF", "ET", "GBRT", "DUMMY"]
ESTIMATOR_STRINGS += [name.lower() for name in ESTIMATOR_STRINGS]


@pytest.mark.fast_test
def test_multiple_asks():
    # Repeated ask() calls with no tell() in between must behave like a
    # "no op": neither the models nor the evaluated points may change.
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
    )
    optimizer.run(bench1, n_iter=3)

    # tell() already prepares the point for the following ask(), so three
    # iterations leave three models and three evaluated points behind.
    expected_count = 3
    assert_equal(len(optimizer.models), expected_count)
    assert_equal(len(optimizer.Xi), expected_count)

    # An extra ask() must leave both counts untouched.
    optimizer.ask()
    assert_equal(len(optimizer.models), expected_count)
    assert_equal(len(optimizer.Xi), expected_count)

    # Consecutive asks agree, both before and after update_next().
    first_suggestion = optimizer.ask()
    second_suggestion = optimizer.ask()
    assert_equal(first_suggestion, second_suggestion)

    optimizer.update_next()
    first_suggestion = optimizer.ask()
    second_suggestion = optimizer.ask()
    assert_equal(first_suggestion, second_suggestion)


@pytest.mark.fast_test
def test_model_queue_size():
    # model_queue_size must cap how many models the optimizer keeps.
    queue_size = 2
    n_iterations = 3
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
        model_queue_size=queue_size,
    )
    optimizer.run(bench1, n_iter=n_iterations)

    # All three points are recorded, but only two models are retained.
    assert_equal(len(optimizer.models), queue_size)
    assert_equal(len(optimizer.Xi), n_iterations)

    # An extra ask() must leave both counts untouched.
    optimizer.ask()
    assert_equal(len(optimizer.models), queue_size)
    assert_equal(len(optimizer.Xi), n_iterations)

    first_suggestion = optimizer.ask()
    second_suggestion = optimizer.ask()
    assert_equal(first_suggestion, second_suggestion)


@pytest.mark.fast_test
def test_invalid_tell_arguments():
    # tell() must refuse a single point paired with several objective values.
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
    )

    single_point = [1.0]
    too_many_values = [1.0, 1.0]
    with assert_raises(ValueError):
        optimizer.tell(single_point, too_many_values)


@pytest.mark.fast_test
def test_invalid_tell_arguments_list():
    # tell() must refuse a batch of points whose objective values contain None.
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
    )

    points = [[1.0], [2.0]]
    values_with_none = [1.0, None]
    with assert_raises(ValueError):
        optimizer.tell(points, values_with_none)


@pytest.mark.fast_test
def test_bounds_checking_1D():
    # tell() must raise ValueError for points outside a one-dimensional
    # space, both for a single point and for a batch of points.
    low = -2.0
    high = 2.0
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer(
        [(low, high)], base_estimator, n_initial_points=1, acq_optimizer="sampling"
    )

    # a single point above, then below, the bounds
    assert_raises(ValueError, opt.tell, [high + 0.5], 2.0)
    assert_raises(ValueError, opt.tell, [low - 0.5], 2.0)
    # feed two points to tell() at once: every point is its own one-element
    # list. A flat ``[a, b]`` list would be read as ONE point with two
    # components (see test_dimension_checking_1D), so it would not exercise
    # the multi-point path at all.
    assert_raises(ValueError, opt.tell, [[high + 0.5], [high]], [2.0, 3.0])
    assert_raises(ValueError, opt.tell, [[low - 0.5], [high]], [2.0, 3.0])


@pytest.mark.fast_test
def test_bounds_checking_2D():
    # tell() must raise ValueError for a single out-of-bounds 2-D point.
    low = -2.0
    high = 2.0
    optimizer = Optimizer(
        [(low, high), (low + 4, high + 4)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
    )

    rejected_points = (
        # both components outside their bounds
        [high + 0.5, high + 4.5],
        [low - 0.5, low - 4.5],
        # first out, second in
        [high + 0.5, high + 0.5],
        [low - 0.5, high + 0.5],
    )
    for point in rejected_points:
        with assert_raises(ValueError):
            optimizer.tell(point, 2.0)


@pytest.mark.fast_test
def test_bounds_checking_2D_multiple_points():
    # tell() must raise ValueError when a batch of 2-D points contains
    # out-of-bounds entries.
    low = -2.0
    high = 2.0
    optimizer = Optimizer(
        [(low, high), (low + 4, high + 4)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
    )

    # first component out, second in
    rejected_points = (
        (high + 0.5, high + 0.5),
        (low - 0.5, high + 0.5),
    )
    for point in rejected_points:
        # the same offending point is submitted twice in one batch
        with assert_raises(ValueError):
            optimizer.tell([point, point], [2.0, 3.0])


@pytest.mark.fast_test
def test_dimension_checking_1D():
    # A point with too many components must be rejected by a 1-D space.
    lower, upper = -2, 2
    optimizer = Optimizer([(lower, upper)])

    # within bounds but one dimension too high
    oversized_point = [lower + 1, lower + 1]
    with pytest.raises(ValueError) as excinfo:
        optimizer.tell(oversized_point, 2.0)

    message = str(excinfo.value)
    assert "Dimensions of point " in message


@pytest.mark.fast_test
def test_dimension_checking_2D():
    # A single point with the wrong number of components must be rejected
    # by a 2-D space.
    lower, upper = -2, 2
    optimizer = Optimizer([(lower, upper), (lower, upper)])

    inside = lower + 1
    mismatched_points = (
        [inside],  # within bounds but one dimension too little
        [inside, inside, inside],  # within bounds but one dimension too much
    )
    for point in mismatched_points:
        with pytest.raises(ValueError) as excinfo:
            optimizer.tell(point, 2.0)
        assert "Dimensions of point " in str(excinfo.value)


@pytest.mark.fast_test
def test_dimension_checking_2D_multiple_points():
    # A batch in which one point has the wrong number of components must be
    # rejected by a 2-D space.
    lower, upper = -2, 2
    optimizer = Optimizer([(lower, upper), (lower, upper)])

    inside = lower + 1
    mismatched_points = (
        [inside],  # within bounds but one dimension too little
        [inside, inside, inside],  # within bounds but one dimension too much
    )
    for bad_point in mismatched_points:
        # the malformed point leads a batch of two correctly shaped points
        batch = [bad_point, [inside, lower + 2], [inside, lower + 3]]
        with pytest.raises(ValueError) as excinfo:
            optimizer.tell(batch, 2.0)
        assert "dimensions as the space" in str(excinfo.value)


@pytest.mark.fast_test
def test_returns_result_object():
    # tell() must hand back a consistent OptimizeResult.
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        ExtraTreesRegressor(random_state=2),
        n_initial_points=1,
        acq_optimizer="sampling",
    )
    outcome = optimizer.tell([1.5], 2.0)

    assert isinstance(outcome, OptimizeResult)
    # one objective value per evaluated point
    n_points = len(outcome.x_iters)
    n_values = len(outcome.func_vals)
    assert_equal(n_points, n_values)
    # the reported optimum is the smallest recorded objective value
    smallest_value = np.min(outcome.func_vals)
    assert_equal(smallest_value, outcome.fun)


@pytest.mark.fast_test
@pytest.mark.parametrize("base_estimator", TREE_REGRESSORS)
def test_acq_optimizer(base_estimator):
    # Constructing an Optimizer with a tree-based regressor and the 'lbfgs'
    # acquisition optimizer must fail with a hint to use 'sampling'.
    settings = {
        "base_estimator": base_estimator,
        "n_initial_points": 1,
        "acq_optimizer": 'lbfgs',
    }
    with pytest.raises(ValueError) as excinfo:
        Optimizer([(-2.0, 2.0)], **settings)

    message = str(excinfo.value)
    assert "should run with acq_optimizer='sampling'" in message


@pytest.mark.parametrize("base_estimator", TREE_REGRESSORS)
@pytest.mark.parametrize("acq_func", ACQ_FUNCS_PS)
def test_acq_optimizer_with_time_api(base_estimator, acq_func):
    # With the "ps" acquisition functions tell() is fed a two-element tuple
    # per point, and the result exposes both func_vals and log_time.
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        base_estimator=base_estimator,
        acq_func=acq_func,
        acq_optimizer="sampling",
        n_initial_points=2,
    )

    first_point = optimizer.ask()
    optimizer.tell(first_point, (bench1(first_point), 1.0))
    second_point = optimizer.ask()
    result = optimizer.tell(second_point, (bench1(second_point), 2.0))

    # Both points are random, so they must not coincide.
    assert first_point != second_point

    assert len(result.models) == 1
    expected_shape = (2,)
    assert_array_equal(result.func_vals.shape, expected_shape)
    assert_array_equal(result.log_time.shape, expected_shape)

    # A bare objective value instead of a tuple is expected to be rejected.
    with pytest.raises(TypeError):
        optimizer.tell(second_point, bench1(second_point))


@pytest.mark.fast_test
@pytest.mark.parametrize("acq_func", ACQ_FUNCS_MIXED)
def test_optimizer_copy(acq_func):
    # copy() must carry over the base estimator, the evaluated points and
    # the target values.
    uses_time = "ps" in acq_func
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        ExtraTreesRegressor(random_state=2),
        acq_func=acq_func,
        n_initial_points=1,
        acq_optimizer="sampling",
    )

    # Three iterations give us some points and objective values to compare;
    # the "ps" acquisition functions are run against the timed benchmark.
    objective = bench1_with_time if uses_time else bench1
    optimizer.run(objective, n_iter=3)

    duplicate = optimizer.copy()
    estimator = duplicate.base_estimator_

    if not uses_time:
        assert not isinstance(estimator, MultiOutputRegressor)
    else:
        assert isinstance(estimator, MultiOutputRegressor)
        # the base estimator must not have been wrapped a second time
        wrapped_twice = isinstance(estimator.estimator, MultiOutputRegressor)
        assert not wrapped_twice

    assert_array_equal(duplicate.Xi, optimizer.Xi)
    assert_array_equal(duplicate.yi, optimizer.yi)


@pytest.mark.parametrize("base_estimator", ESTIMATOR_STRINGS)
def test_exhaust_initial_calls(base_estimator):
    # Once at least n_initial_points were passed to tell(), a model must be
    # fitted and used for the following suggestions. The dummy estimator is
    # the exception: it keeps returning random points and never fits a model.
    is_dummy = base_estimator.lower() == 'dummy'
    optimizer = Optimizer(
        [(-2.0, 2.0)],
        base_estimator,
        n_initial_points=2,
        acq_optimizer="sampling",
        random_state=1,
    )

    # Two asks before any tell(): both are random and must differ.
    point0 = optimizer.ask()
    point1 = optimizer.ask()
    assert point0 != point1

    # First tell(): no model yet.
    first_result = optimizer.tell(point1, 3.0)
    assert len(first_result.models) == 0

    # Still in the random phase.
    point2 = optimizer.ask()
    assert point1 != point2

    # Second tell(): the initial points are exhausted.
    second_result = optimizer.tell(point2, 4.0)
    assert len(second_result.models) == (0 if is_dummy else 1)

    # First suggestion that is not random.
    point3 = optimizer.ask()
    assert point2 != point3
    point4 = optimizer.ask()
    third_result = optimizer.tell(point3, 1.0)

    # No information was added between the last two asks, so a model-based
    # optimizer must repeat its suggestion.
    if is_dummy:
        assert point3 != point4
    else:
        assert point3 == point4
    assert len(third_result.models) == (0 if is_dummy else 2)


@pytest.mark.fast_test
def test_optimizer_base_estimator_string_invalid():
    # An unknown estimator name must be rejected with a message that lists
    # the accepted names.
    search_space = [(-2.0, 2.0)]
    with pytest.raises(ValueError) as excinfo:
        Optimizer(search_space, base_estimator="rtr", n_initial_points=1)

    message = str(excinfo.value)
    assert "'RF', 'ET', 'GP', 'GBRT' or 'DUMMY'" in message


@pytest.mark.fast_test
@pytest.mark.parametrize("base_estimator", ESTIMATOR_STRINGS)
def test_optimizer_base_estimator_string_smoke(base_estimator):
    # Smoke test: every estimator name must survive a short run.
    def squared(point):
        return point[0] ** 2

    optimizer = Optimizer(
        [(-2.0, 2.0)],
        base_estimator=base_estimator,
        n_initial_points=2,
        acq_func="EI",
    )
    optimizer.run(func=squared, n_iter=3)


@pytest.mark.fast_test
def test_optimizer_base_estimator_string_smoke_njobs():
    # Smoke test: a "GBRT" optimizer built with n_jobs=-1 must survive a
    # short run.
    def squared(point):
        return point[0] ** 2

    settings = {
        "base_estimator": "GBRT",
        "n_initial_points": 1,
        "acq_func": "EI",
        "n_jobs": -1,
    }
    optimizer = Optimizer([(-2.0, 2.0)], **settings)
    optimizer.run(func=squared, n_iter=3)


def test_defaults_are_equivalent():
    # The defaults of Optimizer must reproduce the defaults of gp_minimize.
    n_calls = 12
    space = [(-5.0, 10.0), (0.0, 15.0)]
    optimizer = Optimizer(space, random_state=1)

    # Manual ask/tell loop; only the result of the last tell() is kept.
    for _ in range(n_calls):
        point = optimizer.ask()
        loop_result = optimizer.tell(point, branin(point))

    reference = gp_minimize(branin, space, n_calls=n_calls, random_state=1)

    assert reference.space == loop_result.space
    # tolerate small differences in the points sampled
    assert np.allclose(reference.x_iters, loop_result.x_iters)
    assert np.allclose(reference.x, loop_result.x)

    # get_result() must agree with the reference as well.
    fetched_result = optimizer.get_result()
    assert np.allclose(reference.x_iters, fetched_result.x_iters)
    assert np.allclose(reference.x, fetched_result.x)


@pytest.mark.fast_test
def test_dimensions_names():
    # The space attached to a tell() result must keep the dimension names.
    from skopt.space import Categorical, Integer, Real

    # search space with one named dimension of each kind
    named_space = [
        Real(0, 1, name='real'),
        Categorical(['a', 'b', 'c'], name='cat'),
        Integer(0, 1, name='int'),
    ]
    optimizer = Optimizer(named_space, n_initial_points=2)
    outcome = optimizer.tell([(0.5, 'a', 0.5)], [3])

    names = [dimension.name for dimension in outcome.space.dimensions]
    assert len(names) == 3
    for expected_name in ("real", "cat", "int"):
        assert expected_name in names
    assert None not in names


@pytest.mark.fast_test
def test_categorical_only():
    # An Optimizer over a purely categorical space must run with integer
    # categories and with string categories alike.
    from skopt.space import Categorical

    integer_choices = list(range(2, 12))
    string_choices = [str(choice) for choice in integer_choices]

    for choices in (integer_choices, string_choices):
        # two identical categorical dimensions, each given its own list
        space = [Categorical(list(choices)), Categorical(list(choices))]
        optimizer = Optimizer(space)

        for step in range(15):
            point = optimizer.ask()
            outcome = optimizer.tell(point, 12 * step)
        assert len(outcome.x_iters) == 15

        # asking for a batch must return the requested number of points
        batch = optimizer.ask(n_points=4)
        assert len(batch) == 4


def test_categorical_only2():
    # Batched ask/tell on a purely categorical space with a Gaussian process
    # regressor and the 'lbfgs' acquisition optimizer.
    from numpy import linalg

    from skopt.learning import GaussianProcessRegressor
    from skopt.space import Categorical

    batch_size = 4
    optimizer = Optimizer(
        [Categorical([1, 2, 3]), Categorical([4, 5, 6])],
        base_estimator=GaussianProcessRegressor(alpha=1e-7),
        acq_optimizer='lbfgs',
        n_initial_points=10,
        n_jobs=2,
    )

    # Two complete ask/tell rounds, using each point's norm as its objective.
    for _ in range(2):
        batch = optimizer.ask(n_points=batch_size)
        assert len(batch) == batch_size
        optimizer.tell(batch, [linalg.norm(point) for point in batch])

    # A final ask() must still deliver a full batch.
    batch = optimizer.ask(n_points=batch_size)
    assert len(batch) == batch_size