File: bayesian_optimization.py

package info (click to toggle)
python-bayesian-optimization 2.0.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 19,816 kB
  • sloc: python: 2,820; makefile: 26; sh: 9
file content (359 lines) | stat: -rw-r--r-- 13,996 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
"""Main module.

Holds the `BayesianOptimization` class, which handles the maximization of a
function over a specific target space.
"""

from __future__ import annotations

from collections import deque
from typing import TYPE_CHECKING, Any
from warnings import warn

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

from bayes_opt import acquisition
from bayes_opt.constraint import ConstraintModel
from bayes_opt.event import DEFAULT_EVENTS, Events
from bayes_opt.logger import _get_default_logger
from bayes_opt.target_space import TargetSpace
from bayes_opt.util import ensure_rng

if TYPE_CHECKING:
    from collections.abc import Callable, Iterable, Mapping, Sequence

    from numpy.random import RandomState
    from numpy.typing import NDArray
    from scipy.optimize import NonlinearConstraint

    from bayes_opt.acquisition import AcquisitionFunction
    from bayes_opt.constraint import ConstraintModel
    from bayes_opt.domain_reduction import DomainTransformer

    Float = np.floating[Any]


class Observable:
    """Inspired by https://www.protechtraining.com/blog/post/879#simple-observer."""

    def __init__(self, events: Iterable[Any]) -> None:
        # maps event names to subscribers
        # str -> dict
        self._events = {event: dict() for event in events}

    def get_subscribers(self, event: Any) -> Any:
        """Return the subscribers of an event."""
        return self._events[event]

    def subscribe(self, event: Any, subscriber: Any, callback: Callable[..., Any] | None = None) -> None:
        """Add subscriber to an event."""
        if callback is None:
            callback = subscriber.update
        self.get_subscribers(event)[subscriber] = callback

    def unsubscribe(self, event: Any, subscriber: Any) -> None:
        """Remove a subscriber for a particular event."""
        del self.get_subscribers(event)[subscriber]

    def dispatch(self, event: Any) -> None:
        """Trigger callbacks for subscribers of an event."""
        for callback in self.get_subscribers(event).values():
            callback(event, self)


class BayesianOptimization(Observable):
    """Handle optimization of a target function over a specific target space.

    This class takes the function to optimize as well as the parameters bounds
    in order to find which values for the parameters yield the maximum value
    using bayesian optimization.

    Parameters
    ----------
    f: function or None.
        Function to be maximized.

    pbounds: dict
        Dictionary with parameters names as keys and a tuple with minimum
        and maximum values.

    constraint: NonlinearConstraint.
        Note that the names of arguments of the constraint function and of
        f need to be the same.

    random_state: int or numpy.random.RandomState, optional(default=None)
        If the value is an integer, it is used as the seed for creating a
        numpy.random.RandomState. Otherwise the random state provided is used.
        When set to None, an unseeded random state is generated.

    verbose: int, optional(default=2)
        The level of verbosity.

    bounds_transformer: DomainTransformer, optional(default=None)
        If provided, the transformation is applied to the bounds.

    allow_duplicate_points: bool, optional (default=False)
        If True, the optimizer will allow duplicate points to be registered.
        This behavior may be desired in high noise situations where repeatedly probing
        the same point will give different answers. In other situations, the acquisition
        may occasionally generate a duplicate point.
    """

    def __init__(
        self,
        f: Callable[..., float] | None,
        pbounds: Mapping[str, tuple[float, float]],
        acquisition_function: AcquisitionFunction | None = None,
        constraint: NonlinearConstraint | None = None,
        random_state: int | RandomState | None = None,
        verbose: int = 2,
        bounds_transformer: DomainTransformer | None = None,
        allow_duplicate_points: bool = False,
    ):
        self._random_state = ensure_rng(random_state)
        self._allow_duplicate_points = allow_duplicate_points
        self._queue: deque[Mapping[str, float] | Sequence[float] | NDArray[Float]] = deque()

        if acquisition_function is None:
            if constraint is None:
                self._acquisition_function = acquisition.UpperConfidenceBound(
                    kappa=2.576, random_state=self._random_state
                )
            else:
                self._acquisition_function = acquisition.ExpectedImprovement(
                    xi=0.01, random_state=self._random_state
                )
        else:
            self._acquisition_function = acquisition_function

        # Internal GP regressor
        self._gp = GaussianProcessRegressor(
            kernel=Matern(nu=2.5),
            alpha=1e-6,
            normalize_y=True,
            n_restarts_optimizer=5,
            random_state=self._random_state,
        )

        if constraint is None:
            # Data structure containing the function to be optimized, the
            # bounds of its domain, and a record of the evaluations we have
            # done so far
            self._space = TargetSpace(
                f, pbounds, random_state=random_state, allow_duplicate_points=self._allow_duplicate_points
            )
            self.is_constrained = False
        else:
            constraint_ = ConstraintModel(
                constraint.fun, constraint.lb, constraint.ub, random_state=random_state
            )
            self._space = TargetSpace(
                f,
                pbounds,
                constraint=constraint_,
                random_state=random_state,
                allow_duplicate_points=self._allow_duplicate_points,
            )
            self.is_constrained = True

        self._verbose = verbose
        self._bounds_transformer = bounds_transformer
        if self._bounds_transformer:
            try:
                self._bounds_transformer.initialize(self._space)
            except (AttributeError, TypeError) as exc:
                error_msg = "The transformer must be an instance of DomainTransformer"
                raise TypeError(error_msg) from exc

        self._sorting_warning_already_shown = False  # TODO: remove in future version
        super().__init__(events=DEFAULT_EVENTS)

    @property
    def space(self) -> TargetSpace:
        """Return the target space associated with the optimizer."""
        return self._space

    @property
    def acquisition_function(self) -> AcquisitionFunction:
        """Return the acquisition function associated with the optimizer."""
        return self._acquisition_function

    @property
    def constraint(self) -> ConstraintModel | None:
        """Return the constraint associated with the optimizer, if any."""
        if self.is_constrained:
            return self._space.constraint
        return None

    @property
    def max(self) -> dict[str, Any] | None:
        """Get maximum target value found and corresponding parameters.

        See `TargetSpace.max` for more information.
        """
        return self._space.max()

    @property
    def res(self) -> list[dict[str, Any]]:
        """Get all target values and constraint fulfillment for all parameters.

        See `TargetSpace.res` for more information.
        """
        return self._space.res()

    def register(
        self,
        params: Mapping[str, float] | Sequence[float] | NDArray[Float],
        target: float,
        constraint_value: float | NDArray[Float] | None = None,
    ) -> None:
        """Register an observation with known target.

        Parameters
        ----------
        params: dict or list
            The parameters associated with the observation.

        target: float
            Value of the target function at the observation.

        constraint_value: float or None
            Value of the constraint function at the observation, if any.
        """
        # TODO: remove in future version
        if isinstance(params, np.ndarray) and not self._sorting_warning_already_shown:
            msg = (
                "You're attempting to register an np.ndarray. Currently, the optimizer internally sorts"
                " parameters by key and expects any registered array to respect this order. In future"
                " versions this behaviour will change and the order as given by the pbounds dictionary"
                " will be used. If you wish to retain sorted parameters, please manually sort your pbounds"
                " dictionary before constructing the optimizer."
            )
            warn(msg, stacklevel=1)
            self._sorting_warning_already_shown = True
        self._space.register(params, target, constraint_value)
        self.dispatch(Events.OPTIMIZATION_STEP)

    def probe(
        self, params: Mapping[str, float] | Sequence[float] | NDArray[Float], lazy: bool = True
    ) -> None:
        """Evaluate the function at the given points.

        Useful to guide the optimizer.

        Parameters
        ----------
        params: dict or list
            The parameters where the optimizer will evaluate the function.

        lazy: bool, optional(default=True)
            If True, the optimizer will evaluate the points when calling
            maximize(). Otherwise it will evaluate it at the moment.
        """
        # TODO: remove in future version
        if isinstance(params, np.ndarray) and not self._sorting_warning_already_shown:
            msg = (
                "You're attempting to register an np.ndarray. Currently, the optimizer internally sorts"
                " parameters by key and expects any registered array to respect this order. In future"
                " versions this behaviour will change and the order as given by the pbounds dictionary"
                " will be used. If you wish to retain sorted parameters, please manually sort your pbounds"
                " dictionary before constructing the optimizer."
            )
            warn(msg, stacklevel=1)
            self._sorting_warning_already_shown = True
            params = self._space.array_to_params(params)
        if lazy:
            self._queue.append(params)
        else:
            self._space.probe(params)
            self.dispatch(Events.OPTIMIZATION_STEP)

    def suggest(self) -> dict[str, float]:
        """Suggest a promising point to probe next."""
        if len(self._space) == 0:
            return self._space.array_to_params(self._space.random_sample())

        # Finding argmax of the acquisition function.
        suggestion = self._acquisition_function.suggest(gp=self._gp, target_space=self._space, fit_gp=True)

        return self._space.array_to_params(suggestion)

    def _prime_queue(self, init_points: int) -> None:
        """Ensure the queue is not empty.

        Parameters
        ----------
        init_points: int
            Number of parameters to prime the queue with.
        """
        if not self._queue and self._space.empty:
            init_points = max(init_points, 1)

        for _ in range(init_points):
            sample = self._space.random_sample()
            self._queue.append(self._space.array_to_params(sample))

    def _prime_subscriptions(self) -> None:
        if not any([len(subs) for subs in self._events.values()]):
            _logger = _get_default_logger(self._verbose, self.is_constrained)
            self.subscribe(Events.OPTIMIZATION_START, _logger)
            self.subscribe(Events.OPTIMIZATION_STEP, _logger)
            self.subscribe(Events.OPTIMIZATION_END, _logger)

    def maximize(self, init_points: int = 5, n_iter: int = 25) -> None:
        r"""
        Maximize the given function over the target space.

        Parameters
        ----------
        init_points : int, optional(default=5)
            Number of random points to probe before starting the optimization.

        n_iter: int, optional(default=25)
            Number of iterations where the method attempts to find the maximum
            value.

        Warning
        -------
            The maximize loop only fits the GP when suggesting a new point to
            probe based on the acquisition function. This means that the GP may
            not be fitted on all points registered to the target space when the
            method completes. If you intend to use the GP model after the
            optimization routine, make sure to fit it manually, e.g. by calling
            ``optimizer._gp.fit(optimizer.space.params, optimizer.space.target)``.
        """
        self._prime_subscriptions()
        self.dispatch(Events.OPTIMIZATION_START)
        self._prime_queue(init_points)

        iteration = 0
        while self._queue or iteration < n_iter:
            try:
                x_probe = self._queue.popleft()
            except IndexError:
                x_probe = self.suggest()
                iteration += 1
            self.probe(x_probe, lazy=False)

            if self._bounds_transformer and iteration > 0:
                # The bounds transformer should only modify the bounds after
                # the init_points points (only for the true iterations)
                self.set_bounds(self._bounds_transformer.transform(self._space))

        self.dispatch(Events.OPTIMIZATION_END)

    def set_bounds(self, new_bounds: Mapping[str, NDArray[Float] | Sequence[float]]) -> None:
        """Modify the bounds of the search space.

        Parameters
        ----------
        new_bounds : dict
            A dictionary with the parameter name and its new bounds
        """
        self._space.set_bounds(new_bounds)

    def set_gp_params(self, **params: Any) -> None:
        """Set parameters of the internal Gaussian Process Regressor."""
        self._gp.set_params(**params)