File: halton.py

package info (click to toggle)
scikit-optimize 0.10.2-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 7,672 kB
  • sloc: python: 10,659; javascript: 438; makefile: 136; sh: 6
file content (180 lines) | stat: -rw-r--r-- 5,945 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
Inspired by https://github.com/jonathf/chaospy/blob/master/chaospy/
distributions/sampler/sequences/halton.py
"""

import numpy as np
from sklearn.utils import check_random_state

from ..space import Space
from .base import InitialPointGenerator


class Halton(InitialPointGenerator):
    """Creates `Halton` sequence samples.

    In statistics, Halton sequences are sequences used to generate
    points in space for numerical methods such as Monte Carlo simulations.
    Although these sequences are deterministic, they are of low discrepancy,
    that is, appear to be random
    for many purposes. They were first introduced in 1960 and are an example
    of a quasi-random number sequence. They generalise the one-dimensional
    van der Corput sequences.

    For ``dim == 1`` the sequence falls back to Van Der Corput sequence.

    Parameters
    ----------
    min_skip : int
        Minimum skipped seed number. When `min_skip != max_skip`
        a random number is picked.
    max_skip : int
        Maximum skipped seed number. When `min_skip != max_skip`
        a random number is picked.
    primes : tuple, default=None
        The (non-)prime base to calculate values along each axis. If
        empty or None, growing prime values starting from 2 will be used.
    """

    def __init__(self, min_skip=0, max_skip=0, primes=None):
        self.primes = primes
        self.min_skip = min_skip
        self.max_skip = max_skip

    def generate(self, dimensions, n_samples, random_state=None):
        """Creates samples from Halton set.

        Parameters
        ----------
        dimensions : list, shape (n_dims,)
            List of search space dimensions.
            Each search dimension can be defined either as

            - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
              dimensions),
            - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
              dimensions),
            - as a list of categories (for `Categorical` dimensions), or
            - an instance of a `Dimension` object (`Real`, `Integer` or
              `Categorical`).
        n_samples : int
            The order of the Halton sequence. Defines the number of samples.
        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None for reproducible
            results.

        Returns
        -------
        np.array, shape=(n_dim, n_samples)
            Halton set.
        """
        rng = check_random_state(random_state)
        if self.primes is None:
            primes = []
        else:
            primes = list(self.primes)
        space = Space(dimensions)
        n_dim = space.n_dims
        transformer = space.get_transformer()
        space.set_transformer("normalize")
        if len(primes) < n_dim:
            prime_order = 10 * n_dim
            while len(primes) < n_dim:
                primes = _create_primes(prime_order)
                prime_order *= 2

            primes = primes[:n_dim]
        assert len(primes) == n_dim, "not enough primes"

        if self.min_skip == self.max_skip:
            skip = self.min_skip
        elif self.min_skip < 0 and self.max_skip < 0:
            skip = max(primes)
        elif self.min_skip < 0 or self.max_skip < 0:
            skip = np.max(self.min_skip, self.max_skip)
        else:
            skip = rng.randint(self.min_skip, self.max_skip)

        out = np.empty((n_dim, n_samples))
        indices = [idx + skip for idx in range(n_samples)]
        for dim_ in range(n_dim):
            out[dim_] = _van_der_corput_samples(indices, number_base=primes[dim_])
        out = space.inverse_transform(np.transpose(out))
        space.set_transformer(transformer)
        return out


def _van_der_corput_samples(idx, number_base=2):
    """Create `Van Der Corput` low discrepancy sequence samples.

    A van der Corput sequence is an example of the simplest one-dimensional
    low-discrepancy sequence over the unit interval; it was first described in
    1935 by the Dutch mathematician J. G. van der Corput. It is constructed by
    reversing the base-n representation of the sequence of natural numbers
    (1, 2, 3, ...).

    In practice, use Halton sequence instead of Van Der Corput, as it is the
    same, but generalized to work in multiple dimensions.

    Parameters
    ----------
    idx (int, numpy.ndarray):
        The index of the sequence. If array is provided, all values in
        array is returned.
    number_base : int
        The numerical base from where to create the samples from.

    Returns
    -------
    float, numpy.ndarray
        Van der Corput samples.
    """
    assert number_base > 1

    idx = np.asarray(idx).flatten()
    out = np.zeros(len(idx), dtype=float)

    base = float(number_base)
    active = np.ones(len(idx), dtype=bool)
    while np.any(active):
        out[active] += (idx[active] % number_base) / base
        idx //= number_base
        base *= number_base
        active = idx > 0
    return out


def _create_primes(threshold):
    """Generate prime values using sieve of Eratosthenes method.

    Parameters
    ----------
    threshold : int
        The upper bound for the size of the prime values.

    Returns
    ------
    List
        All primes from 2 and up to ``threshold``.
    """
    if threshold == 2:
        return [2]

    elif threshold < 2:
        return []

    numbers = list(range(3, threshold + 1, 2))
    root_of_threshold = threshold**0.5
    half = int((threshold + 1) / 2 - 1)
    idx = 0
    counter = 3
    while counter <= root_of_threshold:
        if numbers[idx]:
            idy = int((counter * counter - 3) / 2)
            numbers[idy] = 0
            while idy < half:
                numbers[idy] = 0
                idy += counter
        idx += 1
        counter = 2 * idx + 3
    return [2] + [number for number in numbers if number]