File: 008_specify_params.py

"""
.. _specify_params:

Specify Hyperparameters Manually
================================

It's natural that you have some specific sets of hyperparameters to try first, such as initial
learning rate values and the number of leaves.
You may also have already tried those sets before asking Optuna to find better
sets of hyperparameters.

Optuna provides two APIs to support such cases:

1. Passing those sets of hyperparameters and letting Optuna evaluate them - :func:`~optuna.study.Study.enqueue_trial`
2. Adding the results of those sets as completed ``Trial``\\s - :func:`~optuna.study.Study.add_trial`

.. _enqueue_trial_tutorial:

---------------------------------------------------------
First Scenario: Have Optuna evaluate your hyperparameters
---------------------------------------------------------

In this scenario, let's assume you have some out-of-the-box sets of hyperparameters that you have
not evaluated yet, and you have decided to use Optuna to find better sets of hyperparameters.

Optuna provides :func:`optuna.study.Study.enqueue_trial`, which lets you pass those sets of
hyperparameters to Optuna so that Optuna evaluates them.

This section walks you through how to use this API with `LightGBM <https://lightgbm.readthedocs.io/en/stable/>`__.
"""

import lightgbm as lgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split

import optuna


###################################################################################################
# Define the objective function.
def objective(trial):
    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(data, target, test_size=0.25)
    dtrain = lgb.Dataset(train_x, label=train_y)
    dvalid = lgb.Dataset(valid_x, label=valid_y)

    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "bagging_fraction": min(trial.suggest_float("bagging_fraction", 0.4, 1.0 + 1e-12), 1),
        "bagging_freq": trial.suggest_int("bagging_freq", 0, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }

    gbm = lgb.train(param, dtrain, valid_sets=[dvalid])

    preds = gbm.predict(valid_x)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
    return accuracy


###################################################################################################
# Then, construct a ``Study`` for hyperparameter optimization.

study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())

###################################################################################################
# Here, we have Optuna evaluate some sets with larger ``"bagging_fraction"`` values as well as
# the default values.

study.enqueue_trial(
    {
        "bagging_fraction": 1.0,
        "bagging_freq": 0,
        "min_child_samples": 20,
    }
)

study.enqueue_trial(
    {
        "bagging_fraction": 0.75,
        "bagging_freq": 5,
        "min_child_samples": 20,
    }
)
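
###################################################################################################
# Before running the optimization, the queued sets already exist in the study as waiting trials.
# A quick check might look like this (the exact representation may vary across Optuna versions).

for queued_trial in study.trials:
    print(queued_trial.number, queued_trial.state)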

import logging
import sys

# Add a stream handler of stdout to show the messages and verify that Optuna works as expected.
optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))
study.optimize(objective, n_trials=100, timeout=600)
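
###################################################################################################
# The enqueued sets are evaluated first, in the order they were enqueued, so the first two
# finished trials correspond to them. A small check might look like this.

for finished_trial in study.trials[:2]:
    print(finished_trial.number, finished_trial.params, finished_trial.value)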

###################################################################################################
# .. _add_trial_tutorial:
#
# ----------------------------------------------------------------------
# Second scenario: Have Optuna utilize already evaluated hyperparameters
# ----------------------------------------------------------------------
#
# In this scenario, let's assume you have some out-of-the-box sets of hyperparameters that you
# have already evaluated, but the results were not satisfactory, so you are thinking of
# using Optuna.
#
# Optuna provides :func:`optuna.study.Study.add_trial`, which lets you register those results
# with Optuna, and Optuna will then sample hyperparameters taking them into account.
#
# In this section, the ``objective`` is the same as in the first scenario.

study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.add_trial(
    optuna.trial.create_trial(
        params={
            "bagging_fraction": 1.0,
            "bagging_freq": 0,
            "min_child_samples": 20,
        },
        distributions={
            "bagging_fraction": optuna.distributions.FloatDistribution(0.4, 1.0 + 1e-12),
            "bagging_freq": optuna.distributions.IntDistribution(0, 7),
            "min_child_samples": optuna.distributions.IntDistribution(5, 100),
        },
        value=0.94,
    )
)
study.add_trial(
    optuna.trial.create_trial(
        params={
            "bagging_fraction": 0.75,
            "bagging_freq": 5,
            "min_child_samples": 20,
        },
        distributions={
            "bagging_fraction": optuna.distributions.FloatDistribution(0.4, 1.0 + 1e-12),
            "bagging_freq": optuna.distributions.IntDistribution(0, 7),
            "min_child_samples": optuna.distributions.IntDistribution(5, 100),
        },
        value=0.95,
    )
)
study.optimize(objective, n_trials=100, timeout=600)
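
###################################################################################################
# The two registered trials count toward the study's history, so the sampler takes them into
# account from the very first optimized trial. A small summary of the result might look like this.

print("Number of trials in the study:", len(study.trials))
print("Best trial params:", study.best_trial.params)
print("Best value:", study.best_value)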