File: test_gpu_linear.py

Package: xgboost 3.0.0-1 (Debian suite: trixie)
import pytest
from hypothesis import assume, given, note, settings, strategies

import xgboost as xgb
from xgboost import testing as tm

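# Give every test in this module a 10-second timeout.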
pytestmark = tm.timeout(10)

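# Hypothesis strategy that draws hyper-parameter dictionaries for the
# gblinear booster; each test below samples one combination from it.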
parameter_strategy = strategies.fixed_dictionaries(
    {
        "booster": strategies.just("gblinear"),
        "eta": strategies.floats(0.01, 0.25),
        "tolerance": strategies.floats(1e-5, 1e-2),
        "nthread": strategies.integers(1, 4),
        "feature_selector": strategies.sampled_from(
            ["cyclic", "shuffle", "greedy", "thrifty"]
        ),
        "top_k": strategies.integers(1, 10),
    }
)


def train_result(param, dmat, num_rounds):
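    """Train a booster on ``dmat`` and return the ``evals_result`` dict.

    The result is keyed by evaluation data name ("train") and then by metric
    name, each entry holding the per-iteration metric history.
    """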
    result = {}
    booster = xgb.train(
        param,
        dmat,
        num_rounds,
        [(dmat, "train")],
        verbose_eval=False,
        evals_result=result,
    )
    assert booster.num_boosted_rounds() == num_rounds
    return result


class TestGPULinear:
    @given(parameter_strategy, strategies.integers(10, 50), tm.make_dataset_strategy())
    @settings(deadline=None, max_examples=20, print_blob=True)
    def test_gpu_coordinate(self, param, num_rounds, dataset):
        assume(len(dataset.y) > 0)
        param["updater"] = "gpu_coord_descent"
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)["train"][
            dataset.metric
        ]
        note(result)
        assert tm.non_increasing(result)

    # The loss is not guaranteed to decrease at every iteration because of the
    # regularisation parameters. We therefore test the weaker condition that
    # the loss has not increased between the first and the last iteration.
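    # Concretely, the assertion at the end of this test only compares the
    # first and the last recorded metric values, not the full history.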
    @given(
        parameter_strategy,
        strategies.integers(10, 50),
        tm.make_dataset_strategy(),
        strategies.floats(1e-5, 0.8),
        strategies.floats(1e-5, 0.8),
    )
    @settings(deadline=None, max_examples=20, print_blob=True)
    def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
        assume(len(dataset.y) > 0)
        param["updater"] = "gpu_coord_descent"
        param["alpha"] = alpha
        param["lambda"] = lambd
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)["train"][
            dataset.metric
        ]
        note(result)
        assert tm.non_increasing([result[0], result[-1]])

    @pytest.mark.skipif(**tm.no_cupy())
    def test_gpu_coordinate_from_cupy(self):
        # Training a linear model is quite expensive, so we don't include it
        # in test_from_cupy.py.
        import cupy

        params = {
            "booster": "gblinear",
            "updater": "gpu_coord_descent",
            "n_estimators": 100,
        }
        X, y = tm.get_california_housing()
        cpu_model = xgb.XGBRegressor(**params)
        cpu_model.fit(X, y)
        cpu_predt = cpu_model.predict(X)

        X = cupy.array(X)
        y = cupy.array(y)
        gpu_model = xgb.XGBRegressor(**params)
        gpu_model.fit(X, y)
        gpu_predt = gpu_model.predict(X)
        cupy.testing.assert_allclose(cpu_predt, gpu_predt)
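
# A minimal sketch of running this module on its own (assuming pytest,
# hypothesis, and a CUDA-capable device are available; cupy is needed only
# for the last test):
#
#   python -m pytest test_gpu_linear.py -v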