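"""Tests for XGBoost tree methods on multi-target data.

Property-based: hypothesis draws tree-method parameters and datasets, and each
test asserts that the training metric is non-increasing across boosting rounds.
"""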
from typing import Any, Dict

from hypothesis import given, note, settings, strategies

import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.params import (
    exact_parameter_strategy,
    hist_cache_strategy,
    hist_multi_parameter_strategy,
    hist_parameter_strategy,
)
from xgboost.testing.updater import ResetStrategy, train_result


class TestTreeMethodMulti:
    @given(
        exact_parameter_strategy, strategies.integers(1, 20), tm.multi_dataset_strategy
    )
    @settings(deadline=None, print_blob=True)
    def test_exact(
        self, param: Dict[str, Any], num_rounds: int, dataset: tm.TestDataset
    ) -> None:
        # Datasets with an L1 objective are skipped for this tree method.
        if dataset.name.endswith("-l1"):
            return
        param["tree_method"] = "exact"
        param = dataset.set_params(param)
        result = train_result(param, dataset.get_dmat(), num_rounds)
        # The training metric should not increase across boosting rounds.
        assert tm.non_increasing(result["train"][dataset.metric])

    @given(
        exact_parameter_strategy,
        hist_parameter_strategy,
        hist_cache_strategy,
        strategies.integers(1, 20),
        tm.multi_dataset_strategy,
    )
    @settings(deadline=None, print_blob=True)
    def test_approx(
        self,
        param: Dict[str, Any],
        hist_param: Dict[str, Any],
        cache_param: Dict[str, Any],
        num_rounds: int,
        dataset: tm.TestDataset,
    ) -> None:
        param["tree_method"] = "approx"
        param = dataset.set_params(param)
        param.update(hist_param)
        param.update(cache_param)
        result = train_result(param, dataset.get_dmat(), num_rounds)
        # Attach the evaluation history to the hypothesis failure report.
        note(str(result))
        assert tm.non_increasing(result["train"][dataset.metric])

    @given(
        exact_parameter_strategy,
        hist_multi_parameter_strategy,
        hist_cache_strategy,
        strategies.integers(1, 20),
        tm.multi_dataset_strategy,
    )
    @settings(deadline=None, print_blob=True)
    def test_hist(
        self,
        param: Dict[str, Any],
        hist_param: Dict[str, Any],
        cache_param: Dict[str, Any],
        num_rounds: int,
        dataset: tm.TestDataset,
    ) -> None:
        # Datasets with an L1 objective are skipped here as well.
        if dataset.name.endswith("-l1"):
            return
        param["tree_method"] = "hist"
        param = dataset.set_params(param)
        param.update(hist_param)
        param.update(cache_param)
        result = train_result(param, dataset.get_dmat(), num_rounds)
        note(str(result))
        assert tm.non_increasing(result["train"][dataset.metric])


def test_multiclass() -> None:
    X, y = tm.datasets.make_classification(
        128, n_features=12, n_informative=10, n_classes=4
    )
    clf = xgb.XGBClassifier(
        multi_strategy="multi_output_tree", callbacks=[ResetStrategy()], n_estimators=10
    )
    clf.fit(X, y, eval_set=[(X, y)])
    # Multi-class labels should resolve to the softprob objective.
    assert clf.objective == "multi:softprob"
    assert tm.non_increasing(clf.evals_result()["validation_0"]["mlogloss"])
    proba = clf.predict_proba(X)
    # One probability column per class.
    assert proba.shape == (y.shape[0], 4)


def test_multilabel() -> None:
    X, y = tm.datasets.make_multilabel_classification(128)
    clf = xgb.XGBClassifier(
        multi_strategy="multi_output_tree", callbacks=[ResetStrategy()], n_estimators=10
    )
    clf.fit(X, y, eval_set=[(X, y)])
    # Multi-label targets should resolve to the binary logistic objective.
    assert clf.objective == "binary:logistic"
    assert tm.non_increasing(clf.evals_result()["validation_0"]["logloss"])
    proba = clf.predict_proba(X)
    # One probability per label, matching the shape of the target matrix.
    assert proba.shape == y.shape
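

# A minimal, self-contained sketch (not part of the test suite) of the feature
# these tests exercise: with multi_strategy="multi_output_tree", a single tree
# with vector leaves is grown for all targets instead of one tree per target.
# The data and parameter choices below are illustrative assumptions only.
def _example_multi_output_tree() -> None:  # pragma: no cover
    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(64, 8))
    # Two regression targets derived from the features.
    y = np.stack([2.0 * X[:, 0], X[:, 1] - X[:, 2]], axis=1)
    booster = xgb.train(
        {"tree_method": "hist", "multi_strategy": "multi_output_tree"},
        xgb.DMatrix(X, label=y),
        num_boost_round=4,
    )
    # Predictions carry one column per target.
    assert booster.predict(xgb.DMatrix(X)).shape == y.shape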