import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm

pytestmark = pytest.mark.skipif(**tm.no_pandas())

dpath = 'demo/data/'
rng = np.random.RandomState(1994)


class TestTreesToDataFrame:
    def build_model(self, max_depth, num_round):
        dtrain, _ = tm.load_agaricus(__file__)
        param = {'max_depth': max_depth, 'objective': 'binary:logistic',
                 'verbosity': 1}
        bst = xgb.train(param, dtrain, num_round)
        return bst
    def parse_dumped_model(self, booster, item_to_get, splitter):
        # Split each whole-tree text dump on e.g. 'gain='; every chunk after
        # the first then starts with the numeric value, terminated by
        # ``splitter`` (',' for gain, '\n' for cover, which ends its line).
        item_to_get += '='
        txt_dump = booster.get_dump(with_stats=True)
        split_trees = [tree.split(item_to_get)[1:] for tree in txt_dump]
        res = sum(float(chunk.split(splitter)[0])
                  for tree in split_trees for chunk in tree)
        return res
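
    # For reference, node lines in ``get_dump(with_stats=True)`` look roughly
    # like the (made-up) examples below, which is why gain values are
    # terminated by ',' and cover values by the end of the line ('\n'):
    #
    #   0:[f29<2] yes=1,no=2,missing=1,gain=4000.5,cover=1628.2
    #   2:leaf=-0.43,cover=703.75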
    def test_trees_to_dataframe(self):
        bst = self.build_model(max_depth=5, num_round=10)
        gain_from_dump = self.parse_dumped_model(booster=bst,
                                                 item_to_get='gain',
                                                 splitter=',')
        cover_from_dump = self.parse_dumped_model(booster=bst,
                                                  item_to_get='cover',
                                                  splitter='\n')
        # method being tested
        df = bst.trees_to_dataframe()

        # test for equality of gains
        gain_from_df = df[df.Feature != 'Leaf'][['Gain']].sum()
        assert np.allclose(gain_from_dump, gain_from_df)

        # test for equality of covers
        cover_from_df = df.Cover.sum()
        assert np.allclose(cover_from_dump, cover_from_df)
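
    # ``trees_to_dataframe`` returns one row per node.  In current xgboost the
    # frame carries columns such as Tree, Node, ID, Feature, Split, Yes, No,
    # Missing, Gain and Cover (plus Category for categorical splits); leaf
    # rows are marked with Feature == 'Leaf' and, as an implementation detail,
    # keep the leaf value in the Gain column, which is why the test above
    # filters them out before summing gains.  Treat the exact column set as an
    # assumption of these comments rather than a guarantee.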
    def run_tree_to_df_categorical(self, tree_method: str) -> None:
        X, y = tm.make_categorical(100, 10, 31, onehot=False)
        Xy = xgb.DMatrix(X, y, enable_categorical=True)
        booster = xgb.train({"tree_method": tree_method}, Xy, num_boost_round=10)
        df = booster.trees_to_dataframe()
        for _, x in df.iterrows():
            if x["Feature"] != "Leaf":
                assert len(x["Category"]) >= 1

    def test_tree_to_df_categorical(self) -> None:
        self.run_tree_to_df_categorical("approx")

    def run_split_value_histograms(self, tree_method) -> None:
        X, y = tm.make_categorical(1000, 10, 13, onehot=False)
        reg = xgb.XGBRegressor(tree_method=tree_method, enable_categorical=True)
        reg.fit(X, y)

        with pytest.raises(ValueError, match="doesn't"):
            reg.get_booster().get_split_value_histogram("3", bins=5)

    def test_split_value_histograms(self):
        self.run_split_value_histograms("approx")
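

# ---------------------------------------------------------------------------
# Illustrative usage sketch, not collected by pytest: shows how the frame
# produced by ``Booster.trees_to_dataframe`` can be inspected by hand on a
# small synthetic problem.  The synthetic data and the printed column subset
# are assumptions of this example, not part of the test suite above.
if __name__ == "__main__":
    X = rng.rand(200, 5)
    y = (X[:, 0] > 0.5).astype(np.float64)
    dtrain = xgb.DMatrix(X, label=y)
    booster = xgb.train({"max_depth": 3, "objective": "binary:logistic"},
                        dtrain, num_boost_round=5)
    frame = booster.trees_to_dataframe()
    # One row per node; leaves are flagged with Feature == 'Leaf'.
    print(frame[["Tree", "Node", "Feature", "Gain", "Cover"]].head())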