File: test_interaction_constraints.py

package info (click to toggle)
xgboost 3.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 13,796 kB
  • sloc: cpp: 67,502; python: 35,503; java: 4,676; ansic: 1,426; sh: 1,320; xml: 1,197; makefile: 204; javascript: 19
file content (118 lines) | stat: -rw-r--r-- 4,864 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import numpy as np
import pytest

import xgboost
from xgboost import testing as tm

# Location of the demo datasets; a relative path, so it is resolved against
# the directory the tests are launched from.
dpath = "demo/data/"
# Module-level NumPy generator created with a fixed seed.
rng = np.random.RandomState(seed=1994)


class TestInteractionConstraints:
    """Tests for the ``interaction_constraints`` training parameter."""

    def run_interaction_constraints(
        self, tree_method, feature_names=None, interaction_constraints='[[0, 1]]'
    ):
        """Train with interaction constraints and check that x3 acts additively.

        Builds a three-feature synthetic regression problem, trains a booster
        with the given constraints, then verifies that changing x3 shifts the
        prediction of every observation by the same amount.

        Parameters
        ----------
        tree_method :
            Value stored under ``'tree_method'`` in the training parameters.
        feature_names :
            Optional feature names handed to every ``DMatrix`` built here.
        interaction_constraints :
            Value stored under ``'interaction_constraints'`` in the training
            parameters.
        """
        n_samples = 1000
        # Use a locally seeded generator instead of the global NumPy state so
        # that every invocation trains on exactly the same data.
        data_rng = np.random.RandomState(1994)
        x1 = data_rng.normal(loc=1.0, scale=1.0, size=n_samples)
        x2 = data_rng.normal(loc=1.0, scale=1.0, size=n_samples)
        x3 = data_rng.choice([1, 2, 3], size=n_samples, replace=True)
        noise = data_rng.normal(loc=0.001, scale=1.0, size=n_samples)
        y = x1 + x2 + x3 + x1 * x2 * x3 + noise + 3 * np.sin(x1)
        X = np.column_stack((x1, x2, x3))
        dtrain = xgboost.DMatrix(X, label=y, feature_names=feature_names)

        params = {
            'max_depth': 3,
            'eta': 0.1,
            'nthread': 2,
            'interaction_constraints': interaction_constraints,
            'tree_method': tree_method,
        }
        num_boost_round = 12
        # Fit a model that only allows interaction between x1 and x2
        bst = xgboost.train(
            params, dtrain, num_boost_round, evals=[(dtrain, 'train')])

        def predict_with_fixed_x3(value):
            """Predict after giving every observation the same x3 value."""
            tmat = xgboost.DMatrix(
                np.column_stack((x1, x2, np.repeat(value, n_samples))),
                feature_names=feature_names,
            )
            return bst.predict(tmat)

        preds = [predict_with_fixed_x3(value) for value in (1, 2, 3)]

        # Check incrementing x3 has the same effect on all observations
        #   since x3 is constrained to be independent of x1 and x2
        #   and all observations start off from the same x3 value
        for lower, upper in zip(preds, preds[1:]):
            diff = upper - lower
            assert np.all(np.abs(diff - diff[0]) < 1e-4)

    def test_exact_interaction_constraints(self):
        """Run the additivity check with ``tree_method='exact'``."""
        self.run_interaction_constraints(tree_method='exact')

    def test_hist_interaction_constraints(self):
        """Run the additivity check with ``tree_method='hist'``."""
        self.run_interaction_constraints(tree_method='hist')

    def test_approx_interaction_constraints(self):
        """Run the additivity check with ``tree_method='approx'``."""
        self.run_interaction_constraints(tree_method='approx')

    def test_interaction_constraints_feature_names(self):
        """Check constraints that refer to features by name."""
        feature_names = ['feature_0', 'feature_1', 'feature_2']

        # Named constraints without any feature names on the data must fail.
        constraints = [('feature_0', 'feature_1')]
        with pytest.raises(ValueError):
            self.run_interaction_constraints(tree_method='exact',
                                             interaction_constraints=constraints)

        # A constraint naming a feature that is not in ``feature_names``
        # must fail as well.
        constraints = [('feature_0', 'feature_3')]
        with pytest.raises(ValueError):
            self.run_interaction_constraints(tree_method='exact',
                                             feature_names=feature_names,
                                             interaction_constraints=constraints)

        # Valid named constraints, given as tuples and then as lists, must
        # train and satisfy the additivity check.
        valid_constraints = (
            [('feature_0', 'feature_1')],
            [['feature_0', 'feature_1'], ['feature_2']],
        )
        for constraints in valid_constraints:
            self.run_interaction_constraints(tree_method='exact',
                                             feature_names=feature_names,
                                             interaction_constraints=constraints)

    def training_accuracy(self, tree_method):
        """Test accuracy, reused by GPU tests.

        This helper is not collected by pytest, so a ``skipif`` mark placed on
        it would never be evaluated; callers that need to skip when
        scikit-learn is missing must carry the mark themselves.
        """
        from sklearn.metrics import accuracy_score
        dtrain = xgboost.DMatrix(
            dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm"
        )
        dtest = xgboost.DMatrix(
            dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm"
        )
        params = {
            'eta': 1,
            'max_depth': 6,
            'objective': 'binary:logistic',
            'tree_method': tree_method,
            'interaction_constraints': '[[1,2], [2,3,4]]',
        }
        num_boost_round = 5

        for grow_policy in ('lossguide', 'depthwise'):
            params['grow_policy'] = grow_policy
            bst = xgboost.train(params, dtrain, num_boost_round)
            # The comparison is ``< 0.5``, so ``pred_dtest`` is True where the
            # predicted value is below one half; the assertion requires these
            # flags to agree with the labels on fewer than 10% of the rows.
            pred_dtest = (bst.predict(dtest) < 0.5)
            assert accuracy_score(dtest.get_label(), pred_dtest) < 0.1

    @pytest.mark.skipif(**tm.no_sklearn())
    @pytest.mark.parametrize("tree_method", ["hist", "approx", "exact"])
    def test_hist_training_accuracy(self, tree_method):
        """Run the accuracy check for each CPU tree method.

        Skipped when scikit-learn is unavailable, because the helper imports
        ``sklearn.metrics``.
        """
        self.training_accuracy(tree_method=tree_method)