File: test_categorical.cc

Package: xgboost 3.0.0-1
/*!
 * Copyright 2021-2022 by XGBoost Contributors
 */
#include <gtest/gtest.h>
#include <xgboost/json.h>
#include <xgboost/learner.h>

#include <cstddef>  // for std::size_t
#include <cstdint>  // for uint32_t
#include <limits>   // for std::numeric_limits
#include <memory>   // for std::unique_ptr
#include <vector>   // for std::vector

#include "../../../src/common/categorical.h"
#include "../helpers.h"

namespace xgboost {
namespace common {
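// Decision() picks the branch taken for a categorical split.  Judging by the assertions
// below, it is expected to return true for categories that cannot match the split's bit
// set (invalid values such as inf or negative numbers, values past the end of the set,
// or values whose bit is simply not set) and false once the corresponding bit is set.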
TEST(Categorical, Decision) {
  // inf
  float a = std::numeric_limits<float>::infinity();

  ASSERT_TRUE(common::InvalidCat(a));
  std::vector<uint32_t> cats(256, 0);
  ASSERT_TRUE(Decision(cats, a));

  // larger than size
  a = 256;
  ASSERT_TRUE(Decision(cats, a));

  // negative
  a = -1;
  ASSERT_TRUE(Decision(cats, a));

  CatBitField bits{cats};
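  // `bits` is a bitfield view over the storage of `cats`, so Set() here is also
  // visible to the later Decision(cats, ...) calls.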
  bits.Set(0);
  a = -0.5;
  ASSERT_TRUE(Decision(cats, a));

  // round toward 0
  a = 0.5;
  ASSERT_FALSE(Decision(cats, a));

  // valid
  a = 13;
  bits.Set(a);
  ASSERT_FALSE(Decision(bits.Bits(), a));
}

/**
 * Test for running inference with an input category greater than any category stored in
 * the tree.
 */
TEST(Categorical, MinimalSet) {
  std::size_t constexpr kRows = 256, kCols = 1, kCat = 3;
  std::vector<FeatureType> types{FeatureType::kCategorical};
  auto Xy =
      RandomDataGenerator{kRows, kCols, 0.0}.Type(types).MaxCategory(kCat).GenerateDMatrix(true);

  std::unique_ptr<Learner> learner{Learner::Create({Xy})};
  learner->SetParam("max_depth", "1");
  learner->SetParam("tree_method", "hist");
  learner->Configure();
  learner->UpdateOneIter(0, Xy);

  Json model{Object{}};
  learner->SaveModel(&model);
  auto tree = model["learner"]["gradient_booster"]["model"]["trees"][0];
  ASSERT_GE(get<I32Array const>(tree["categories"]).size(), 1);
  auto v = get<I32Array const>(tree["categories"])[0];
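  // `v` is a category actually recorded in the root split; it is used below, after the
  // model is reloaded, to hit the matching branch.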

  HostDeviceVector<float> predt;
  {
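    // None of these input categories is present in the trained split; with
    // pred_leaf=true the prediction is the leaf index, so every row is expected to
    // land in node 1, the left child of the root.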
    std::vector<float> data{static_cast<float>(kCat),
                            static_cast<float>(kCat + 1), 32.0f, 33.0f, 34.0f};
    auto test = GetDMatrixFromData(data, data.size(), kCols);
    learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
    ASSERT_EQ(predt.Size(), data.size());
    auto const& h_predt = predt.ConstHostSpan();
    for (auto v : h_predt) {
      ASSERT_EQ(v, 1);  // left child of root node
    }
  }

  {
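    // Reload the saved model into a fresh learner and predict on the category taken
    // from the split itself; it should be routed to node 2, the right child of the root.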
    std::unique_ptr<Learner> learner{Learner::Create({Xy})};
    learner->LoadModel(model);
    std::vector<float> data = {static_cast<float>(v)};
    auto test = GetDMatrixFromData(data, data.size(), kCols);
    learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true);
    auto const& h_predt = predt.ConstHostSpan();
    for (auto v : h_predt) {
      ASSERT_EQ(v, 2);  // right child of root node
    }
  }
}
}  // namespace common
}  // namespace xgboost