File: test_sparse_page_raw_format.cc

package info (click to toggle)
xgboost 3.0.0-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 13,796 kB
  • sloc: cpp: 67,502; python: 35,503; java: 4,676; ansic: 1,426; sh: 1,320; xml: 1,197; makefile: 204; javascript: 19
file content (62 lines) | stat: -rw-r--r-- 2,208 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/**
 * Copyright 2021-2024, XGBoost contributors
 */
#include <gtest/gtest.h>
#include <xgboost/data.h>  // for CSCPage, SortedCSCPage, SparsePage

#include <memory>  // for allocator, unique_ptr, __shared_ptr_ac...
#include <string>  // for char_traits, operator+, basic_string

#include "../../../src/common/io.h"  // for PrivateMmapConstStream, AlignedResourceReadStream...
#include "../../../src/data/sparse_page_writer.h"  // for CreatePageFormat
#include "../helpers.h"                            // for RandomDataGenerator
#include "dmlc/filesystem.h"                       // for TemporaryDirectory
#include "xgboost/context.h"                       // for Context

namespace xgboost::data {
/**
 * @brief Round-trip check for the "raw" sparse page format.
 *
 * Writes the batch of a randomly generated DMatrix to a temporary file using the format
 * returned by `CreatePageFormat<S>("raw")`, reads it back through a
 * `PrivateMmapConstStream`, and asserts that the sizes, entries, offsets, and base row
 * id of the page read back match those of the page that was written.
 *
 * @tparam S Page type under test, e.g. SparsePage, CSCPage or SortedCSCPage.
 */
template <typename S> void TestSparsePageRawFormat() {
  std::unique_ptr<SparsePageFormat<S>> format{CreatePageFormat<S>("raw")};
  Context ctx;

  // NOTE(review): arguments are presumably (rows, columns, sparsity) - confirm against
  // RandomDataGenerator.
  auto m = RandomDataGenerator{100, 14, 0.5}.GenerateDMatrix();
  // `n_bytes` below is overwritten for every batch and only a single page is read back,
  // so the comparison relies on there being exactly one batch; SingleColBlock() is
  // presumably what guarantees that here.
  ASSERT_TRUE(m->SingleColBlock());
  dmlc::TemporaryDirectory tmpdir;
  std::string path = tmpdir.path + "/sparse.page";
  S orig;
  std::size_t n_bytes{0};
  {
    // Scope the writer so that it is destroyed (flushing the stream) before the file is
    // opened for reading.
    auto fo = std::make_unique<common::AlignedFileWriteStream>(StringView{path}, "wb");
    for (auto const &page : m->GetBatches<S>(&ctx)) {
      orig.Push(page);
      n_bytes = format->Write(page, fo.get());
    }
  }

  S page;
  std::unique_ptr<common::AlignedResourceReadStream> fi{
      std::make_unique<common::PrivateMmapConstStream>(path.c_str(), 0, n_bytes)};
  format->Read(&page, fi.get());

  // Verify the sizes before comparing element-wise: without this a short read would
  // index past the end of the loaded vectors, and a read that produced extra trailing
  // elements would go unnoticed.
  ASSERT_EQ(page.data.Size(), orig.data.Size());
  ASSERT_EQ(page.offset.Size(), orig.offset.Size());

  auto const &h_page_data = page.data.HostVector();
  auto const &h_orig_data = orig.data.HostVector();
  for (std::size_t i = 0; i < h_orig_data.size(); ++i) {
    ASSERT_EQ(h_page_data[i].fvalue, h_orig_data[i].fvalue);
    ASSERT_EQ(h_page_data[i].index, h_orig_data[i].index);
  }

  auto const &h_page_offset = page.offset.HostVector();
  auto const &h_orig_offset = orig.offset.HostVector();
  for (std::size_t i = 0; i < h_orig_offset.size(); ++i) {
    ASSERT_EQ(h_page_offset[i], h_orig_offset[i]);
  }

  ASSERT_EQ(page.base_rowid, orig.base_rowid);
}

// Runs the raw-format write/read round-trip check with SparsePage as the page type.
TEST(SparsePageRawFormat, SparsePage) { TestSparsePageRawFormat<SparsePage>(); }

// Runs the raw-format write/read round-trip check with CSCPage as the page type.
TEST(SparsePageRawFormat, CSCPage) { TestSparsePageRawFormat<CSCPage>(); }

// Runs the raw-format write/read round-trip check with SortedCSCPage as the page type.
TEST(SparsePageRawFormat, SortedCSCPage) { TestSparsePageRawFormat<SortedCSCPage>(); }
}  // namespace xgboost::data