File: containers.cpp

package info (click to toggle)
pytorch-cuda 2.6.0%2Bdfsg-7
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 161,620 kB
  • sloc: python: 1,278,832; cpp: 900,322; ansic: 82,710; asm: 7,754; java: 3,363; sh: 2,811; javascript: 2,443; makefile: 597; ruby: 195; xml: 84; objc: 68
file content (90 lines) | stat: -rw-r--r-- 2,837 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>

#include <gtest/gtest.h>

#include <c10/util/ApproximateClock.h>
#include <c10/util/irange.h>
#include <torch/csrc/profiler/containers.h>
#include <torch/csrc/profiler/util.h>

// Appends 4096 ints to an AppendOnlyList<int, 1024>, checking the reported
// size after every insertion, then verifies that iteration yields the values
// in insertion order and that clear() brings the reported size back to zero.
// NOTE(review): the second template argument (1024) is presumably the block
// size, in which case this run spans several blocks -- confirm in containers.h.
TEST(ProfilerTest, AppendOnlyList) {
  const int num_items = 4096;
  torch::profiler::impl::AppendOnlyList<int, 1024> values;

  // Grow the list one element at a time; size() must track every append.
  for (const auto value : c10::irange(num_items)) {
    values.emplace_back(value);
    ASSERT_EQ(values.size(), value + 1);
  }

  // Iteration must visit 0, 1, ..., num_items - 1 in exactly that order.
  int visited = 0;
  for (const auto element : values) {
    ASSERT_EQ(element, visited);
    ++visited;
  }
  // ...and must not stop early.
  ASSERT_EQ(visited, num_items);

  values.clear();
  ASSERT_EQ(values.size(), 0);
}

// Checks that the pointers handed back by AppendOnlyList::emplace_back() can
// still be written through after all later elements have been appended, and
// that those writes are observed when iterating the list in order.
TEST(ProfilerTest, AppendOnlyList_ref) {
  const int n = 512;
  torch::profiler::impl::AppendOnlyList<std::pair<int, int>, 64> list;

  // Collect the pointer returned for each default-constructed element.
  std::vector<std::pair<int, int>*> refs;
  refs.reserve(n);
  for ([[maybe_unused]] const auto _ : c10::irange(n)) {
    refs.push_back(list.emplace_back());
  }

  // Only now, after every append has happened, write through the saved
  // pointers. Each element gets its own insertion index as `first`.
  for (const auto i : c10::irange(n)) {
    *refs.at(i) = {i, 0};
  }

  int expected = 0;
  for (const auto& i : list) {
    ASSERT_EQ(i.first, expected++);
  }
  // Without this check the loop above would pass vacuously if iteration
  // yielded fewer than n elements (or none at all).
  ASSERT_EQ(expected, n);
}

// Test that we can convert TSC measurements back to wall clock time.
//
// The converter is constructed before the samples are taken and
// makeConverter() is called afterwards; that ordering is kept deliberately.
// NOTE(review): presumably the converter calibrates using measurements taken
// at construction and at makeConverter() -- confirm in ApproximateClock.h.
TEST(ProfilerTest, clock_converter) {
  const int n = 10001;
  c10::ApproximateClockToUnixTimeConverter converter;
  std::vector<
      c10::ApproximateClockToUnixTimeConverter::UnixAndApproximateTimePair>
      pairs;
  // Reserve up front so no vector reallocation lands between two samples.
  pairs.reserve(n);
  for ([[maybe_unused]] const auto i : c10::irange(n)) {
    pairs.push_back(c10::ApproximateClockToUnixTimeConverter::measurePair());
  }
  auto count_to_ns = converter.makeConverter();

  // Per-sample difference between the directly measured time (t_) and the
  // time reconstructed from the approximate clock reading (approx_t_).
  std::vector<int64_t> deltas;
  deltas.reserve(pairs.size());
  for (const auto& i : pairs) {
    deltas.push_back(i.t_ - count_to_ns(i.approx_t_));
  }
  // Sorted so that the median and quartiles can be read off by index.
  std::sort(deltas.begin(), deltas.end());

  // In general it's not a good idea to put clocks in unit tests as it leads
  // to flakiness. We mitigate this by:
  //   1) Testing the clock itself. While the time to complete a task may
  //      vary, two clocks measuring the same time should be much more
  //      consistent.
  //   2) Only testing the interquartile range. Context switches between
  //      calls to the two timers do occur and can result in hundreds of
  //      nanoseconds of noise, but such switches are only a few percent
  //      of cases.
  //   3) We're willing to accept a somewhat large bias which can emerge from
  //      differences in the cost of calling each clock.
  // Median bias bound, then interquartile spread bound (presumably both in
  // nanoseconds, given the converter's name).
  EXPECT_LT(std::abs(deltas[n / 2]), 200);
  EXPECT_LT(deltas[n * 3 / 4] - deltas[n / 4], 50);
}

// Exercises SOFT_ASSERT under each setSoftAssertRaises() setting: explicitly
// raising, explicitly non-raising, and reset to the default. The statements
// are order-dependent because setSoftAssertRaises() changes state that
// outlives each individual check.
TEST(ProfilerTest, soft_assert) {
  using torch::profiler::impl::setSoftAssertRaises;

  // A condition that holds makes SOFT_ASSERT evaluate to true.
  EXPECT_TRUE(SOFT_ASSERT(true));

  // Raising enabled: a failed soft assert must throw.
  setSoftAssertRaises(true);
  EXPECT_ANY_THROW(SOFT_ASSERT(false));

  // Raising disabled: a failed soft assert must not throw.
  setSoftAssertRaises(false);
  EXPECT_NO_THROW(SOFT_ASSERT(false));

  // Reset soft assert behavior to default; the test expects the default to
  // be non-raising.
  setSoftAssertRaises(std::nullopt);
  EXPECT_NO_THROW(SOFT_ASSERT(false));
}