1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <c10/cuda/CUDADeviceAssertion.h>
#include <c10/cuda/CUDAException.h>
#include <c10/cuda/CUDAFunctions.h>
#include <c10/cuda/CUDAStream.h>
#include <chrono>
#include <iostream>
#include <string>
#include <thread>
using ::testing::HasSubstr;
const auto max_assertions_failure_str =
"Assertion failure " + std::to_string(C10_CUDA_DSA_ASSERTION_COUNT - 1);
/**
* Device kernel that takes a single integer parameter as argument and
* will always trigger a device side assertion.
*/
__global__ void cuda_always_fail_assertion_kernel(
const int a,
TORCH_DSA_KERNEL_ARGS) {
CUDA_KERNEL_ASSERT2(a != a);
}
/**
* TEST: Triggering device side assertion from multiple block but single thread
* <<<10,1>>>. Here we are triggering assertion on 10 blocks, each with only 1
* thread. Since we have more than 10 SM on a GPU, we expect each block to be
* executed and successfully assert, Hence we will see assertions logged from
* each block here.
*/
void cuda_device_assertions_multiple_writes_from_multiple_blocks() {
const auto stream = c10::cuda::getStreamFromPool();
TORCH_DSA_KERNEL_LAUNCH(
cuda_always_fail_assertion_kernel,
10, /* Blocks */
1, /* Threads */
0, /* Shared mem */
stream, /* Stream */
1);
try {
c10::cuda::device_synchronize();
throw std::runtime_error("Test didn't fail, but should have.");
} catch (const c10::Error& err) {
const auto err_str = std::string(err.what());
ASSERT_THAT(err_str, HasSubstr(max_assertions_failure_str));
ASSERT_THAT(
err_str, HasSubstr("Thread ID that failed assertion = [0,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [0,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [1,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [2,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [3,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [4,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [5,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [6,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [7,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [8,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Block ID that failed assertion = [9,0,0]"));
ASSERT_THAT(err_str, HasSubstr("Device that launched kernel = 0"));
ASSERT_THAT(
err_str,
HasSubstr(
"Name of kernel launched that led to failure = cuda_always_fail_assertion_kernel"));
ASSERT_THAT(
err_str, HasSubstr("File containing kernel launch = " __FILE__));
ASSERT_THAT(
err_str,
HasSubstr(
"Function containing kernel launch = " +
std::string(__FUNCTION__)));
ASSERT_THAT(
err_str,
HasSubstr(
"Stream kernel was launched on = " + std::to_string(stream.id())));
}
}
TEST(CUDATest, cuda_device_assertions_multiple_writes_from_multiple_blocks) {
#ifdef TORCH_USE_CUDA_DSA
c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref().enabled_at_runtime = true;
cuda_device_assertions_multiple_writes_from_multiple_blocks();
#else
GTEST_SKIP() << "CUDA device-side assertions (DSA) was not enabled at compile time.";
#endif
}
|