1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
|
#include "HalideBuffer.h"
#include "HalideRuntime.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
#if defined(TEST_CUDA)
#include "HalideRuntimeCuda.h"
#elif defined(TEST_OPENCL)
#include "HalideRuntimeOpenCL.h"
#elif defined(TEST_METAL)
#include "HalideRuntimeMetal.h"
#elif defined(TEST_VULKAN)
#include "HalideRuntimeVulkan.h"
#endif
#include "gpu_object_lifetime.h"
#include "gpu_object_lifetime_tracker.h"
// Lets the code below refer to Buffer without the Halide::Runtime:: prefix.
using namespace Halide::Runtime;
// File-wide tracker: my_halide_print() forwards every runtime print message to
// it via record_gpu_debug(), and main() queries it at the end via
// validate_gpu_object_lifetime().
Halide::Internal::GpuObjectLifetimeTracker tracker;
// Print hook that main() registers through halide_set_custom_print().
//
// Each message is first echoed verbatim to stdout and then handed to the
// global `tracker` through record_gpu_debug().
//
// user_context: unused by this hook.
// str:          message text supplied by the caller of the hook.
void my_halide_print(void *user_context, const char *str) {
    (void)user_context;  // Part of the callback signature; not needed here.

    // Echo first, then record, so the visible log is emitted before the
    // tracker processes the same line.
    fprintf(stdout, "%s", str);
    tracker.record_gpu_debug(str);
}
// Test driver for the gpu_object_lifetime filter.
//
// Runs a series of scenarios that create, copy back, wrap, detach and free
// device-side buffers, while my_halide_print() feeds every runtime print
// message into the global `tracker`. After all scenarios have run, the tracker
// is asked to validate what it recorded.
//
// argc/argv are not used.
//
// Returns 0 on success, and also when no TEST_* backend macro is defined (the
// test is skipped). Returns 1 if any scenario reads back unexpected data, if
// halide_device_free() reports an error, or if the tracker validation fails.
//
// NOTE(review): the return values of gpu_object_lifetime() and copy_to_host()
// are ignored throughout; confirm that this is intentional.
int main(int argc, char **argv) {
// Announce which backend this binary was compiled for; bail out early (as a
// skip, not a failure) when none was selected.
#if defined(TEST_CUDA)
printf("TEST_CUDA enabled for gpu_object_lifetime testing...\n");
#elif defined(TEST_OPENCL)
printf("TEST_OPENCL enabled for gpu_object_lifetime testing...\n");
#elif defined(TEST_METAL)
printf("TEST_METAL enabled for gpu_object_lifetime testing...\n");
#elif defined(TEST_VULKAN)
printf("TEST_VULKAN enabled for gpu_object_lifetime testing...\n");
#else
// TODO: we can't support WebGPU here (yet) because our WebGPU runtime doesn't
// (yet) support halide_webgpu_wrap_native(); when it does, we should be able
// to add it here.
printf("[SKIP] No GPU features enabled for gpu_object_lifetime testing!\n");
return 0;
#endif
// Route runtime print messages through my_halide_print() so the tracker sees them.
halide_set_custom_print(&my_halide_print);
// Run the whole program several times.
// wrap_memory == 0: the first three scenarios use Buffer<int, 1>(80).
// wrap_memory == 1: they wrap the local `scratch` array instead.
// The remaining scenarios do not depend on wrap_memory.
for (int wrap_memory = 0; wrap_memory < 2; wrap_memory++) {
// Do an explicit copy-back and device free.
{
int scratch[80];
Buffer<int, 1> output = wrap_memory ? Buffer<int, 1>(scratch, 80) : Buffer<int, 1>(80);
gpu_object_lifetime(output);
output.copy_to_host();
output.device_free();
// The host data is read after device_free(); every element must equal its index.
for (int x = 0; x < output.width(); x++) {
if (output(x) != x) {
printf("Error! (explicit copy back %d): %d != %d\n", wrap_memory, output(x), x);
return 1;
}
}
}
// Do an explicit copy-back but no device free
{
int scratch[80];
Buffer<int, 1> output = wrap_memory ? Buffer<int, 1>(scratch, 80) : Buffer<int, 1>(80);
gpu_object_lifetime(output);
output.copy_to_host();
// Same index check as above; no explicit device_free() call is made in this scope.
for (int x = 0; x < output.width(); x++) {
if (output(x) != x) {
printf("Error! (explicit copy back, no device free %d): %d != %d\n", wrap_memory, output(x), x);
return 1;
}
}
}
// Do no explicit copy-back and no device free
{
int scratch[80];
Buffer<int, 1> output = wrap_memory ? Buffer<int, 1>(scratch, 80) : Buffer<int, 1>(80);
gpu_object_lifetime(output);
// Nothing is read back or verified here; the scope simply ends.
}
// Test coverage for Halide::Runtime::Buffer device pointer management.
{
Buffer<int, 1> output(80);
// Call Halide filter to get a device allocation.
gpu_object_lifetime(output);
{
// Construct a new buffer from the halide_buffer_t and let it destruct.
// Verifies this does not deallocate or otherwise disable the device handle.
Buffer<int, 1> temp(*output.raw_buffer());
}
// Exercise the device handle again after `temp` is gone.
output.copy_to_host();
}
// Do this test twice to test explicit unwrapping and letting the destructor do it.
for (int i = 0; i < 2; i++) {
Buffer<int, 1> output(80);
// Call Halide filter to get a device allocation.
gpu_object_lifetime(output);
// This is ugly. Getting a native device handle from scratch requires writing API
// dependent code. Instead, we reuse a Halide allocated handle from an API where we know
// the device field is just a raw device handle. If we don't know this about the API,
// we don't test anything here.
// This gets some minimal test coverage for code paths in Halide::Runtime::Buffer.
bool can_rewrap = false;
uintptr_t native_handle = 0;
// Only CUDA, OpenCL and Metal have a branch here; with TEST_VULKAN
// can_rewrap stays false and the wrap test below is not run.
#if defined(TEST_CUDA)
if (output.raw_buffer()->device_interface == halide_cuda_device_interface()) {
native_handle = output.raw_buffer()->device;
can_rewrap = true;
}
#elif defined(TEST_OPENCL)
if (output.raw_buffer()->device_interface == halide_opencl_device_interface()) {
native_handle = halide_opencl_get_cl_mem(nullptr, output.raw_buffer());
can_rewrap = true;
}
#elif defined(TEST_METAL)
if (output.raw_buffer()->device_interface == halide_metal_device_interface()) {
native_handle = halide_metal_get_buffer(nullptr, output.raw_buffer());
can_rewrap = true;
}
#endif
if (can_rewrap) {
// Wrap the handle obtained from `output` in a second buffer, mark the
// device side dirty, and copy both buffers back to the host.
Buffer<int, 1> wrap_test(80);
wrap_test.device_wrap_native(output.raw_buffer()->device_interface, native_handle);
wrap_test.set_device_dirty();
wrap_test.copy_to_host();
output.copy_to_host();
// Both buffers were read back from the same native handle, so they must match.
for (int x = 0; x < output.width(); x++) {
if (output(x) != wrap_test(x)) {
printf("Error! (wrap native test %d): %d != %d\n", i, output(x), wrap_test(x));
return 1;
}
}
// Second pass only: detach explicitly. On the first pass wrap_test just
// goes out of scope without an explicit detach.
if (i == 1) {
wrap_test.device_detach_native();
}
}
}
// Test coverage for Halide::Runtime::Buffer construction from halide_buffer_t, unmanaged
{
Buffer<int, 1> output(80);
// Work on a by-value copy of the halide_buffer_t struct, so the filter
// updates raw_buf rather than output's own halide_buffer_t.
halide_buffer_t raw_buf = *output.raw_buffer();
// Call Halide filter to get a device allocation.
gpu_object_lifetime(&raw_buf);
{
// Construct a Buffer from raw_buf and let it destruct immediately.
Buffer<int, 1> copy(raw_buf);
}
// raw_buf is freed by hand here, after `copy` has already been destroyed.
// Note that a nonzero result should be impossible here (in theory)
int result = halide_device_free(nullptr, &raw_buf);
if (result != 0) {
printf("Error! halide_device_free() returned: %d\n", result);
return 1;
}
}
// Test coverage for Halide::Runtime::Buffer construction from halide_buffer_t, taking ownership
{
Buffer<int, 1> output(80);
halide_buffer_t raw_buf = *output.raw_buffer();
// Call Halide filter to get a device allocation.
gpu_object_lifetime(&raw_buf);
// Unlike the previous scenario there is no manual halide_device_free();
// presumably `copy` releases the device allocation when it is destroyed,
// since it is constructed with BufferDeviceOwnership::Allocated. TODO confirm.
Buffer<int, 1> copy(raw_buf, Halide::Runtime::BufferDeviceOwnership::Allocated);
}
// Test combined device and host allocation support.
{
Buffer<int, 1> output(80);
gpu_object_lifetime(output);
// Skipped when the filter left no device interface on `output`.
if (output.raw_buffer()->device_interface != nullptr) {
// output2 starts with a null host pointer and gets its storage from
// device_and_host_malloc() using output's device interface.
Buffer<int, 1> output2(nullptr, 80);
output2.device_and_host_malloc(output.raw_buffer()->device_interface);
gpu_object_lifetime(output2);
output.copy_to_host();
output2.copy_to_host();
// Both buffers went through the same filter, so their contents must agree.
for (int x = 0; x < output.width(); x++) {
if (output(x) != output2(x)) {
printf("Error! (device and host allocation test): %d != %d\n", output(x), output2(x));
return 1;
}
}
}
}
// At the end of each wrap_memory iteration, release the device interface of
// the backend selected at compile time.
#if defined(TEST_CUDA)
halide_device_release(nullptr, halide_cuda_device_interface());
#elif defined(TEST_OPENCL)
halide_device_release(nullptr, halide_opencl_device_interface());
#elif defined(TEST_METAL)
halide_device_release(nullptr, halide_metal_device_interface());
#elif defined(TEST_VULKAN)
halide_device_release(nullptr, halide_vulkan_device_interface());
#endif
}
// Ask the tracker to validate everything it recorded through my_halide_print();
// any nonzero result fails the test.
int ret = tracker.validate_gpu_object_lifetime(false /* allow_globals */, true /* allow_none */, 2 /* max_globals */);
if (ret != 0) {
fprintf(stderr, "validate_gpu_object_lifetime() failed\n");
return 1;
}
printf("Success!\n");
return 0;
}
|