// Checks that GPU schedules can use dynamically-sized allocations, made
// per-block and per-thread, in both GPU shared memory and heap memory.
#include "Halide.h"
#include <stdio.h>
using namespace Halide;
int main(int argc, char **argv) {
Target t = get_jit_target_from_environment();
if (!t.has_gpu_feature()) {
printf("[SKIP] No GPU target enabled.\n");
return 0;
}
if (t.has_feature(Target::OpenGLCompute)) {
printf("[SKIP] Skipping test for OpenGLCompute, as it does not support dynamically-sized shared memory\n");
return 0;
}
// Check dynamic allocations per-block and per-thread into both
// shared and global
for (int per_thread = 0; per_thread < 2; per_thread++) {
for (auto memory_type : {MemoryType::GPUShared, MemoryType::Heap}) {
Func f("f"), g("g");
Var x("x"), xi("xi");
f(x) = x;
g(x) = f(x) + f(2 * x);
g.gpu_tile(x, xi, 16);
if (per_thread) {
f.compute_at(g, xi);
} else {
f.compute_at(g, x).gpu_threads(x);
}
f.store_in(memory_type);
// The amount of shared/heap memory required varies with x
Buffer<int> out = g.realize({100});
for (int x = 0; x < 100; x++) {
int correct = 3 * x;
if (out(x) != correct) {
printf("out[%d|%d](%d) = %d instead of %d\n",
per_thread, (int)memory_type, x, out(x), correct);
return -1;
}
}
}
}
printf("Success!\n");
return 0;
}
|