1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
|
#include "Halide.h"
using namespace Halide;
// Pure variables shared by the pipeline definition and by every schedule
// in this file (schedules fold storage and compute along y).
Var x;
Var y;
// Realize `f` over a 256x256 region and verify that every element equals
// the sum of its coordinates. On the first mismatch, print the offending
// element and terminate the process with exit status 1.
void check(Func f) {
    Buffer<int> result = f.realize({256, 256});
    result.for_each_element([&](int col, int row) {
        const int actual = result(col, row);
        const int expected = col + row;
        if (actual == expected) {
            return;
        }
        printf("out(%d, %d) = %d instead of %d\n", col, row, actual, expected);
        exit(1);
    });
}
// Define the two-stage pipeline under test on the caller-supplied Funcs:
// A produces x + y at each point and B is a pointwise read of A.
// Scheduling is left entirely to the caller.
void make_pipeline(Func &A, Func &B) {
    // Producer stage.
    const Expr coordinate_sum = x + y;
    A(x, y) = coordinate_sum;

    // Consumer stage: forwards A unchanged.
    B(x, y) = A(x, y);
}
// Entry point of the async copy-chain stress test.
//
// Each scenario builds a fresh A -> B pipeline (see make_pipeline), inserts
// zero, one or two wrapper stages between A and B via Func::in(), schedules
// every producer as async with its storage folded to two rows along y, and
// then validates the output with check().
//
// Returns 0 when every scenario passes or when the test is skipped for the
// current JIT target. A wrong output value does not return: check() prints
// the mismatch and calls exit(1).
int main(int argc, char **argv) {
    // Queried once; used for the skip checks and for the GPU-only section.
    Target target = get_jit_target_from_environment();
    if (target.arch == Target::WebAssembly) {
        printf("[SKIP] WebAssembly does not support async() yet.\n");
        return 0;
    }

    if (target.has_feature(Target::Vulkan) && (target.os == Target::Windows)) {
        printf("[SKIP] Skipping test for Vulkan on Windows ... fails unless run on its own!\n");
        return 0;
    }

    // Make a list of extern pipeline stages (just copies) all async
    // and connected by double buffers, then try various nestings of
    // them. This is a stress test of the async extern storage folding
    // logic.

    // Basic double-buffered A->B, with no extern stages
    {
        Func A, B;
        make_pipeline(A, B);

        A.store_root().compute_at(B, y).fold_storage(y, 2).async();

        check(B);
    }

    // Inject a copy stage between them
    {
        Func A, B;
        make_pipeline(A, B);

        A.store_root().compute_at(B, y).fold_storage(y, 2).async();
        A.in().store_root().compute_at(B, y).fold_storage(y, 2).async().copy_to_host();

        check(B);
    }

    // Inject a copy stage between them, but nest the first stage into it
    {
        Func A, B;
        make_pipeline(A, B);

        A.store_root().compute_at(A.in(), Var::outermost()).fold_storage(y, 2).async();
        A.in().store_root().compute_at(B, y).fold_storage(y, 2).async().copy_to_host();

        check(B);
    }

    // Two copy stages, flat
    {
        Func A, B;
        make_pipeline(A, B);

        A.store_root().compute_at(B, y).fold_storage(y, 2).async();
        A.in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_host().async();
        A.in().in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_host().async();

        check(B);
    }

    // Two copy stages, each stage nested inside the outermost var of the next
    {
        Func A, B;
        make_pipeline(A, B);

        A.store_root().compute_at(A.in(), Var::outermost()).fold_storage(y, 2).async();
        A.in().store_root().compute_at(A.in().in(), Var::outermost()).fold_storage(y, 2).copy_to_host().async();
        A.in().in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_host().async();

        check(B);
    }

    // The remaining scenarios involve device copies, so they only run when
    // the JIT target has a GPU feature enabled. Reuse the target queried at
    // the top of main rather than asking the environment a second time.
    if (target.has_gpu_feature()) {
        // Two copy stages, to the device and back, flat
        {
            Func A, B;
            make_pipeline(A, B);

            A.store_root().compute_at(B, y).fold_storage(y, 2).async();
            A.in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_device().async();
            A.in().in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_host().async();

            check(B);
        }

        // Two copy stages, to the device and back, each stage nested inside the outermost var of the next
        {
            Func A, B;
            make_pipeline(A, B);

            A.store_root().compute_at(A.in(), Var::outermost()).fold_storage(y, 2).async();
            A.in().store_root().compute_at(A.in().in(), Var::outermost()).fold_storage(y, 2).copy_to_device().async();
            A.in().in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_host().async();

            check(B);
        }

        // The same, but make one of the copy stages non-extern to force a shared host-dev allocation

        // Flat; the first wrapper (A.in()) is the non-extern one, the
        // second wrapper still copies to the host.
        {
            Func A, B;
            make_pipeline(A, B);

            A.store_root().compute_at(B, y).fold_storage(y, 2).async();
            A.in().store_root().compute_at(B, y).fold_storage(y, 2).async();
            A.in().in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_host().async();

            check(B);
        }

        // Same non-extern first wrapper, but with each stage nested inside
        // the outermost var of the next.
        {
            Func A, B;
            make_pipeline(A, B);

            A.store_root().compute_at(A.in(), Var::outermost()).fold_storage(y, 2).async();
            A.in().store_root().compute_at(A.in().in(), Var::outermost()).fold_storage(y, 2).async();
            A.in().in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_host().async();

            check(B);
        }

        // Flat; the first wrapper copies to the device and the second
        // wrapper (A.in().in()) is the non-extern one.
        {
            Func A, B;
            make_pipeline(A, B);

            A.store_root().compute_at(B, y).fold_storage(y, 2).async();
            A.in().store_root().compute_at(B, y).fold_storage(y, 2).copy_to_device().async();
            A.in().in().store_root().compute_at(B, y).fold_storage(y, 2).async();

            check(B);
        }

        // Same device copy followed by a non-extern second wrapper, but with
        // each stage nested inside the outermost var of the next.
        {
            Func A, B;
            make_pipeline(A, B);

            A.store_root().compute_at(A.in(), Var::outermost()).fold_storage(y, 2).async();
            A.in().store_root().compute_at(A.in().in(), Var::outermost()).fold_storage(y, 2).copy_to_device().async();
            A.in().in().store_root().compute_at(B, y).fold_storage(y, 2).async();

            check(B);
        }
    }

    printf("Success!\n");
    return 0;
}
|