1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
#include "Halide.h"
#include <algorithm>
#include <stdio.h>
using namespace Halide;
int main(int argc, char **argv) {
    Var x, y;
    Func f, g;

    // Sizing notes carried over from earlier experiments:
    //  - up to about 40MB/image * 2 buffers seemed to work on luxosr when freshly booted;
    //  - 130MB worked on a 2GB Quadro 4000 when freshly booted.
    // Here we allocate 10MB/image * 2 buffers (W * H ints, assuming 4-byte int),
    // so that the test passes reliably.
    const int W = 1024 * 10 / 4;
    const int H = 1024;

    printf("Defining function...\n");

    // f is the larger of the two coordinates; g limits f to the range [20, 100].
    f(x, y) = max(x, y);
    g(x, y) = clamp(f(x, y), 20, 100);

    // The target comes from the environment; only add a GPU schedule when it has a GPU feature.
    Target target = get_jit_target_from_environment();
    const bool use_gpu = target.has_gpu_feature();
    if (use_gpu) {
        // Compute both stages at root, each tiled 16x16 over (x, y).
        Var xi, yi;
        f.compute_root().gpu_tile(x, y, xi, yi, 16, 16);
        g.compute_root().gpu_tile(x, y, xi, yi, 16, 16);
    }

    printf("Realizing function...\n");

    Buffer<int> img = g.realize({W, H}, target);

    // Recompute every pixel on the host and stop at the first mismatch.
    for (int col = 0; col < W; col++) {
        for (int row = 0; row < H; row++) {
            int want = std::max(col, row);
            want = std::max(want, 20);
            want = std::min(want, 100);

            const int got = img(col, row);
            if (got == want) {
                continue;
            }

            printf("img[%d, %d] = %d\n", col, row, got);
            return 1;
        }
    }

    printf("Success!\n");
    return 0;
}
|