1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
|
#include "Halide.h"
#include <cmath>
#include <iostream>
#include <stdio.h>
using namespace Halide;
int main(int argc, char **argv) {
{
Var x("x"), y("y"), xi("xi"), yi("yi");
Func f("f");
printf("Defining function...\n");
f(x, y) = x * y + 2.4f;
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 8, 8, TailStrategy::GuardWithIf).vectorize(xi, 4, TailStrategy::GuardWithIf);
}
printf("Realizing function...\n");
Buffer<float> imf = f.realize({32, 32}, target);
// Check the result was what we expected
for (int i = 0; i < 32; i++) {
for (int j = 0; j < 32; j++) {
float correct = i * j + 2.4f;
if (fabs(imf(i, j) - correct) > 0.001f) {
printf("imf[%d, %d] = %f instead of %f\n", i, j, imf(i, j), correct);
return 1;
}
}
}
}
{
Var x("x"), y("y"), xi("xi"), yi("yi");
Func f("f");
ImageParam im(Float(32), 2);
printf("Defining function...\n");
f(x, y) = x * y + 2.4f + im(x, y);
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 8, 8, TailStrategy::GuardWithIf).vectorize(xi, 4, TailStrategy::GuardWithIf);
}
printf("Realizing function...\n");
Buffer<float> input_img(32, 32);
for (int i = 0; i < 32; i++) {
for (int j = 0; j < 32; j++) {
input_img(i, j) = i + j;
}
}
im.set(input_img);
Buffer<float> imf = f.realize({32, 32}, target);
// Check the result was what we expected
for (int i = 0; i < 32; i++) {
for (int j = 0; j < 32; j++) {
float correct = i * j + 2.4f + i + j;
if (fabs(imf(i, j) - correct) > 0.001f) {
printf("imf[%d, %d] = %f instead of %f\n", i, j, imf(i, j), correct);
return 1;
}
}
}
}
{
Var x("x"), y("y"), xi("xi"), yi("yi");
Func f("f");
ImageParam im(Float(32), 2);
printf("Defining function...\n");
f(x, y) = select(im(x, y) > 32.0f, 1.0f, -1.0f) + im(x, y);
Target target = get_jit_target_from_environment();
if (target.has_gpu_feature()) {
f.gpu_tile(x, y, xi, yi, 8, 8, TailStrategy::GuardWithIf).vectorize(xi, 4, TailStrategy::GuardWithIf);
}
printf("Realizing function...\n");
Buffer<float> input_img(32, 32);
for (int i = 0; i < 32; i++) {
for (int j = 0; j < 32; j++) {
input_img(i, j) = i + j;
}
}
im.set(input_img);
Buffer<float> imf = f.realize({32, 32}, target);
// Check the result was what we expected
for (int i = 0; i < 32; i++) {
for (int j = 0; j < 32; j++) {
float correct = (i + j > 32 ? 1.0f : -1.0f) + i + j;
if (fabs(imf(i, j) - correct) > 0.001f) {
printf("imf[%d, %d] = %f instead of %f\n", i, j, imf(i, j), correct);
return 1;
}
}
}
}
printf("Success!\n");
return 0;
}
|