1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
#include "Halide.h"
#include <stdio.h>
using namespace Halide;
using namespace Halide::Internal;
// This was a failing case from https://github.com/halide/Halide/issues/1618
class CheckAllocationSize : public IRVisitor {
using IRVisitor::visit;
void visit(const Allocate *op) override {
if (op->name == "input_cpy") {
result = op->extents[0];
} else {
op->body.accept(this);
}
}
public:
Expr result;
};
int main(int argc, char **argv) {
Var x, y, xout, xin;
ImageParam input(type_of<int16_t>(), 2);
Func input_cpy("input_cpy");
input_cpy(x, y) = input(x, y);
Func input_cpy_2;
input_cpy_2(x, y) = input_cpy(x, y);
Func sum_stage;
sum_stage(x, y) = (input_cpy_2(x, y - 4) +
input_cpy_2(x, y - 3) +
input_cpy_2(x, y - 2) +
input_cpy_2(x, y - 1) +
input_cpy_2(x, y));
Func sum_stage_cpy;
sum_stage_cpy(x, y) = sum_stage(x, y);
Func sum_stage_cpy_2;
sum_stage_cpy_2(x, y) = sum_stage_cpy(x, y);
// bound the output to a fixed size
sum_stage_cpy_2.bound(x, 0, 512);
sum_stage_cpy_2.bound(y, 0, 512);
// This stage was grossly overdimensioned by bounds inference: it
// should only need 5 complete lines (512 * 5) = 2560 pixels.
input_cpy.compute_at(sum_stage_cpy, y);
input_cpy_2.compute_at(sum_stage_cpy, xout)
.split(x, xout, xin, 32)
.unroll(xout, 4);
sum_stage_cpy
.compute_at(sum_stage_cpy_2, y)
.split(x, xout, xin, 32)
.unroll(xout, 4);
Module m = sum_stage_cpy_2.compile_to_module({input});
CheckAllocationSize checker;
m.functions()[0].body.accept(&checker);
if (!is_const(checker.result, 512)) {
std::cerr << m.functions()[0].body << "\n\n"
<< "Allocation size was supposed to be 512 in dimension 0 in the stmt above\n";
return -1;
}
printf("Success!\n");
return 0;
}
|