File: vectorized_reduction_bug.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (70 lines) | stat: -rw-r--r-- 1,833 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include "Halide.h"
#include <stdio.h>
using namespace Halide;

// Regression test for vectorization interacting with reductions.
//
// Runs two independent pipelines, compares every output element against a
// value computed by hand, and reports the first mismatch.
//
// Returns 0 (after printing "Success!") when both pipelines produce the
// expected output, and 1 as soon as any element differs.
int main(int argc, char *argv[]) {
    // Case 1: a reduction that is consumed by a Func vectorized over a
    // dimension the reduction itself does not depend on.
    {
        Func sum("sum"), foo("foo");
        Var x("x"), y("y"), c("c");

        // r.x covers min 1, extent 2, i.e. the values 1 and 2.
        RDom r(1, 2, "r");

        // sum(x, y) should equal 3
        sum(x, y) += r.x;

        foo(x, y, c) = select(c == 3, 255, sum(x, y));
        // foo(x, y, c) should equal (3, 3, 3, 255);

        foo.vectorize(c, 4);

        Buffer<int32_t> output = foo.realize({2, 2, 4});
        for (int y = 0; y < 2; y++) {
            for (int x = 0; x < 2; x++) {
                for (int c = 0; c < 4; c++) {
                    int correct = (c == 3 ? 255 : 3);
                    if (output(x, y, c) != correct) {
                        printf("output(%d, %d, %d) = %d instead of %d\n",
                               x, y, c, output(x, y, c), correct);
                        // Fail the test; without this the mismatch was only
                        // logged and the program still reported success.
                        return 1;
                    }
                }
            }
        }
    }

    // Case 2: an update stage containing an inline reduction, vectorized
    // along x and computed inside a split loop of its transposing consumer.
    {
        Func f("f"), g("g"), h("h");
        Var x("x"), y("y"), c("c");

        h(x, y) = x + y;
        h.compute_root();

        // g is zero everywhere except row y == 0, which holds the sum of
        // h(x, r) for r in [0, 120).
        g(x, y) = 0;
        g(x, 0) = sum(h(x, RDom(0, 120)));

        // Transpose.
        f(y, x) = g(x, y);

        Var x_outer("x_outer");
        f.split(x, x_outer, x, 8 * 2);

        g.compute_at(f, x_outer);
        g.update(0).vectorize(x);

        f.compute_root();
        Buffer<int32_t> im = f.realize({100, 100});

        for (int y = 0; y < im.height(); y++) {
            for (int x = 0; x < im.width(); x++) {
                // Because f is the transpose of g, im(x, y) == g(y, x), which
                // is non-zero only when x == 0. There it equals
                // sum_{r=0}^{119} (y + r) = 120 * y + 120 * 119 / 2
                // (the x term below is zero in that branch).
                int correct = (x != 0) ? 0 : 120 * (x + y) + 120 * 119 / 2;
                if (im(x, y) != correct) {
                    printf("im(%d, %d) = %d instead of %d\n",
                           x, y, im(x, y), correct);
                    return 1;
                }
            }
        }
    }

    printf("Success!\n");
    return 0;
}