File: reorder_rvars.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (71 lines) | stat: -rw-r--r-- 1,780 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include "Halide.h"
#include <stdio.h>

using namespace Halide;

int main(int argc, char **argv) {
    Var x("x"), y("y");

    {
        RDom r1(0, 10, 1, 10);
        RDom r2(0, 10, 3, 10, 0, 5);

        // Define two identical functions

        Func f("f");
        f(x, y) = x + y;
        f(x, y) += r1.x * r1.y;
        f(x, r2.x) -= r2.z * f(x, r2.x + r2.y);

        Func g("g");
        g(x, y) = x + y;
        g(x, y) += r1.x * r1.y;
        g(x, r2.x) -= r2.z * g(x, r2.x + r2.y);

        // Reorder g
        g.reorder(y, x);
        // It is legal to reorder r1.x and r1.y
        // because stage g.update(0) is associative.
        g.update(0).reorder(r1.y, y, x, r1.x);
        g.update(1).reorder(r2.x, x, r2.y, r2.z);
        g.compute_root();
        f.compute_root();

        RDom r3(0, 20, 0, 20);
        Expr check = sum(abs(f(r3.x, r3.y) - g(r3.x, r3.y)));

        int err = evaluate_may_gpu<int>(cast<int>(check));

        if (err != 0) {
            printf("Reordering rvars affected the meaning!\n");
            return 1;
        }
    }

    // And now, a practical use-case for reorder rvars
    {
        Func input;
        input(x, y) = x * y;

        // Compute summed-area table
        Func sat;
        sat(x, y) = input(x, y);

        RDom r(1, 99);
        sat(x, r) += sat(x, r - 1);
        sat(r, y) += sat(r - 1, y);

        // Walk down the columns in vectors.
        Var xo, xi;
        sat.update().split(x, xo, xi, 4).reorder(xi, r, xo).vectorize(xi).parallel(xo);

        // Walk along the rows in parallel. For this we want the loop
        // over y outside of the loop over r, which is the default.
        sat.update(1).parallel(y);

        sat.realize({100, 100});
    }

    printf("Success!\n");
    return 0;
}