File: gpu_mixed_dimensionality.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (55 lines) | stat: -rw-r--r-- 1,499 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#include "Halide.h"
#include <stdio.h>

using namespace Halide;

int main(int argc, char **argv) {
    Target target = get_jit_target_from_environment();
    if (!target.has_gpu_feature()) {
        printf("[SKIP] No GPU target enabled.\n");
        return 0;
    }

    Func f("f"), g("g"), h("h"), out("out");
    Var x("x"), y("y"), z("z");

    f(x, y, z) = x + y + z;
    f(x, y, z) += 1;
    g(x, y, z) = f(x, y, z);
    g(x, y, z) += 1;
    h(x, y, z) = g(x, y, z);
    h(x, y, z) += 1;
    out(x, y, z) = h(x, y, z);
    out(x, y, z) += 1;

    Var xi("xi"), yi("yi"), zi("zi");
    out.gpu_tile(x, y, z, xi, yi, zi, 4, 4, 4);
    out.update().gpu_tile(x, y, xi, yi, 4, 4);
    h.compute_at(out, x).gpu_threads(x, y);
    h.update().gpu_threads(x);
    // TODO: NormalizeDimensionality in FuseGPUThreadLoops.cpp doesn't work in the following case.
    // g.compute_at(h, y).gpu_threads(x);
    // g.update();
    g.compute_at(h, x);
    g.update();
    f.compute_at(g, x);
    f.update();

    Buffer<int> o = out.realize({64, 64, 64});

    for (int z = 0; z < 64; z++) {
        for (int y = 0; y < 64; y++) {
            for (int x = 0; x < 64; x++) {
                int correct = x + y + z + 4;
                if (o(x, y, z) != correct) {
                    printf("out(%d, %d, %d) = %d instead of %d\n",
                           x, y, z, o(x, y, z), correct);
                    return 1;
                }
            }
        }
    }

    printf("Success!\n");
    return 0;
}