File: specialize_to_gpu.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (59 lines) | stat: -rw-r--r-- 1,449 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include "Halide.h"
#include <stdio.h>

using namespace Halide;

int main(int argc, char **argv) {
#ifdef WITH_SERIALIZATION_JIT_ROUNDTRIP_TESTING
    printf("[SKIP] Serialization won't preserve GPU buffers, skipping.\n");
    return 0;
#endif

    if (!get_jit_target_from_environment().has_gpu_feature()) {
        printf("[SKIP] No GPU target enabled.\n");
        return 0;
    }

    // A sequence of stages which may or may not run on the gpu.
    Func f, g, h;
    ImageParam in(Int(32), 1);
    Var x, xi;

    f(x) = in(x) + in(x + 1);
    g(x) = f(x * 2);
    h(x) = g(x) - 7;

    Param<bool> gpu_f, gpu_g, gpu_h;

    f.compute_root().specialize(gpu_f).gpu_tile(x, x, xi, 16);
    g.compute_root().specialize(gpu_g).gpu_tile(x, x, xi, 16);
    h.compute_root().specialize(gpu_h).gpu_tile(x, x, xi, 16);

    Buffer<int> out(128), reference(128), input(256);

    lambda(x, x * 17 + 43 + x * x).realize(input);
    in.set(input);

    gpu_f.set(false);
    gpu_g.set(false);
    gpu_h.set(false);
    h.realize(reference);

    for (int i = 1; i < 8; i++) {
        gpu_f.set((i & 1) != 0);
        gpu_g.set((i & 2) != 0);
        gpu_h.set((i & 4) != 0);

        h.realize(out);

        RDom r(out);
        uint32_t err = evaluate<uint32_t>(sum(abs(out(r) - reference(r))));
        if (err) {
            printf("Incorrect results for test %d\n", i);
            return 1;
        }
    }

    printf("Success!\n");
    return 0;
}