File: define_extern_opencl_generator.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (42 lines) | stat: -rw-r--r-- 1,308 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#include "Halide.h"

namespace {

/**
 * Generator producing output(x) = gpu_input(x) - 41 over a 1-D int32 input.
 *
 * When the target supports the OpenCL device API, gpu_input is declared as an
 * extern stage named "gpu_input" that receives make_a_root as its argument;
 * otherwise gpu_input is a plain copy of the input.
 */
class DefineExternOpenCLOutput : public Halide::Generator<DefineExternOpenCLOutput> {
public:
    Input<Buffer<int32_t, 1>> input{"input"};
    Output<Func> output{"output", Int(32), 1};

    Var x{"x"};
    // There appears to be no direct route from Input<Buffer<int32_t>> to an
    // ExternFuncArgument, so the input is first wrapped in this Func, which
    // is then handed to the extern stage.
    Func make_a_root{"make_a_root"};
    Func gpu_input{"gpu_input"};

    /** Builds the algorithm: wrap the input, define gpu_input, subtract 41. */
    void generate() {
        make_a_root(x) = input(x);
        const ExternFuncArgument root_arg = make_a_root;

        const bool opencl_supported =
            get_target().supports_device_api(Halide::DeviceAPI::OpenCL);
        if (!opencl_supported) {
            // No OpenCL device API on this target: read the input directly.
            gpu_input(x) = input(x);
        } else {
            // 1-D int32 extern stage associated with the OpenCL device API.
            gpu_input.define_extern("gpu_input",
                                    {root_arg},
                                    Halide::type_of<int32_t>(),
                                    1,
                                    NameMangling::Default,
                                    Halide::DeviceAPI::OpenCL);
        }

        output(x) = gpu_input(x) - 41;
    }

    /**
     * Schedules both intermediate Funcs at root and, when the target has the
     * OpenCL feature, tiles the output across GPU blocks/threads in x.
     */
    void schedule() {
        make_a_root.compute_root();
        gpu_input.compute_root();

        if (!get_target().has_feature(Target::OpenCL)) {
            return;
        }

        Var block;
        Var thread;
        output.gpu_tile(x, block, thread, Expr(16),
                        TailStrategy::Auto, Halide::DeviceAPI::OpenCL);
    }
};

}  // namespace

// Register DefineExternOpenCLOutput with Halide's generator machinery under
// the name define_extern_opencl.
HALIDE_REGISTER_GENERATOR(DefineExternOpenCLOutput, define_extern_opencl)