File: define_extern_opencl_generator.cpp

package info (click to toggle)
halide 14.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 49,124 kB
  • sloc: cpp: 238,722; makefile: 4,303; python: 4,047; java: 1,575; sh: 1,384; pascal: 211; xml: 165; javascript: 43; ansic: 34
file content (37 lines) | stat: -rw-r--r-- 1,164 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include "Halide.h"

namespace {

// Generator that exercises define_extern with the OpenCL device API: the 1-D
// int32 input is copied into a root Func, that Func is handed to an extern
// stage named "gpu_input", and the output is the extern stage's value minus 41.
class DefineExternOpenCLOutput : public Halide::Generator<DefineExternOpenCLOutput> {
public:
    Input<Buffer<int32_t, 1>> input{"input"};
    Output<Func> output{"output", Int(32), 1};

    Var x{"x"};
    // There doesn't seem to be a way to turn an Input<Buffer<int32_t>> into an
    // ExternFuncArgument directly, so make_a_root wraps the input in a Func
    // that can be passed to the extern stage instead.
    Func make_a_root{"make_a_root"};
    Func gpu_input{"gpu_input"};

    // Defines the pipeline: input -> make_a_root -> gpu_input (extern) -> output.
    void generate() {
        make_a_root(x) = input(x);

        // The wrapper Func is the extern stage's only argument.
        ExternFuncArgument wrapped_input = make_a_root;
        const Halide::Type element_type = Halide::type_of<int32_t>();
        const int dimensions = 1;
        gpu_input.define_extern("gpu_input",
                                {wrapped_input},
                                element_type,
                                dimensions,
                                NameMangling::Default,
                                Halide::DeviceAPI::OpenCL);

        output(x) = gpu_input(x) - 41;
    }

    // Computes both intermediate Funcs at root; the output is GPU-tiled only
    // when the target has the OpenCL feature enabled.
    void schedule() {
        make_a_root.compute_root();
        gpu_input.compute_root();

        if (!get_target().has_feature(Target::OpenCL)) {
            return;
        }

        // Tile x over block/thread variables with a tile size of 16.
        Var block_idx;
        Var thread_idx;
        const Expr tile_size(16);
        output.gpu_tile(x, block_idx, thread_idx, tile_size,
                        TailStrategy::Auto, Halide::DeviceAPI::OpenCL);
    }
};

}  // namespace

// Register DefineExternOpenCLOutput as a generator under the name
// "define_extern_opencl".
HALIDE_REGISTER_GENERATOR(DefineExternOpenCLOutput, define_extern_opencl)