File: blur2x2_generator.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (66 lines) | stat: -rw-r--r-- 2,238 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include "Halide.h"

namespace {

// Builds the condition under which `p` is treated as interleaved:
// dimension 0 has a stride of `channels`, and dimension 2 has a stride of 1
// and an extent of `channels`. `channels` defaults to 3.
template<typename T>
Halide::Expr is_interleaved(const T &p, int channels = 3) {
    const auto dim0_stride_matches = (p.dim(0).stride() == channels);
    const auto dim2_stride_is_one = (p.dim(2).stride() == 1);
    const auto dim2_extent_matches = (p.dim(2).extent() == channels);
    return dim0_stride_matches && dim2_stride_is_one && dim2_extent_matches;
}

// Builds the condition under which `p` is treated as planar:
// dimension 0 has a stride of 1 and dimension 2 has an extent of `channels`.
// `channels` defaults to 3.
template<typename T>
Halide::Expr is_planar(const T &p, int channels = 3) {
    const auto dim0_stride_is_one = (p.dim(0).stride() == 1);
    const auto dim2_extent_matches = (p.dim(2).extent() == channels);
    return dim0_stride_is_one && dim2_extent_matches;
}

// A small blur generator: every output value is the mean of the four
// axis-adjacent input values (x - 1, x + 1, y - 1, y + 1) in the same channel.
class Blur2x2 : public Halide::Generator<Blur2x2> {
public:
    Input<Buffer<float, 3>> input{"input"};
    Input<int32_t> width{"width"};
    Input<int32_t> height{"height"};
    Output<Buffer<float, 3>> blur{"blur"};

    void generate() {
        // The region in which reads are valid is supplied through the `width`
        // and `height` parameters rather than taken from the input buffer.
        // That keeps the boundary condition independent of the size of
        // whatever input tile (if any) we happen to be handed.
        //
        // (When this pipeline is used as an extern stage for tiled
        // processing, clamping with input.min() / input.extent() would
        // advertise to the calling kernel that any input size is acceptable,
        // and it would then always hand us 1x1 tiles.)
        Func input_clamped =
            Halide::BoundaryConditions::repeat_edge(input, {{0, width}, {0, height}});

        // The four taps, named so the averaging expression reads plainly.
        const Expr prev_col = input_clamped(x - 1, y, c);
        const Expr next_col = input_clamped(x + 1, y, c);
        const Expr prev_row = input_clamped(x, y - 1, c);
        const Expr next_row = input_clamped(x, y + 1, c);

        // Summation order is kept as prev_col, next_col, prev_row, next_row.
        const Expr tap_sum = prev_col + next_col + prev_row + next_row;

        blur(x, y, c) = tap_sum / 4.0f;
    }

    void schedule() {
        // Clear the default stride constraint on dimension 0 of both buffers;
        // without this the specializations below would not work.
        input.dim(0).set_stride(Expr());
        blur.dim(0).set_stride(Expr());

        // Specialization 1: input and output are both planar.
        // Vectorize along x by the natural float vector width.
        const Expr both_planar = is_planar(input) && is_planar(blur);
        blur.specialize(both_planar)
            .vectorize(x, natural_vector_size<float>());

        // Specialization 2: input and output are both interleaved.
        // Make the channel loop innermost and vectorize across it.
        const Expr both_interleaved = is_interleaved(input) && is_interleaved(blur);
        blur.specialize(both_interleaved)
            .reorder(c, x, y)
            .vectorize(c);

        // Any other layout pairing (e.g. interleaved -> planar) still works,
        // but gets no specialized schedule and is relatively unoptimized.
    }

private:
    Var x{"x"}, y{"y"}, c{"c"};
};

}  // namespace

// Register the Blur2x2 class as a Halide generator, using `blur2x2` as its
// registered name.
HALIDE_REGISTER_GENERATOR(Blur2x2, blur2x2)