File: tiled_blur_generator.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (76 lines) | stat: -rw-r--r-- 2,575 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include "Halide.h"

namespace {

using Halide::saturating_cast;

// Builds the condition under which `p` is treated as interleaved:
//   - the stride of dimension 0 equals `channels`,
//   - the stride of dimension 2 is 1, and
//   - the extent of dimension 2 equals `channels`.
// `p` may be anything exposing dim(i).stride() / dim(i).extent();
// `channels` defaults to 3.
template<typename T>
Halide::Expr is_interleaved(const T &p, int channels = 3) {
    const auto dim0_stride_matches = (p.dim(0).stride() == channels);
    const auto dim2_stride_is_one = (p.dim(2).stride() == 1);
    const auto dim2_extent_matches = (p.dim(2).extent() == channels);
    return dim0_stride_matches && dim2_stride_is_one && dim2_extent_matches;
}

// Builds the condition under which `p` is treated as planar:
//   - the stride of dimension 0 is 1, and
//   - the extent of dimension 2 equals `channels`.
// `p` may be anything exposing dim(i).stride() / dim(i).extent();
// `channels` defaults to 3.
template<typename T>
Halide::Expr is_planar(const T &p, int channels = 3) {
    const auto dim0_stride_is_one = (p.dim(0).stride() == 1);
    const auto dim2_extent_matches = (p.dim(2).extent() == channels);
    return dim0_stride_is_one && dim2_extent_matches;
}

// Generator for a pipeline over a 3-dimensional uint8 buffer that:
//   1. converts the input to float and scales it by 1.2 ("brightened"),
//   2. feeds that through the extern stage "blur2x2" ("tiled_blur"),
//   3. scales by 1.2 again and saturating-casts back to uint8.
// The output is computed in 32x32 tiles, with both intermediate stages
// computed per tile.
class TiledBlur : public Halide::Generator<TiledBlur> {
public:
    Input<Buffer<uint8_t, 3>> input{"input"};
    Output<Buffer<uint8_t, 3>> output{"output"};

    // Defines the algorithm: brightened -> extern blur2x2 -> output.
    void generate() {
        const float gain = 1.2f;
        const float byte_scale = 255.f;

        // Map the 8-bit samples into float (divided by 255) and apply the gain.
        brightened(x, y, c) = (cast<float>(input(x, y, c)) / byte_scale) * gain;

        // This is the outermost pipeline, so the input's own width and
        // height are meaningful and can be handed to the extern stage
        // directly. If this pipeline were itself to be invoked in a tiled
        // fashion, width and height should be passed in as params instead.
        const Expr input_width = input.dim(0).extent();
        const Expr input_height = input.dim(1).extent();
        tiled_blur.define_extern(
            "blur2x2",
            {brightened, input_width, input_height},
            Float(32), 3);

        // Apply the gain a second time, then return to 8 bits with saturation.
        output(x, y, c) = saturating_cast<uint8_t>(tiled_blur(x, y, c) * gain * byte_scale);
    }

    // Defines the schedule: 32x32 output tiles, per-tile intermediates, and
    // layout-specific specializations.
    void schedule() {
        const int tile_size = 32;
        Var x_inner, y_inner;

        output
            .reorder(c, x, y)
            .tile(x, y, x_inner, y_inner, tile_size, tile_size);

        // Both intermediate stages are computed per output tile.
        tiled_blur.compute_at(output, x);
        brightened.compute_at(output, x);

        // Trace the realizations of brightened to see what region
        // tiled_blur decides it needs from it. They should be 34x34
        // tiles, but clamped to fit within the input, so near the
        // boundaries they'll often be 33x34, 34x33, or 33x33.
        brightened.trace_realizations();

        // Drop the default stride constraint on dimension 0 of both
        // buffers; the specializations below rely on it being unset.
        input.dim(0).set_stride(Expr());
        output.dim(0).set_stride(Expr());

        // Specialization for input and output both planar: vectorize the
        // inner x loop by the natural float vector width.
        const Expr both_planar = is_planar(input) && is_planar(output);
        output.specialize(both_planar)
            .vectorize(x_inner, natural_vector_size<float>());

        // Specialization for input and output both interleaved.
        const Expr both_interleaved = is_interleaved(input) && is_interleaved(output);
        output.specialize(both_interleaved);

        // Any other combination (e.g. interleaved -> planar) still works,
        // but is relatively unoptimized.
    }

private:
    // Pure variables shared by generate() and schedule().
    Var x{"x"};
    Var y{"y"};
    Var c{"c"};
    // Extern stage bound to "blur2x2".
    Func tiled_blur{"tiled_blur"};
    // Float copy of the input, scaled by 1.2.
    Func brightened{"brightened"};
};

}  // namespace

// Register the TiledBlur generator class under the name tiled_blur.
HALIDE_REGISTER_GENERATOR(TiledBlur, tiled_blur)