File: async_parallel_generator.cpp

package info (click to toggle)
halide 14.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 49,124 kB
  • sloc: cpp: 238,722; makefile: 4,303; python: 4,047; java: 1,575; sh: 1,384; pascal: 211; xml: 165; javascript: 43; ansic: 34
file content (37 lines) | stat: -rw-r--r-- 1,421 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include "Halide.h"

// Declares the external routine `sleeper` (int result, five int arguments) so
// that the pipeline below can call it from Halide expressions as
// Ext::sleeper(...). It is kept in its own namespace, separate from the
// Halide names pulled in by the using-directive that follows.
// NOTE(review): the implementation is not part of this file; judging by the
// name it presumably delays the calling thread — confirm against the code that
// links with the generated pipeline.
namespace Ext {
HalideExtern_5(int, sleeper, int, int, int, int, int)
}

using namespace Halide;

// Generator for a three-dimensional Int(32) pipeline made of two chained
// producer/consumer pairs. Every stage routes its input through a call to the
// external Ext::sleeper routine. Three of the stages are scheduled async()
// inside an outer loop that is itself parallel, so the pipeline needs a mess
// of threads due to the nested parallelism.
class AsyncParallel : public Generator<AsyncParallel> {
public:
    Output<Func> output{"output", Int(32), 3};

    void generate() {
        // Intermediate stages of the pipeline.
        Func consumer_2("consumer_2");
        Func producer_1("producer_1");
        Func consumer_1("consumer_1");
        Func producer_2("producer_2");

        // Pure dimensions shared by every stage.
        Var x;
        Var y;
        Var z;

        // ---- Algorithm ----
        // Each Ext::sleeper call site passes a distinct leading constant
        // (0 through 7), followed by the coordinates and the value to wrap.

        producer_1(x, y, z) = x + y + Ext::sleeper(0, x, y, z, z);

        // consumer_1 reads producer_1 one step to either side along x.
        const Expr c1_left = Ext::sleeper(1, x, y, z, producer_1(x - 1, y, z));
        const Expr c1_right = Ext::sleeper(2, x, y, z, producer_1(x + 1, y, z));
        consumer_1(x, y, z) = c1_left + c1_right;

        // producer_2 reads consumer_1 one step to either side along y.
        const Expr p2_below = Ext::sleeper(3, x, y, z, consumer_1(x, y - 1, z));
        const Expr p2_above = Ext::sleeper(4, x, y, z, consumer_1(x, y + 1, z));
        producer_2(x, y, z) = p2_below + p2_above;

        // consumer_2 reads producer_2 one step to either side along x.
        const Expr c2_left = Ext::sleeper(5, x, y, z, producer_2(x - 1, y, z));
        const Expr c2_right = Ext::sleeper(6, x, y, z, producer_2(x + 1, y, z));
        consumer_2(x, y, z) = c2_left + c2_right;

        output(x, y, z) = Ext::sleeper(7, x, y, z, consumer_2(x, y, z));

        // ---- Schedule ----

        // consumer_2 is computed inside output's z loop.
        consumer_2.compute_at(output, z);

        // producer_2: storage at consumer_2's y loop, computation at its x
        // loop, produced asynchronously.
        producer_2.store_at(consumer_2, y);
        producer_2.compute_at(consumer_2, x);
        producer_2.async();

        // consumer_1: storage at output's z loop, computation at consumer_2's
        // y loop, produced asynchronously.
        consumer_1.store_at(output, z);
        consumer_1.compute_at(consumer_2, y);
        consumer_1.async();

        // producer_1: storage at consumer_2's y loop, computation at
        // consumer_1's x loop, produced asynchronously.
        producer_1.store_at(consumer_2, y);
        producer_1.compute_at(consumer_1, x);
        producer_1.async();

        // The outermost loop over z runs in parallel, so every async stage
        // above is nested inside a parallel loop.
        output.parallel(z);
    }
};

// Registers the AsyncParallel generator class under the registry name
// `async_parallel`.
HALIDE_REGISTER_GENERATOR(AsyncParallel, async_parallel)