File: inner_loop_parallel.cpp

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (48 lines) | stat: -rw-r--r-- 1,388 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#include "Halide.h"
#include "halide_benchmark.h"
#include <cstdio>

using namespace Halide;
using namespace Halide::Tools;

int main(int argc, char **argv) {
    Target target = get_jit_target_from_environment();
    if (target.arch == Target::WebAssembly) {
        printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
        return 0;
    }

    Func f;
    Var x, y;
    f(x, y) = x + y;
    f.parallel(x);

    Pipeline p(f);

    // Having more threads than tasks shouldn't hurt performance too much.
    double correct_time = 0;

    for (int t = 2; t <= 64; t *= 2) {
        std::ostringstream ss;
        ss << "HL_NUM_THREADS=" << t;
        Halide::Internal::JITSharedRuntime::set_num_threads(t);

        p.compile_jit();
        // Start the thread pool without giving any hints as to the
        // number of tasks we'll be using.
        p.realize({t, 1});
        double min_time = benchmark([&]() { return p.realize({2, 1000000}); });

        printf("%d: %f ms\n", t, min_time * 1e3);
        if (t == 2) {
            correct_time = min_time;
        } else if (min_time > correct_time * 5) {
            printf("Unacceptable overhead when using %d threads for 2 tasks: %f ms vs %f ms\n",
                   t, min_time, correct_time);
            return 1;
        }
    }

    printf("Success!\n");
    return 0;
}