File: likely.cpp

package info (click to toggle)
halide 21.0.0-4
links: PTS, VCS
area: main
in suites: forky, sid
size: 55,752 kB
sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (321 lines) | stat: -rw-r--r-- 9,744 bytes
parent folder | download | duplicates (3)
#include "Halide.h"
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <utility>

namespace {

using namespace Halide;
using namespace Halide::Internal;
using std::string;

// IR visitor that tallies two things while walking a piece of IR: the Store
// nodes whose name equals the func name supplied at construction, and the
// Call nodes named "sin_f32". Both tallies are readable as public members.
class Counter : public IRVisitor {
    // Name a Store node must carry in order to be counted.
    string func;

    using IRVisitor::visit;

    void visit(const Store *op) override {
        // Delegate to the base visitor first (it is expected to descend into
        // the node's children), then account for this node itself.
        IRVisitor::visit(op);
        if (op->name == func) {
            store_count++;
        }
    }

    void visit(const Call *op) override {
        // Same pattern as for Store: base traversal, then this node.
        IRVisitor::visit(op);
        if (op->name == "sin_f32") {
            sin_count++;
        }
    }

public:
    // Number of matching Store nodes seen so far.
    int store_count = 0;
    // Number of "sin_f32" Call nodes seen so far.
    int sin_count = 0;

    // f: name of the func whose stores should be counted. Pass an empty
    // string when only the sin count is of interest.
    // Marked explicit so a string never converts to a Counter by accident;
    // the by-value parameter is moved into the member to avoid a second copy.
    explicit Counter(string f)
        : func(std::move(f)) {
    }
};

// Lowering pass that checks the number of "sin_f32" calls in the statement it
// is given. The statement is handed back unmodified; if the count differs
// from the expected value, a message is printed and the process exits with
// status 1.
class CheckSinCount : public IRMutator {
    // Expected number of sin calls.
    int correct;

public:
    using IRMutator::mutate;

    // s: the statement to inspect. Returns s unchanged when the count matches.
    Stmt mutate(const Stmt &s) override {
        // Only the call tally is needed here, so the func name given to the
        // counter is left empty.
        Counter c("");
        s.accept(&c);
        if (c.sin_count != correct) {
            printf("There were %d sin calls instead of %d\n", c.sin_count, correct);
            exit(1);
        }
        return s;
    }

    // c: the number of sin calls the inspected statement must contain.
    // Marked explicit so a bare int never converts to a lowering pass.
    explicit CheckSinCount(int c)
        : correct(c) {
    }
};

// Lowering pass that checks the number of stores to a given func in the
// statement it is given. The statement is handed back unmodified; if the
// count differs from the expected value, a message is printed, the statement
// is sent to the level-1 debug stream, and the process exits with status 1.
class CheckStoreCount : public IRMutator {
    // Name of the func whose stores are counted.
    string func;
    // Expected number of stores to that func.
    int correct;

public:
    using IRMutator::mutate;

    // s: the statement to inspect. Returns s unchanged when the count matches.
    Stmt mutate(const Stmt &s) override {
        Counter c(func);
        s.accept(&c);
        if (c.store_count != correct) {
            printf("There were %d stores to %s instead of %d\n", c.store_count, func.c_str(), correct);
            // Dump the offending IR to help diagnose the mismatch.
            debug(1) << s << "\n";
            exit(1);
        }
        return s;
    }

    // f: name of the func to look for; c: the number of stores expected.
    // The by-value string is moved into the member to avoid a second copy.
    CheckStoreCount(string f, int c)
        : func(std::move(f)), correct(c) {
    }
};

// Compiles g with a CheckStoreCount pass attached, so the process exits with
// an error unless the inspected statement contains exactly `correct` stores
// to g.
// NOTE(review): g is received by value, and call sites invoke this several
// times on the same Func with different expectations. That presumably relies
// on the pass being attached to this local copy only -- confirm against the
// Halide Func/Pipeline implementation before switching to a reference.
void count_partitions(Func g, int correct) {
    auto *store_checker = new CheckStoreCount(g.name(), correct);
    // The raw pointer is handed over to the Func (presumably it assumes
    // ownership -- confirm).
    g.add_custom_lowering_pass(store_checker);

    const auto inferred_args = g.infer_arguments();
    g.compile_to_module(inferred_args);
}

// Compiles g with a CheckSinCount pass attached, so the process exits with an
// error unless the inspected statement contains exactly `correct` calls to
// sin.
// NOTE(review): g is received by value; as with count_partitions, call sites
// reuse the same Func across checks, which presumably depends on the pass
// being attached to this local copy only -- confirm before changing it.
void count_sin_calls(Func g, int correct) {
    auto *sin_checker = new CheckSinCount(correct);
    // The raw pointer is handed over to the Func (presumably it assumes
    // ownership -- confirm).
    g.add_custom_lowering_pass(sin_checker);

    const auto inferred_args = g.infer_arguments();
    g.compile_to_module(inferred_args);
}

}  // namespace

int main(int argc, char **argv) {
    Func f;
    Var x;
    f(x) = x;
    f.compute_root();

    // Halide will partition a loop into three pieces in a few
    // situations. The pieces are 1) a messy prologue, 2) a clean
    // steady state, and 3) a messy epilogue. One way to trigger this
    // is if you use a boundary condition helper:

    {
        Func g = BoundaryConditions::repeat_edge(f, {{0, 100}});
        count_partitions(g, 3);

        // check that disabling works.
        g.partition(x, Partition::Never);
        count_partitions(g, 1);
    }

    // If you vectorize or otherwise split, then the last vector
    // (which gets shifted leftwards) is its own partition. This
    // removes some clamping logic from the inner loop.

    {
        Func g;
        g(x) = f(x);
        g.vectorize(x, 8);
        count_partitions(g, 2);

        // check that disabling works.
        g.partition(x, Partition::Never);
        count_partitions(g, 1);
    }

    // The slicing applies to every loop level starting from the outermost one,
    // but only recursively simplifies the clean steady state. It either splits
    // things three (start, middle, end). So adding a boundary condition to a 2D
    // computation will produce 5 code paths for the top, bottom, left, right,
    // and center of the image. With explicit control over loop partitioning, we
    // might produce more or fewer.
    {
        Var y;
        Func g;
        g(x, y) = x + y;
        g.compute_root();
        Func h = BoundaryConditions::mirror_image(g, {{0, 10}, {0, 10}});
        count_partitions(h, 5);

        {
            debug(1) << "Never partition y, always partition x:\n";
            Func h2 = h;
            h2.partition(x, Partition::Always);
            h2.partition(y, Partition::Never);
            count_partitions(h2, 3);  // We expect left-center-right
        }

        {
            debug(1) << "Never partition x, always partition y:\n";
            Func h2 = h;
            h2.partition(x, Partition::Never);
            h2.partition(y, Partition::Always);
            count_partitions(h2, 3);  // We expect top-middle-bottom
        }

        {
            debug(1) << "Never partition x and y.\n";
            Func h2 = h;
            h2.partition(x, Partition::Never);
            h2.partition(y, Partition::Never);
            count_partitions(h2, 1);
        }

        {
            debug(1) << "Always partition x and y.\n";
            Func h2 = h;
            h2.partition(x, Partition::Always);
            h2.partition(y, Partition::Always);
            // All loops get partitioned, including the tails of outer loops, so we expect 9 zones:
            /*
               ----------------------------------------------
               | top left    | top middle    | top right    |
               | ------------------------------------------ |
               | left        | middle        | right        |
               | ------------------------------------------ |
               | bottom left | bottom middle | bottom right |
               ----------------------------------------------
            */
            count_partitions(h2, 9);
        }
    }

    // If you split and also have a boundary condition, or have
    // multiple boundary conditions at play (e.g. because you're
    // blurring an inlined Func that uses a boundary condition), then
    // there are still only three partitions. The steady state is the
    // slice of the loop where *all* of the boundary conditions and
    // splitting logic simplify away.
    {
        Func g = BoundaryConditions::mirror_interior(f, {{0, 10}});
        Func h;
        Param<int> t1, t2;
        h(x) = g(x - 1) + g(x + 1);
        h.vectorize(x, 8);
        count_partitions(h, 3);
    }

    // You can manually control the splitting behavior using the
    // 'likely' intrinsic. When used on one side of a select, min,
    // max, or clamp, it tags the select, min, max, or clamp as likely
    // to simplify to that expression in the steady state case, and
    // tries to solve for loop variable values for which this is true.
    {
        // So this code should produce a prologue that evaluates to sin(x), and
        // a steady state that evaluates to 1:
        Func g;
        g(x) = select(x < 10, sin(x), likely(1.0f));
        // There should be two partitions
        count_partitions(g, 2);
        // But only one should call sin
        count_sin_calls(g, 1);
    }

    {
        // This code should produce a prologue and epilogue that
        // evaluate sin(x), and a steady state that evaluates to 1:
        Func g;
        g(x) = select(x < 10 || x > 100, sin(x), likely(1.0f));
        // There should be three partitions
        count_partitions(g, 3);
        // With calls to sin in the prologue and epilogue.
        count_sin_calls(g, 2);
    }

    // As a specialize case, we treat clamped ramps as likely to
    // simplify to the clamped expression. This handles the many
    // existing cases where people have written their boundary
    // condition manually using clamp.
    {
        Func g;
        g(x) = f(clamp(x, 0, 10));  // treated as clamp(likely(x), 0, 10)
        g.vectorize(x, 8);
        count_partitions(g, 3);

        // check that disabling works.
        g.partition(x, Partition::Never);
        count_partitions(g, 1);
    }

    // Using the likely intrinsic pulls some IR relating to the
    // condition outside of the loop. We'd better check that this
    // respects lets and doesn't do any combinatorial expansion. We'll
    // do this with a nasty comparison:
    {
        Func g;
        Var y;

        // Have an inner reduction loop that the comparisons depend on
        // to make things harder.
        RDom r(0, 5);

        const int N = 25;

        // Make some nasty expressions to compare to.
        Expr e[N];
        e[0] = y;
        for (int i = 1; i < N; i++) {
            e[i] = e[i - 1] * e[i - 1] + y + r;
        }
        // Make a nasty condition that uses all of these.
        Expr nasty = cast<bool>(1);
        for (int i = 0; i < N; i++) {
            nasty = nasty && (x * (i + 1) < e[i]);
        }

        // Have an innermost loop over c to complicate things further.
        Var c;
        g(c, x, y) = sum(select(nasty, likely(10), c + r));

        // Check that it doesn't take the age of the world to compile,
        // and that it produces the right number of partitions.
        count_partitions(g, 3);
    }

    // Make sure partitions that occur outside of the actual bounds
    // don't mess things up.
    {
        Func g;
        Var x;
        Param<int> limit;
        g(x) = select(x > limit, likely(3), 2);

        // If either of these realize calls iterates from 0 to limit,
        // and then from limit to 10, we'll have a nice segfault.
        limit.set(10000000);
        Buffer<int> result = g.realize({10});

        limit.set(-10000000);
        result = g.realize({10});
    }

    // Test for the bug described in https://github.com/halide/Halide/issues/7929
    {
        Func f, g, h;
        Var x, y;

        f(x, y) = x;
        f.compute_root();

        Param<int> p;
        g = BoundaryConditions::repeat_edge(f, {{0, p}, {Expr(), Expr()}});

        h(x, y) = g(x, y) + g(x, y + 1) + g(x, y + 2);

        count_partitions(h, 3);

        // Same thing with vectorization too.
        h.vectorize(x, 8);
        count_partitions(h, 3);
    }

    // The performance of this behavior is tested in
    // test/performance/boundary_conditions.cpp

    printf("Success!\n");
    return 0;
}