File: benchmark_reducer.cpp

package info (click to toggle)
xtensor 0.25.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,476 kB
  • sloc: cpp: 65,302; makefile: 202; python: 171; javascript: 8
file content (100 lines) | stat: -rw-r--r-- 4,212 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/***************************************************************************
 * Copyright (c) 2016, Johan Mabille, Sylvain Corlay and Wolf Vollprecht    *
 *                                                                          *
 * Distributed under the terms of the BSD 3-Clause License.                 *
 *                                                                          *
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/

#include <benchmark/benchmark.h>

#include "xtensor/xarray.hpp"
#include "xtensor/xreducer.hpp"

namespace xt
{
    namespace reducer
    {
        template <class E, class X>
        void reducer_reducer(benchmark::State& state, const E& x, E& res, const X& axes)
        {
            for (auto _ : state)
            {
                res = sum(x, axes);
                benchmark::DoNotOptimize(res.data());
            }
        }

        template <class E, class X>
        void reducer_immediate_reducer(benchmark::State& state, const E& x, E& res, const X& axes)
        {
            for (auto _ : state)
            {
                res = sum(x, axes, evaluation_strategy::immediate);
                benchmark::DoNotOptimize(res.data());
            }
        }

        xarray<double> u = ones<double>({10, 100000});
        xarray<double> v = ones<double>({100000, 10});
        xarray<double> res2 = ones<double>({1});

        std::vector<std::size_t> axis0 = {0};
        std::vector<std::size_t> axis1 = {1};
        std::vector<std::size_t> axis_both = {0, 1};

        static auto res0 = xarray<double>::from_shape({100000});
        static auto res1 = xarray<double>::from_shape({10});

        BENCHMARK_CAPTURE(reducer_reducer, 10x100000 / axis 0, u, res0, axis0);
        BENCHMARK_CAPTURE(reducer_reducer, 10x100000 / axis 1, u, res1, axis1);
        BENCHMARK_CAPTURE(reducer_reducer, 100000x10 / axis 1, v, res1, axis0);
        BENCHMARK_CAPTURE(reducer_reducer, 100000x10 / axis 0, v, res0, axis1);
        BENCHMARK_CAPTURE(reducer_reducer, 100000x10 / axis both, v, res2, axis_both);

        BENCHMARK_CAPTURE(reducer_immediate_reducer, 10x100000 / axis 0, u, res0, axis0);
        BENCHMARK_CAPTURE(reducer_immediate_reducer, 10x100000 / axis 1, u, res1, axis1);
        BENCHMARK_CAPTURE(reducer_immediate_reducer, 100000x10 / axis 1, v, res1, axis0);
        BENCHMARK_CAPTURE(reducer_immediate_reducer, 100000x10 / axis 0, v, res0, axis1);
        BENCHMARK_CAPTURE(reducer_immediate_reducer, 100000x10 / axis both, v, res2, axis_both);

        template <class E, class X>
        inline auto reducer_manual_strided_reducer(benchmark::State& state, const E& x, E& res, const X& axes)
        {
            using value_type = typename E::value_type;
            std::size_t stride = x.strides()[axes[0]];
            std::size_t offset_end = x.strides()[axes[0]] * x.shape()[axes[0]];
            std::size_t offset_iter = 0;
            if (axes[0] == 1)
            {
                offset_iter = x.strides()[0];
            }
            else if (axes[0] == 0)
            {
                offset_iter = x.strides()[1];
            }

            for (auto _ : state)
            {
                for (std::size_t j = 0; j < res.shape()[0]; ++j)
                {
                    auto begin = x.data() + (offset_iter * j);
                    auto end = begin + offset_end;
                    value_type temp = *begin;
                    begin += stride;
                    for (; begin < end; begin += stride)
                    {
                        temp += *begin;
                    }
                    res(j) = temp;
                }
                benchmark::DoNotOptimize(res.data());
            }
        }

        BENCHMARK_CAPTURE(reducer_manual_strided_reducer, 10x100000 / axis 0, u, res0, axis0);
        BENCHMARK_CAPTURE(reducer_manual_strided_reducer, 10x100000 / axis 1, u, res1, axis1);
        BENCHMARK_CAPTURE(reducer_manual_strided_reducer, 100000x10 / axis 1, v, res1, axis0);
        BENCHMARK_CAPTURE(reducer_manual_strided_reducer, 100000x10 / axis 0, v, res0, axis1);
    }
}