File: allocator_perf.cc

package info (click to toggle)
seastar 25.05.0-1
  • links: PTS
  • area: main
  • in suites: sid
  • size: 7,256 kB
  • sloc: cpp: 89,250; python: 5,066; ansic: 3,452; sh: 1,272; xml: 177; makefile: 9
file content (184 lines) | stat: -rw-r--r-- 6,572 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License").  See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership.  You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Copyright (C) 2023 ScyllaDB
 */

#include <seastar/testing/perf_tests.hh>

#include <seastar/core/memory.hh>
#include <sys/mman.h>

struct alloc_bench {

    static constexpr size_t small_alloc_size = 8;
    static constexpr size_t large_alloc_size = 32000;
    static constexpr size_t MAX_POINTERS = 1000;

    enum alloc_test_flags : uint8_t {
        MEASURE_ALLOC = 1,
        MEASURE_FREE  = 2
    };

    static constexpr alloc_test_flags MEASURE_BOTH = (alloc_test_flags)(MEASURE_ALLOC | MEASURE_FREE);

    using alloc_fn = void * (size_t);
    using free_fn = void (void *);

    struct c_funcs {
        static constexpr alloc_fn * const alloc = std::malloc;
        static constexpr free_fn * const free = std::free;
    };

    struct cpp_operator_funcs {
        static void * alloc(size_t size) { return operator new(size); }
        static void free(void *p){ operator delete(p); }
    };

    struct cpp_array_funcs {
        static void * alloc(size_t size) { return new char[size]; }
        static void free(void *p){ delete [] static_cast<char *>(p); }
    };

    template <size_t alloc_size, alloc_test_flags F, typename A>
    size_t alloc_test() {
        // in some cases we want to measure allocation, on deallocation or
        // both, and this helper facilitates that with a minimum of fuss
        // it always executes fn on every pointer in the array, but only
        // measures when should_measure is true
        auto run_maybe_measure = [this](bool should_measure, auto fn) {
            if (should_measure) {
                perf_tests::start_measuring_time();
            }
            for (auto &p : _pointers) {
                fn(p);
            }
            if (should_measure) {
                perf_tests::stop_measuring_time();
            }
        };

        run_maybe_measure(F & MEASURE_ALLOC, [](auto& p) { p = A::alloc(alloc_size); });
        run_maybe_measure(F & MEASURE_FREE, [](auto& p) { A::free(p); });
        return _pointers.size();
    }

    std::array<void *, MAX_POINTERS> _pointers{};
};

PERF_TEST_F(alloc_bench, malloc_only)      { return alloc_test<small_alloc_size, MEASURE_ALLOC, c_funcs>(); }
PERF_TEST_F(alloc_bench, free_only)        { return alloc_test<small_alloc_size, MEASURE_FREE, c_funcs>(); }
PERF_TEST_F(alloc_bench, malloc_free)      { return alloc_test<small_alloc_size, MEASURE_BOTH, c_funcs>(); }

PERF_TEST_F(alloc_bench, op_new_only)      { return alloc_test<small_alloc_size, MEASURE_ALLOC, cpp_operator_funcs>(); }
PERF_TEST_F(alloc_bench, op_delete_only)   { return alloc_test<small_alloc_size, MEASURE_FREE, cpp_operator_funcs>(); }
PERF_TEST_F(alloc_bench, op_new_delete)    { return alloc_test<small_alloc_size, MEASURE_BOTH, cpp_operator_funcs>(); }

PERF_TEST_F(alloc_bench, new_array_only)   { return alloc_test<small_alloc_size, MEASURE_ALLOC, cpp_array_funcs>(); }
PERF_TEST_F(alloc_bench, delete_array_only){ return alloc_test<small_alloc_size, MEASURE_FREE, cpp_array_funcs>(); }
PERF_TEST_F(alloc_bench, array_new_delete) { return alloc_test<small_alloc_size, MEASURE_BOTH, cpp_array_funcs>(); }

PERF_TEST_F(alloc_bench, alloc_only_large) { return alloc_test<large_alloc_size, MEASURE_ALLOC, c_funcs>(); }
PERF_TEST_F(alloc_bench, free_only_large ) { return alloc_test<large_alloc_size, MEASURE_FREE, c_funcs>(); }
PERF_TEST_F(alloc_bench, alloc_free_large) { return alloc_test<large_alloc_size, MEASURE_BOTH, c_funcs>(); }

// this test doesn't serve much value. It should take about 10 times as the
// single alloc test above. If not, something is wrong.
PERF_TEST_F(alloc_bench, single_alloc_and_free_small_many)
{
    const std::size_t allocs = 10;
    static_assert(allocs <= MAX_POINTERS);

    for (std::size_t i = 0; i < allocs; ++i) {
        auto ptr = malloc(10);
        perf_tests::do_not_optimize(ptr);
        _pointers[i] = ptr;
    }

    for (std::size_t i = 0; i < allocs; ++i) {
        free(_pointers[i]);
    }
}

// Allocate from more than one page. Should also not suffer a perf penalty
// As we should have at least min free of 50 objects (hard coded internally)
PERF_TEST_F(alloc_bench, single_alloc_and_free_small_many_cross_page)
{
    const std::size_t alloc_size = 1024;
    const std::size_t allocs = (seastar::memory::page_size / alloc_size) + 1;

    static_assert(allocs <= MAX_POINTERS);

    for (std::size_t i = 0; i < allocs; ++i) {
        auto ptr = malloc(alloc_size);
        perf_tests::do_not_optimize(ptr);
        _pointers[i] = ptr;
    }

    for (std::size_t i = 0; i < allocs; ++i) {
        free(_pointers[i]);
    }
}

// Include an allocation in the benchmark that will require going to the large pool
// for more data for the small pool
PERF_TEST_F(alloc_bench, single_alloc_and_free_small_many_cross_page_alloc_more)
{
    const std::size_t alloc_size = 1024;
    const std::size_t allocs = 101; // at 1024 alloc size we will have a _max_free of 100 objects

    for (std::size_t i = 0; i < allocs; ++i) {
        auto ptr = malloc(alloc_size);
        perf_tests::do_not_optimize(ptr);
        _pointers[i] = ptr;
    }

    for (std::size_t i = 0; i < allocs; ++i) {
        free(_pointers[i]);
    }
}

template <typename DistType>
static size_t dist_bench() {
    std::random_device rd_device;
    std::mt19937_64 random_gen(rd_device());

    constexpr size_t ITERS = 10000;
    size_t rate = 3'000'000;
    perf_tests::do_not_optimize(rate);

    DistType dist(rate);

    typename DistType::result_type sum = 0;

    for (size_t i = 0; i < ITERS; i++) {
        sum += dist(random_gen);
    }

    perf_tests::do_not_optimize(sum);

    return ITERS;
}

PERF_TEST(random_sampling, exp_dist) {
    return dist_bench<std::exponential_distribution<double>>();
}

PERF_TEST(random_sampling, geo_dist) {
    return dist_bench<std::geometric_distribution<size_t>>();
}