File: benchmark.hpp

package info (click to toggle)
rocblas 6.4.4-4
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,082,776 kB
  • sloc: cpp: 244,923; f90: 50,012; python: 50,003; sh: 24,630; asm: 8,917; makefile: 150; ansic: 107; xml: 36; awk: 14
file content (96 lines) | stat: -rw-r--r-- 3,468 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/* ************************************************************************
 * Copyright (C) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
 * ies of the Software, and to permit persons to whom the Software is furnished
 * to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
 * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
 * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ************************************************************************ */

#pragma once

//!
//! @brief Implementation of a common benchmark code
//!
//! Stores a callable together with the stream, a copy of the benchmark
//! arguments and the rotating-buffer size. run_timer() performs the timed
//! loop; the measured durations are then available through get_hot_time()
//! and get_cold_time(). Both durations are 0 until run_timer() has been called.
//!
//! @tparam LAMBDA  Type of the callable to benchmark. It is invoked with a
//!                 single integer argument (the rotating-buffer index).
//!
template <typename LAMBDA>
class Benchmark
{
public:
    //!
    //! @brief Constructor
    //! @param lambda_to_benchmark     The lambda to be benchmarked.
    //! @param stream                  The Hip stream.
    //! @param arg                     Arguments struct, arguments to run benchmark (stored by copy).
    //! @param flush_batch_count       number of copies of arrays in rotating buffer, set to 1 for no rotating buffer
    //!
    Benchmark(LAMBDA           lambda_to_benchmark,
              hipStream_t      stream,
              const Arguments& arg,
              size_t           flush_batch_count)
        // Initializers are listed in member declaration order, which is the
        // order in which they are actually executed.
        : m_lambda_to_benchmark(lambda_to_benchmark)
        , m_arg(arg)
        , m_stream(stream)
        , m_flush_batch_count(flush_batch_count)
        , m_hot_time(0)
        , m_cold_time(0)
    {
    }

    //! @brief Run the cold and hot iterations and record their durations.
    void run_timer();

    //! @brief Duration recorded for the hot (timed) iterations.
    double get_hot_time() const;

    //! @brief Duration recorded for the cold (warm-up) iterations.
    double get_cold_time() const;

private:
    LAMBDA      m_lambda_to_benchmark; // callable under test
    Arguments   m_arg; // private copy of the benchmark arguments
    hipStream_t m_stream; // stream handed to the timing helper
    size_t      m_flush_batch_count; // number of rotating-buffer copies
    double      m_hot_time; // set by run_timer()
    double      m_cold_time; // set by run_timer()
};

// timer calls m_lambda_to_benchmark m_arg.cold_iters + m_arg.iters times in total:
//   - first the cold phase (the first m_arg.cold_iters calls), timed into m_cold_time
//   - then the hot phase (the remaining calls), timed into m_hot_time
// Each call receives a flush index that cycles through 0 .. m_flush_batch_count - 1,
// i.e. the timer rotates through m_flush_batch_count copies of arrays to flush MALL.
// Both times are always assigned, even when one of the phases has no iterations.
template <typename LAMBDA>
void Benchmark<LAMBDA>::run_timer()
{
    // A count of 0 would make the modulo below undefined; treat it like 1
    // (every call gets index 0, i.e. no rotating buffer).
    const size_t flush_count = m_flush_batch_count != 0 ? m_flush_batch_count : 1;
    const auto   total_iters = m_arg.iters + m_arg.cold_iters;

    // Single counter shared by both phases so the flush index keeps rotating
    // across the cold/hot boundary.
    int iter = 0;

    const double cold_start = get_time_us_sync(m_stream);
    for(; iter < m_arg.cold_iters && iter < total_iters; ++iter)
    {
        m_lambda_to_benchmark(static_cast<int>(static_cast<size_t>(iter) % flush_count));
    }

    // Taken unconditionally between the phases, so it is defined even when
    // there are no hot iterations.
    const double cold_end = get_time_us_sync(m_stream);
    for(; iter < total_iters; ++iter)
    {
        m_lambda_to_benchmark(static_cast<int>(static_cast<size_t>(iter) % flush_count));
    }

    const double hot_end = get_time_us_sync(m_stream);

    m_cold_time = cold_end - cold_start;
    m_hot_time  = hot_end - cold_end;
}

//! @brief Accessor for the hot-phase duration.
//! @return The value stored in m_hot_time: 0 after construction, otherwise
//!         whatever the most recent run_timer() call recorded.
template <typename LAMBDA>
double Benchmark<LAMBDA>::get_hot_time() const
{
    return m_hot_time;
}

//! @brief Accessor for the cold-phase duration.
//! @return The value stored in m_cold_time: 0 after construction, otherwise
//!         whatever the most recent run_timer() call recorded.
template <typename LAMBDA>
double Benchmark<LAMBDA>::get_cold_time() const
{
    return m_cold_time;
}