File: blas.cu

package info (click to toggle)
python-escript 5.0-3
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 87,772 kB
  • ctags: 49,550
  • sloc: python: 585,488; cpp: 133,173; ansic: 18,675; xml: 3,283; sh: 690; makefile: 215
file content (98 lines) | stat: -rw-r--r-- 2,108 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include <cusp/array1d.h>
#include <cusp/blas.h>

#include <iostream>
#include <stdio.h>

#include "../timer.h"

// Benchmark case wrapping cusp::blas::nrm2.
//
// Template parameters:
//   T            element type of the vector
//   MemorySpace  cusp memory space holding the vector (cusp::device_memory
//                unless overridden)
//
// The object owns one vector of n elements; calling it runs nrm2 once on
// that vector and discards the result.
template <typename T, typename MemorySpace=cusp::device_memory>
struct test_nrm2
{
    cusp::array1d<T,MemorySpace> x;  // operand vector, constructed with n elements
    const size_t n;                  // number of elements in x

    // The initializers are listed in declaration order (x, then n) because
    // that is the order in which members are constructed regardless of how
    // the list is written. Inside the initializers `n` names the constructor
    // parameter, so x is sized correctly even though the member n is set later.
    test_nrm2(const size_t n)
        : x(n), n(n) {}

    // Runs the measured operation once; the returned norm is ignored.
    void operator()(void)
    {
        cusp::blas::nrm2(x);
    }

    // Label used in the report: "snrm2" when T is 4 bytes wide, "dnrm2" for
    // every other element size.
    std::string name(void) const
    {
        return (sizeof(T) == 4) ? "snrm2" : "dnrm2";
    }

    // Byte count attributed to one call: the size of x (n * sizeof(T)).
    size_t bytes(void) const
    {
        return n * sizeof(T);
    }
};

// Benchmark case wrapping cusp::blas::dot.
//
// Template parameters:
//   T            element type of both vectors
//   MemorySpace  cusp memory space holding the vectors (cusp::device_memory
//                unless overridden)
//
// The object owns two vectors of n elements each; calling it runs dot once
// on them and discards the result.
template <typename T, typename MemorySpace=cusp::device_memory>
struct test_dot
{
    cusp::array1d<T,MemorySpace> x, y;  // operand vectors, n elements each
    const size_t n;                     // number of elements per vector

    // The initializers are listed in declaration order (x, y, then n) because
    // that is the order in which members are constructed regardless of how
    // the list is written. Inside the initializers `n` names the constructor
    // parameter, so both vectors are sized correctly.
    test_dot(const size_t n)
        : x(n), y(n), n(n) {}

    // Runs the measured operation once; the returned dot product is ignored.
    void operator()(void)
    {
        cusp::blas::dot(x, y);
    }

    // Label used in the report: "sdot" when T is 4 bytes wide, "ddot" for
    // every other element size.
    std::string name(void) const
    {
        return (sizeof(T) == 4) ? "sdot" : "ddot";
    }

    // Byte count attributed to one call: the combined size of x and y
    // (2 * n * sizeof(T)).
    size_t bytes(void) const
    {
        return 2 * n * sizeof(T);
    }
};

// Benchmark case wrapping cusp::blas::axpy with a scale factor of T(1.0).
//
// Template parameters:
//   T            element type of both vectors and of the scale factor
//   MemorySpace  cusp memory space holding the vectors (cusp::device_memory
//                unless overridden)
//
// The object owns two vectors of n elements each; calling it runs axpy once
// on them.
template <typename T, typename MemorySpace=cusp::device_memory>
struct test_axpy
{
    cusp::array1d<T,MemorySpace> x, y;  // operand vectors, n elements each
    const size_t n;                     // number of elements per vector

    // The initializers are listed in declaration order (x, y, then n) because
    // that is the order in which members are constructed regardless of how
    // the list is written. Inside the initializers `n` names the constructor
    // parameter, so both vectors are sized correctly.
    test_axpy(const size_t n)
        : x(n), y(n), n(n) {}

    // Runs the measured operation once.
    // NOTE(review): per the cusp BLAS convention this should update y in
    // place as y <- 1 * x + y -- confirm against the cusp version in use.
    void operator()(void)
    {
        cusp::blas::axpy(x, y, T(1.0));
    }

    // Label used in the report: "saxpy" when T is 4 bytes wide, "daxpy" for
    // every other element size.
    std::string name(void) const
    {
        return (sizeof(T) == 4) ? "saxpy" : "daxpy";
    }

    // Byte count attributed to one call: three vector-sized transfers
    // (3 * n * sizeof(T)) -- presumably read x, read y, write y.
    size_t bytes(void) const
    {
        return 3 * n * sizeof(T);
    }
};


template <typename Test>
void benchmark(const size_t n)
{
    Test test(n);
    test();

    timer t0;
    test();
    float ms = t0.milliseconds_elapsed();
    float bw = (test.bytes() / (ms / 1e3)) / 1e9;

    printf("%-10s %6.1f GB/s [ %8.3f ms]\n", test.name().c_str(), bw, ms);
}

// Entry point: for each vector length 2^16, 2^17, ..., 2^26 prints the length
// and then runs the nrm2, dot and axpy benchmark cases, each in float and in
// double. Command-line arguments are not used.
int main(int argc, char ** argv)
{
    const size_t first_exponent = 16;
    const size_t end_exponent   = 27;  // exclusive upper bound

    size_t exponent = first_exponent;
    while (exponent < end_exponent)
    {
        const size_t n = size_t(1) << exponent;

        // std::endl flushes the header before the printf-based report lines.
        std::cout << "N = " << n << std::endl;

        benchmark< test_nrm2<float>  >(n);
        benchmark< test_nrm2<double> >(n);

        benchmark< test_dot<float>  >(n);
        benchmark< test_dot<double> >(n);

        benchmark< test_axpy<float>  >(n);
        benchmark< test_axpy<double> >(n);

        ++exponent;
    }

    return 0;
}