File: matrix_benchmark.cpp

package info (click to toggle)
libstxxl 1.4.0-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 5,256 kB
  • ctags: 6,830
  • sloc: cpp: 39,594; ansic: 4,217; perl: 566; sh: 555; xml: 174; makefile: 21
file content (154 lines) | stat: -rw-r--r-- 6,746 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/***************************************************************************
 *  tools/benchmarks/matrix_benchmark.cpp
 *
 *  Part of the STXXL. See http://stxxl.sourceforge.net
 *
 *  Copyright (C) 2010-2011 Raoul Steffen <R-Steffen@gmx.de>
 *
 *  Distributed under the Boost Software License, Version 1.0.
 *  (See accompanying file LICENSE_1_0.txt or copy at
 *  http://www.boost.org/LICENSE_1_0.txt)
 **************************************************************************/

#include <cassert>
#include <iostream>
#include <limits>
#include <new>

#include <stxxl/vector>
#include <stxxl/stream>
#include <stxxl/bits/common/cmdline.h>
#include <stxxl/bits/containers/matrix.h>

using stxxl::uint64;
using stxxl::int_type;

int main(int argc, char** argv)
{
    #ifndef STXXL_MATRIX_BLOCK_ORDER
    const int block_order = 1568;                     // must be a multiple of 32, assuming at least 4 bytes element size
    #else
    const int block_order = STXXL_MATRIX_BLOCK_ORDER; // must be a multiple of 32, assuming at least 4 bytes element size
    #endif

    int rank = 10000;
    uint64 internal_memory = 256 * 1024 * 1024;
    int mult_algo_num = 5;
    int sched_algo_num = 2;

    stxxl::cmdline_parser cp;
    cp.add_int('r', "rank", "N", "rank of the matrices, default: 10000", rank);
    cp.add_bytes('m', "memory", "L", "internal memory to use, default: 256 MiB", internal_memory);
    cp.add_int('a', "mult-algo", "N", "use multiplication-algorithm number N\n  available are:\n   0: naive_multiply_and_add\n   1: recursive_multiply_and_add\n   2: strassen_winograd_multiply_and_add\n   3: multi_level_strassen_winograd_multiply_and_add\n   4: strassen_winograd_multiply (block-interleaved pre- and postadditions)\n   5: strassen_winograd_multiply_and_add_interleaved (block-interleaved preadditions)\n   6: multi_level_strassen_winograd_multiply_and_add_block_grained\n   -1: internal multiplication\n   -2: pure BLAS\n  default: 5", mult_algo_num);
    cp.add_int('s', "scheduling-algo", "N", "use scheduling-algorithm number N\n  available are:\n   0: online LRU\n   1: offline LFD\n   2: offline LRU prefetching\n  default: 2", sched_algo_num);

    cp.set_description("stxxl matrix test");
    cp.set_author("Raoul Steffen <R-Steffen@gmx.de>");

    if (!cp.process(argc, argv))
        return -1;

    STXXL_MSG("multiplying two full double matrices of rank " << rank << ", block order " << block_order
                                                              << " using " << internal_memory / 1024 / 1024 << "MiB internal memory, multiplication-algo "
                                                              << mult_algo_num << ", scheduling-algo " << sched_algo_num);

    typedef double value_type;

    stxxl::stats_data stats_before, stats_after;
    stxxl::matrix_operation_statistic_data matrix_stats_before, matrix_stats_after;

    if (mult_algo_num == -2)
    {
        const int_type size = rank * rank;
        value_type* A = new value_type[size];
        value_type* B = new value_type[size];
        value_type* C = new value_type[size];
        // write A and B
        for (int_type i = 0; i < size; ++i)
            A[i] = B[i] = 1;
        // evict A and B by accessing lots of memory
        int_type int_mem_size = 50 * 2 ^ 30 / sizeof(int_type);
        assert(int_mem_size > 0);
        int_type* D = new int_type[int_mem_size];
        for (int_type i = 0; i < int_mem_size; ++i)
            D[i] = 1;
        delete[] D;
        #if STXXL_BLAS
        stats_before = *stxxl::stats::get_instance();
        gemm_wrapper(rank, rank, rank,
                     value_type(1), false, A,
                     false, B,
                     value_type(0), false, C);
        stats_after = *stxxl::stats::get_instance();
        #else
        STXXL_ERRMSG("internal multiplication is only available for testing with blas");
        #endif
        delete[] A;
        delete[] B;
        delete[] C;
    }
    else
    {
        typedef stxxl::block_scheduler<stxxl::matrix_swappable_block<value_type, block_order> > bst;
        typedef stxxl::matrix<value_type, block_order> mt;
        typedef mt::row_major_iterator mitt;
        typedef mt::const_row_major_iterator cmitt;

        bst* b_s = new bst(internal_memory);  // the block_scheduler may use internal_memory byte for caching
        bst& bs = *b_s;
        mt* a = new mt(bs, rank, rank),
        * b = new mt(bs, rank, rank),
        * c = new mt(bs, rank, rank);

        STXXL_MSG("writing input matrices");
        for (mitt mit = a->begin(); mit != a->end(); ++mit)
            *mit = 1;
        for (mitt mit = b->begin(); mit != b->end(); ++mit)
            *mit = 1;

        bs.flush();
        STXXL_MSG("start of multiplication");
        matrix_stats_before.set();
        stats_before = *stxxl::stats::get_instance();
        if (mult_algo_num >= 0)
            *c = a->multiply(*b, mult_algo_num, sched_algo_num);
        else
            *c = a->multiply_internal(*b, sched_algo_num);
        bs.flush();
        stats_after = *stxxl::stats::get_instance();
        matrix_stats_after.set();
        STXXL_MSG("end of multiplication");

        matrix_stats_after = matrix_stats_after - matrix_stats_before;
        STXXL_MSG(matrix_stats_after);
        stats_after = stats_after - stats_before;
        STXXL_MSG(stats_after);
        {
            int_type num_err = 0;
            for (cmitt mit = c->cbegin(); mit != c->cend(); ++mit)
                num_err += (*mit != rank);
            if (num_err)
                STXXL_ERRMSG("c had " << num_err << " errors");
        }

        delete a;
        delete b;
        delete c;
        delete b_s;
    }

    STXXL_MSG("end of test");
    std::cout << "@";
    std::cout << " ra " << rank << " bo " << block_order << " im " << internal_memory / 1024 / 1024
              << " ma " << mult_algo_num << " sa " << sched_algo_num;
    std::cout << " mu " << matrix_stats_after.block_multiplication_calls
              << " mus " << matrix_stats_after.block_multiplications_saved_through_zero
              << " ad " << matrix_stats_after.block_addition_calls
              << " ads " << matrix_stats_after.block_additions_saved_through_zero;
    std::cout << " t " << stats_after.get_elapsed_time()
              << " r " << stats_after.get_reads() << " w " << stats_after.get_writes()
              << " rt " << stats_after.get_read_time() << " rtp " << stats_after.get_pread_time()
              << " wt " << stats_after.get_write_time() << " wtp " << stats_after.get_pwrite_time()
              << " rw " << stats_after.get_wait_read_time() << " ww " << stats_after.get_wait_write_time()
              << " iotp " << stats_after.get_pio_time();
    std::cout << std::endl;
    return 0;
}