/***************************************************************************
* tools/benchmarks/matrix_benchmark.cpp
*
* Part of the STXXL. See http://stxxl.sourceforge.net
*
* Copyright (C) 2010-2011 Raoul Steffen <R-Steffen@gmx.de>
*
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
**************************************************************************/
#include <iostream>
#include <limits>
#include <new>
#include <stxxl/vector>
#include <stxxl/stream>
#include <stxxl/bits/common/cmdline.h>
#include <stxxl/bits/containers/matrix.h>
using stxxl::uint64;
using stxxl::int_type;
int main(int argc, char** argv)
{
#ifndef STXXL_MATRIX_BLOCK_ORDER
const int block_order = 1568; // must be a multiple of 32, assuming at least 4 bytes element size
#else
const int block_order = STXXL_MATRIX_BLOCK_ORDER; // must be a multiple of 32, assuming at least 4 bytes element size
#endif
int rank = 10000;
uint64 internal_memory = 256 * 1024 * 1024;
int mult_algo_num = 5;
int sched_algo_num = 2;
stxxl::cmdline_parser cp;
cp.add_int('r', "rank", "N", "rank of the matrices, default: 10000", rank);
cp.add_bytes('m', "memory", "L", "internal memory to use, default: 256 MiB", internal_memory);
cp.add_int('a', "mult-algo", "N", "use multiplication-algorithm number N\n available are:\n 0: naive_multiply_and_add\n 1: recursive_multiply_and_add\n 2: strassen_winograd_multiply_and_add\n 3: multi_level_strassen_winograd_multiply_and_add\n 4: strassen_winograd_multiply (block-interleaved pre- and postadditions)\n 5: strassen_winograd_multiply_and_add_interleaved (block-interleaved preadditions)\n 6: multi_level_strassen_winograd_multiply_and_add_block_grained\n -1: internal multiplication\n -2: pure BLAS\n default: 5", mult_algo_num);
cp.add_int('s', "scheduling-algo", "N", "use scheduling-algorithm number N\n available are:\n 0: online LRU\n 1: offline LFD\n 2: offline LRU prefetching\n default: 2", sched_algo_num);
cp.set_description("stxxl matrix test");
cp.set_author("Raoul Steffen <R-Steffen@gmx.de>");
if (!cp.process(argc, argv))
return -1;
STXXL_MSG("multiplying two full double matrices of rank " << rank << ", block order " << block_order
<< " using " << internal_memory / 1024 / 1024 << "MiB internal memory, multiplication-algo "
<< mult_algo_num << ", scheduling-algo " << sched_algo_num);
typedef double value_type;
stxxl::stats_data stats_before, stats_after;
stxxl::matrix_operation_statistic_data matrix_stats_before, matrix_stats_after;
if (mult_algo_num == -2)
{
const int_type size = rank * rank;
value_type* A = new value_type[size];
value_type* B = new value_type[size];
value_type* C = new value_type[size];
// write A and B
for (int_type i = 0; i < size; ++i)
A[i] = B[i] = 1;
// evict A and B by accessing lots of memory
int_type int_mem_size = 50 * 2 ^ 30 / sizeof(int_type);
assert(int_mem_size > 0);
int_type* D = new int_type[int_mem_size];
for (int_type i = 0; i < int_mem_size; ++i)
D[i] = 1;
delete[] D;
#if STXXL_BLAS
stats_before = *stxxl::stats::get_instance();
gemm_wrapper(rank, rank, rank,
value_type(1), false, A,
false, B,
value_type(0), false, C);
stats_after = *stxxl::stats::get_instance();
#else
STXXL_ERRMSG("internal multiplication is only available for testing with blas");
#endif
delete[] A;
delete[] B;
delete[] C;
}
else
{
typedef stxxl::block_scheduler<stxxl::matrix_swappable_block<value_type, block_order> > bst;
typedef stxxl::matrix<value_type, block_order> mt;
typedef mt::row_major_iterator mitt;
typedef mt::const_row_major_iterator cmitt;
bst* b_s = new bst(internal_memory); // the block_scheduler may use internal_memory byte for caching
bst& bs = *b_s;
mt* a = new mt(bs, rank, rank),
* b = new mt(bs, rank, rank),
* c = new mt(bs, rank, rank);
STXXL_MSG("writing input matrices");
for (mitt mit = a->begin(); mit != a->end(); ++mit)
*mit = 1;
for (mitt mit = b->begin(); mit != b->end(); ++mit)
*mit = 1;
bs.flush();
STXXL_MSG("start of multiplication");
matrix_stats_before.set();
stats_before = *stxxl::stats::get_instance();
if (mult_algo_num >= 0)
*c = a->multiply(*b, mult_algo_num, sched_algo_num);
else
*c = a->multiply_internal(*b, sched_algo_num);
bs.flush();
stats_after = *stxxl::stats::get_instance();
matrix_stats_after.set();
STXXL_MSG("end of multiplication");
matrix_stats_after = matrix_stats_after - matrix_stats_before;
STXXL_MSG(matrix_stats_after);
stats_after = stats_after - stats_before;
STXXL_MSG(stats_after);
{
int_type num_err = 0;
for (cmitt mit = c->cbegin(); mit != c->cend(); ++mit)
num_err += (*mit != rank);
if (num_err)
STXXL_ERRMSG("c had " << num_err << " errors");
}
delete a;
delete b;
delete c;
delete b_s;
}
STXXL_MSG("end of test");
std::cout << "@";
std::cout << " ra " << rank << " bo " << block_order << " im " << internal_memory / 1024 / 1024
<< " ma " << mult_algo_num << " sa " << sched_algo_num;
std::cout << " mu " << matrix_stats_after.block_multiplication_calls
<< " mus " << matrix_stats_after.block_multiplications_saved_through_zero
<< " ad " << matrix_stats_after.block_addition_calls
<< " ads " << matrix_stats_after.block_additions_saved_through_zero;
std::cout << " t " << stats_after.get_elapsed_time()
<< " r " << stats_after.get_reads() << " w " << stats_after.get_writes()
<< " rt " << stats_after.get_read_time() << " rtp " << stats_after.get_pread_time()
<< " wt " << stats_after.get_write_time() << " wtp " << stats_after.get_pwrite_time()
<< " rw " << stats_after.get_wait_read_time() << " ww " << stats_after.get_wait_write_time()
<< " iotp " << stats_after.get_pio_time();
std::cout << std::endl;
return 0;
}