1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
|
/*
Copyright 2005-2008 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "common.h"
#include "tbb/tick_count.h"
#include "tbb/task.h"
#include "tbb/task_scheduler_init.h"
#include <cstdlib>
#include <cstdio>
#include <cstring>
// The performance of this example can be significantly better when
// the objects are allocated by the scalable_allocator instead of the
// default "operator new". The reason is that the scalable_allocator
// typically packs small objects more tightly than the default "operator new",
// resulting in a smaller memory footprint, and thus more efficient use of
// cache and virtual memory. Also the scalable_allocator works faster for
// multi-threaded allocations.
//
// Pass -stdmalloc as the 1st command line parameter to use the default "operator new"
// and see the performance difference.
#include "tbb/scalable_allocator.h"
using namespace std;
static double Pi = 3.14159265358979;
const bool tbbmalloc = true;
const bool stdmalloc = false;
template<bool use_tbbmalloc>
class TreeMaker {
class SubTreeCreationTask: public tbb::task {
TreeNode*& my_root;
bool is_continuation;
typedef TreeMaker<use_tbbmalloc> MyTreeMaker;
public:
SubTreeCreationTask( TreeNode*& root, long number_of_nodes ) : my_root(root), is_continuation(false) {
my_root = MyTreeMaker::allocate_node();
my_root->node_count = number_of_nodes;
my_root->value = Value(Pi*number_of_nodes);
}
tbb::task* execute() {
tbb::task* next = NULL;
if( !is_continuation ) {
long subtree_size = my_root->node_count - 1;
if( subtree_size<1000 ) { /* grainsize */
my_root->left = MyTreeMaker::do_in_one_thread(subtree_size/2);
my_root->right = MyTreeMaker::do_in_one_thread(subtree_size - subtree_size/2);
} else {
// Create tasks before spawning any of them.
tbb::task* a = new( allocate_child() ) SubTreeCreationTask(my_root->left,subtree_size/2);
tbb::task* b = new( allocate_child() ) SubTreeCreationTask(my_root->right,subtree_size - subtree_size/2);
recycle_as_continuation();
is_continuation = true;
set_ref_count(2);
spawn(*b);
next = a;
}
}
return next;
}
};
public:
static TreeNode* allocate_node() {
return use_tbbmalloc? tbb::scalable_allocator<TreeNode>().allocate(1) : new TreeNode;
}
static TreeNode* do_in_one_thread( long number_of_nodes ) {
if( number_of_nodes==0 ) {
return NULL;
} else {
TreeNode* n = allocate_node();
n->node_count = number_of_nodes;
n->value = Value(Pi*number_of_nodes);
--number_of_nodes;
n->left = do_in_one_thread( number_of_nodes/2 );
n->right = do_in_one_thread( number_of_nodes - number_of_nodes/2 );
return n;
}
}
static TreeNode* do_in_parallel( long number_of_nodes ) {
TreeNode* root_node;
SubTreeCreationTask& a = *new(tbb::task::allocate_root()) SubTreeCreationTask(root_node, number_of_nodes);
tbb::task::spawn_root_and_wait(a);
return root_node;
}
static TreeNode* create_and_time( long number_of_nodes ) {
tbb::tick_count t0, t1;
TreeNode* root = allocate_node();
root->node_count = number_of_nodes;
root->value = Value(Pi*number_of_nodes);
--number_of_nodes;
t0 = tbb::tick_count::now();
root->left = do_in_one_thread( number_of_nodes/2 );
t1 = tbb::tick_count::now();
printf ("%24s: time = %.1f msec\n", "half created serially", (t1-t0).seconds()*1000);
t0 = tbb::tick_count::now();
root->right = do_in_parallel( number_of_nodes - number_of_nodes/2 );
t1 = tbb::tick_count::now();
printf ("%24s: time = %.1f msec\n", "half done in parallel", (t1-t0).seconds()*1000);
return root;
}
};
int main( int argc, char *argv[] ) {
// Parse command line parameters
// The format is: <exe_name> [-stdmalloc] [num_of_nodes [num_of_threads]]
bool use_tbbmalloc = true;
int arg_idx = 1;
if( argc>1 && strcmp(argv[1], "-stdmalloc")==0 ) {
use_tbbmalloc = false;
arg_idx = 2;
}
long number_of_nodes = argc>arg_idx ? strtol(argv[arg_idx],0,0) : 10000000;
++arg_idx;
int nthread = argc>arg_idx ? strtol(argv[arg_idx],0,0) : tbb::task_scheduler_init::automatic;
// Start up scheduler
// For production, no argument should be provided to the constructor, so that
// the application gets the number of threads that are physically available.
tbb::task_scheduler_init init(nthread);
TreeNode* root;
if( use_tbbmalloc ) {
printf("Tree creation using TBB scalable allocator\n");
root = TreeMaker<tbbmalloc>::create_and_time( number_of_nodes );
} else {
printf("Tree creation using standard operator new\n");
root = TreeMaker<stdmalloc>::create_and_time( number_of_nodes );
}
// Warm up caches
SerialSumTree(root);
printf("Calculations:\n");
const char* which;
for( int i=0; i<3; ++i ) {
tbb::tick_count t0 = tbb::tick_count::now();
Value result;
switch( i ) {
case 0:
which = "SerialSumTree";
result = SerialSumTree(root);
break;
case 1:
which = "SimpleParallelSumTree";
result = SimpleParallelSumTree(root);
break;
case 2:
which = "OptimizedParallelSumTree";
result = OptimizedParallelSumTree(root);
break;
}
tbb::tick_count t1 = tbb::tick_count::now();
printf ("%24s: time = %.1f msec, sum=%g\n", which, (t1-t0).seconds()*1000, result);
}
return 0;
}
|