1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
|
// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
// SPDX-FileCopyrightText: Bradley M. Bell <bradbell@seanet.com>
// SPDX-FileContributor: 2003-22 Bradley M. Bell
// ----------------------------------------------------------------------------
# include <cppad/cppad.hpp>
# include <omp.h>
namespace {
using CppAD::vector;
typedef CPPAD_TESTVECTOR(double) d_vector;
typedef CPPAD_TESTVECTOR( CppAD::AD<double> ) ad_vector;
// --------------------------------------------------------------------
class long_sum_atomic : public CppAD::atomic_base<double> {
private:
const size_t length_of_sum_;
public:
// constructor
long_sum_atomic(const std::string& name, size_t length_of_sum)
:
CppAD::atomic_base<double>(name) ,
length_of_sum_(length_of_sum)
{ }
// forward (only implement zero order)
virtual bool forward(
size_t p ,
size_t q ,
const vector<bool>& vx ,
vector<bool>& vy ,
const vector<double>& tx ,
vector<double>& ty )
{
// check for errors in usage
bool ok = p == 0 && q == 0;
ok &= tx.size() == 1;
ok &= ty.size() == 1;
ok &= vx.size() <= 1;
if( ! ok )
return false;
// variable information
if( vx.size() > 0 )
vy[0] = vx[0];
// value information
ty[0] = 0.0;
for(size_t i = 0; i < length_of_sum_; ++i)
ty[0] += tx[0];
return ok;
}
};
// --------------------------------------------------------------------
// inform CppAD if we are in parallel mode
bool in_parallel(void)
{ return omp_in_parallel() != 0; }
//
// inform CppAD of the current thread number
size_t thread_num(void)
{ return static_cast<size_t>( omp_get_thread_num() ); }
}
// multi_thread_checkpoint
bool multi_atomic_two(void)
{ bool ok = true;
// OpenMP setup
size_t num_threads = 4; // number of threads
omp_set_dynamic(0); // turn off dynamic thread adjustment
omp_set_num_threads( int(num_threads) ); // set number of OMP threads
// check that multi-threading is possible on this machine
if( omp_get_max_threads() < 2 )
{ std::cout << "This machine does not support multi-threading: ";
}
// create checkpoint version of algorithm
size_t n(1), m(1);
ad_vector ax(n), ay(m);
ax[0] = 2.0;
size_t length_of_sum = 5000;
long_sum_atomic atom_fun("long_sum", length_of_sum);
// setup for using CppAD in parallel mode
CppAD::thread_alloc::parallel_setup(num_threads, in_parallel, thread_num);
CppAD::thread_alloc::hold_memory(true);
CppAD::parallel_ad<double>();
// place to hold result for each thread
d_vector y(num_threads);
for(size_t thread = 0; thread < num_threads; thread++)
y[thread] = 0.0;
# pragma omp parallel for
for(int thread = 0; thread < int(num_threads); thread++)
{ ad_vector au(n), av(m);
au[0] = 1.0;
CppAD::Independent(au);
atom_fun(au, av);
CppAD::ADFun<double> f(au, av);
//
d_vector x(n), v(m);
x[0] = double( thread + 1 );
v = f.Forward(0, x);
//
// this assignment has false sharing; i.e., will case cache resets
// (conversion avoids boost vector conversion warning)
y[size_t(thread)] = v[0];
}
// check the results
for(size_t thread = 0; thread < num_threads; thread++)
{ double check = double( length_of_sum * (thread + 1) );
ok &= check == y[thread];
}
return ok;
}
|