1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
|
/*
* Copyright 2008-2009 NVIDIA Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <thrust/pair.h>
// Evaluates a CUDA runtime call exactly once and checks its status.
// If the status is not cudaSuccess, the file name, line number and the text
// from cudaGetErrorString() are written to stderr and the process exits with
// EXIT_FAILURE. No device synchronization is performed here.
//
// The status is stored in a prefixed temporary and the argument is
// parenthesized, so an argument expression that refers to a caller variable
// named `err` is not silently bound to the macro's own temporary.
//
// NOTE: the expansion uses fprintf/stderr and exit/EXIT_FAILURE, so the
// translation unit expanding it needs <cstdio> and <cstdlib> (or equivalents)
// in addition to the CUDA runtime declarations.
#define CUDA_SAFE_CALL_NO_SYNC( call) do {                                   \
    cudaError cusp_safe_call_status_ = (call);                               \
    if( cudaSuccess != cusp_safe_call_status_) {                             \
        fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",        \
                __FILE__, __LINE__,                                          \
                cudaGetErrorString( cusp_safe_call_status_) );               \
        exit(EXIT_FAILURE);                                                  \
    } } while (0)
// Checked CUDA runtime call followed by a checked device synchronization.
// First runs `call` through CUDA_SAFE_CALL_NO_SYNC, then waits for the device
// with cudaDeviceSynchronize() and checks that result the same way, so errors
// from previously launched asynchronous work are reported at this point.
// Either failure prints the file/line/error string to stderr and exits with
// EXIT_FAILURE.
//
// cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
#define CUDA_SAFE_CALL( call) do {                                           \
    CUDA_SAFE_CALL_NO_SYNC(call);                                            \
    CUDA_SAFE_CALL_NO_SYNC(cudaDeviceSynchronize());                         \
    } while (0)
namespace cusp
{
namespace detail
{
namespace device
{
// Ceiling division: the number of chunks of size `granularity` needed to
// cover `N` items, i.e. N / granularity rounded up.
//
//   N            number of items to cover
//   granularity  chunk size; must be non-zero (division by zero otherwise)
//
// Returns the chunk count converted to Size1.
//
// The result is formed from the quotient and a remainder test rather than
// (N + granularity - 1) / granularity, because the intermediate sum in that
// form wraps around when N is close to the maximum of its type.
// NOTE(review): intended for non-negative integral arguments; the use of `%`
// means floating-point instantiations are not supported -- confirm no caller
// relies on that.
template <typename Size1, typename Size2>
__host__ __device__
Size1 DIVIDE_INTO(Size1 N, Size2 granularity)
{
    // One extra chunk is needed only when N is not an exact multiple.
    return (N / granularity) + ((N % granularity) != 0 ? 1 : 0);
}
// Splits N items into equally sized intervals whose size is a multiple of
// `granularity`, using no more than `max_intervals` intervals.
//
//   N              number of items to partition
//   granularity    every interval size is a multiple of this value
//   max_intervals  upper bound on the number of intervals produced
//
// Returns a pair (interval_size, num_intervals).
template <typename T>
thrust::pair<T,T> uniform_splitting(const T N, const T granularity, const T max_intervals)
{
    // Number of granularity-sized pieces required to cover all N items.
    const T total_grains = DIVIDE_INTO(N, granularity);

    // Few enough pieces: each interval holds exactly one grain.
    if (total_grains <= max_intervals)
    {
        return thrust::make_pair(granularity, total_grains);
    }

    // Otherwise pack several grains into each interval. Rounding up at both
    // steps ensures that
    //   interval_count * interval_length       >= N   and
    //   (interval_count - 1) * interval_length <  N
    const T interval_length = DIVIDE_INTO(total_grains, max_intervals) * granularity;
    const T interval_count  = DIVIDE_INTO(N, interval_length);

    return thrust::make_pair(interval_length, interval_count);
}
} // end namespace device
} // end namespace detail
} // end namespace cusp
|