/*
* Copyright 2008-2013 NVIDIA Corporation
* Modifications Copyright© 2023-2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <thrust/detail/config.h>
#include <thrust/detail/type_deduction.h>
#include <thrust/detail/nv_target.h>
#include <limits>
THRUST_NAMESPACE_BEGIN
namespace detail
{
// Counts the leading zero bits of x, measured within the width of Integer
// (8 * sizeof(Integer) bits). Returns that full width when x == 0.
//
// Device path: ::__clz operates on a 32-bit int and ::__clzll on a 64-bit
// long long, so the intrinsic is selected from sizeof(Integer). For types
// narrower than int the value is zero-extended and the count is rebased to the
// type's own width, so host and device agree. Types wider than long long are
// not handled on the device path.
//
// Host path: scans from the most significant bit downwards for the first set bit.
template <typename Integer>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
Integer clz(Integer x)
{
  Integer result;
  NV_IF_TARGET(NV_IS_DEVICE, (
    int const int_bits = 8 * sizeof(int);
    int const num_bits = 8 * sizeof(Integer);
    if (num_bits > int_bits)
    {
      // Wide types: __clz would silently truncate x to int.
      result = static_cast<Integer>(::__clzll(static_cast<long long>(x)));
    }
    else
    {
      // Bits of int that lie above the width of Integer (0 when Integer is int-sized).
      int const pad_bits = (num_bits < int_bits) ? (int_bits - num_bits) : 0;
      // Mask away sign-extension so negative narrow values do not report 0.
      unsigned int const low_bits = static_cast<unsigned int>(x) & (~0u >> pad_bits);
      // __clz counts within int_bits; drop the padding to count within num_bits.
      result = static_cast<Integer>(::__clz(static_cast<int>(low_bits)) - pad_bits);
    }
  ), (
    int num_bits = 8 * sizeof(Integer);
    int num_bits_minus_one = num_bits - 1;
    // Default for x == 0: every bit is a leading zero.
    result = num_bits;
    for (int i = num_bits_minus_one; i >= 0; --i)
    {
      if ((Integer(1) << i) & x)
      {
        result = num_bits_minus_one - i;
        break;
      }
    }
  ));
  return result;
}
// Reports whether x has at most one bit set.
// x & (x - 1) clears the lowest set bit, so the result is zero exactly when no
// second bit was present. Note that x == 0 also passes this check and yields true.
template <typename Integer>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
bool is_power_of_2(Integer x)
{
  auto const without_lowest_set_bit = x & (x - 1);
  return without_lowest_set_bit == 0;
}
// Reports whether the least significant bit of x is set.
template <typename Integer>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
bool is_odd(Integer x)
{
  return (x & 1) != 0;
}
// Returns the index of the highest set bit of x, computed as
// (bit width of Integer - 1) - clz(x). For x > 0 this is the base-2 logarithm
// rounded down.
template <typename Integer>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
Integer log2(Integer x)
{
  Integer const highest_bit_index = static_cast<Integer>(8 * sizeof(Integer) - 1);
  return highest_bit_index - clz(x);
}
// Base-2 logarithm of x rounded up: starts from log2(x) and adds one unless
// is_power_of_2(x) reports that no rounding is needed.
template <typename Integer>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
Integer log2_ri(Integer x)
{
  Integer const floor_log = log2(x);
  if (is_power_of_2(x))
  {
    // Already exact; nothing to round.
    return floor_log;
  }
  // Not exact, so round up to the next exponent.
  return static_cast<Integer>(floor_log + 1);
}
// x/y rounding towards +infinity for integers.
// Used to determine # of blocks/warps etc.
//
// The numerator is biased by (y - 1) before the truncating division, so the
// result equals ceil(x / y) for x >= 0 and y > 0 as long as x + (y - 1) does
// not overflow; other inputs are not guaranteed to round up.
// The return type comes from THRUST_DECLTYPE_RETURNS applied to the expression
// (presumably its decltype; the macro is defined outside this file).
template <typename Integer0, typename Integer1>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
// FIXME: Should use common_type.
auto divide_ri(Integer0 const x, Integer1 const y)
THRUST_DECLTYPE_RETURNS((x + (y - 1)) / y)
// x/y rounding towards zero for integers, i.e. the built-in integer division.
// Used to determine # of blocks/warps etc.
// The return type comes from THRUST_DECLTYPE_RETURNS applied to x / y
// (presumably its decltype; the macro is defined outside this file).
template <typename Integer0, typename Integer1>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
auto divide_rz(Integer0 const x, Integer1 const y)
THRUST_DECLTYPE_RETURNS(x / y)
// Round x towards +infinity to a multiple of y, computed as y * divide_ri(x, y).
// For x >= 0 and y > 0 (and no overflow inside divide_ri) this is the smallest
// multiple of y that is not less than x, so an exact multiple is returned unchanged.
template <typename Integer0, typename Integer1>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
auto round_i(Integer0 const x, Integer1 const y)
THRUST_DECLTYPE_RETURNS(y * divide_ri(x, y))
// Round x towards 0 to a multiple of y, computed as y * divide_rz(x, y).
// This discards the remainder of x / y, so an exact multiple is returned unchanged.
template <typename Integer0, typename Integer1>
THRUST_HOST_DEVICE THRUST_FORCEINLINE
auto round_z(Integer0 const x, Integer1 const y)
THRUST_DECLTYPE_RETURNS(y * divide_rz(x, y))
} // end detail
THRUST_NAMESPACE_END