1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
|
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This header contains a preview of a portability system that enables
// CUDA C++ development with NVC++, NVCC, and supported host compilers.
// These interfaces are not guaranteed to be stable.
#ifndef __NV_TARGET_H
#define __NV_TARGET_H
#if defined(__NVCC__) || defined(__CUDACC_RTC__)
# define _NV_COMPILER_NVCC
#elif defined(__NVCOMPILER) && __cplusplus >= 201103L
# define _NV_COMPILER_NVCXX
#elif defined(__clang__) && defined(__CUDA__) && defined(__CUDA_ARCH__)
// clang compiling CUDA code, device mode.
# define _NV_COMPILER_CLANG_CUDA
#endif
#if defined(__CUDACC_RTC__)
# define _NV_FUNCTION_ANNOTATION __device__
#else
# define _NV_FUNCTION_ANNOTATION
#endif
#if defined(_NV_COMPILER_NVCXX)
# define _NV_BITSET_ATTRIBUTE [[nv::__target_bitset]]
#else
# define _NV_BITSET_ATTRIBUTE
#endif
#if (!defined(__ibmxl__)) && \
((defined(__cplusplus) && __cplusplus >= 201103L) || \
(defined(_MSC_VER) && _MSVC_LANG >= 201103L))
# define _NV_TARGET_CPP11
#endif
#if defined(_NV_TARGET_CPP11)
namespace nv {
namespace target {
namespace detail {
typedef unsigned long long base_int_t;
// No host specialization
constexpr base_int_t all_hosts = 1;
// NVIDIA GPUs
constexpr base_int_t sm_35_bit = 1 << 1;
constexpr base_int_t sm_37_bit = 1 << 2;
constexpr base_int_t sm_50_bit = 1 << 3;
constexpr base_int_t sm_52_bit = 1 << 4;
constexpr base_int_t sm_53_bit = 1 << 5;
constexpr base_int_t sm_60_bit = 1 << 6;
constexpr base_int_t sm_61_bit = 1 << 7;
constexpr base_int_t sm_62_bit = 1 << 8;
constexpr base_int_t sm_70_bit = 1 << 9;
constexpr base_int_t sm_72_bit = 1 << 10;
constexpr base_int_t sm_75_bit = 1 << 11;
constexpr base_int_t sm_80_bit = 1 << 12;
constexpr base_int_t sm_86_bit = 1 << 13;
constexpr base_int_t sm_87_bit = 1 << 14;
constexpr base_int_t sm_90_bit = 1 << 15;
constexpr base_int_t all_devices =
sm_35_bit | sm_37_bit |
sm_50_bit | sm_52_bit | sm_53_bit |
sm_60_bit | sm_61_bit | sm_62_bit |
sm_70_bit | sm_72_bit | sm_75_bit |
sm_80_bit | sm_86_bit | sm_87_bit |
sm_90_bit;
// Store a set of targets as a set of bits
struct _NV_BITSET_ATTRIBUTE target_description {
base_int_t targets;
_NV_FUNCTION_ANNOTATION
constexpr target_description(base_int_t a) : targets(a) { }
};
// The type of the user-visible names of the NVIDIA GPU targets
enum class sm_selector : base_int_t {
sm_35 = 35, sm_37 = 37,
sm_50 = 50, sm_52 = 52, sm_53 = 53,
sm_60 = 60, sm_61 = 61, sm_62 = 62,
sm_70 = 70, sm_72 = 72, sm_75 = 75,
sm_80 = 80, sm_86 = 86, sm_87 = 87,
sm_90 = 90,
};
_NV_FUNCTION_ANNOTATION
constexpr base_int_t toint(sm_selector a) {
return static_cast<base_int_t>(a);
}
_NV_FUNCTION_ANNOTATION
constexpr base_int_t bitexact(sm_selector a) {
return toint(a) == 35 ? sm_35_bit :
toint(a) == 37 ? sm_37_bit :
toint(a) == 50 ? sm_50_bit :
toint(a) == 52 ? sm_52_bit :
toint(a) == 53 ? sm_53_bit :
toint(a) == 60 ? sm_60_bit :
toint(a) == 61 ? sm_61_bit :
toint(a) == 62 ? sm_62_bit :
toint(a) == 70 ? sm_70_bit :
toint(a) == 72 ? sm_72_bit :
toint(a) == 75 ? sm_75_bit :
toint(a) == 80 ? sm_80_bit :
toint(a) == 86 ? sm_86_bit :
toint(a) == 87 ? sm_87_bit :
toint(a) == 90 ? sm_90_bit : 0;
}
_NV_FUNCTION_ANNOTATION
constexpr base_int_t bitrounddown(sm_selector a) {
return toint(a) >= 90 ? sm_90_bit :
toint(a) >= 87 ? sm_87_bit :
toint(a) >= 86 ? sm_86_bit :
toint(a) >= 80 ? sm_80_bit :
toint(a) >= 75 ? sm_75_bit :
toint(a) >= 72 ? sm_72_bit :
toint(a) >= 70 ? sm_70_bit :
toint(a) >= 62 ? sm_62_bit :
toint(a) >= 61 ? sm_61_bit :
toint(a) >= 60 ? sm_60_bit :
toint(a) >= 53 ? sm_53_bit :
toint(a) >= 52 ? sm_52_bit :
toint(a) >= 50 ? sm_50_bit :
toint(a) >= 37 ? sm_37_bit :
toint(a) >= 35 ? sm_35_bit : 0;
}
// Public API for NVIDIA GPUs
_NV_FUNCTION_ANNOTATION
constexpr target_description is_exactly(sm_selector a) {
return target_description(bitexact(a));
}
_NV_FUNCTION_ANNOTATION
constexpr target_description provides(sm_selector a) {
return target_description(~(bitrounddown(a) - 1) & all_devices);
}
// Boolean operations on target sets
_NV_FUNCTION_ANNOTATION
constexpr target_description operator&&(target_description a,
target_description b) {
return target_description(a.targets & b.targets);
}
_NV_FUNCTION_ANNOTATION
constexpr target_description operator||(target_description a,
target_description b) {
return target_description(a.targets | b.targets);
}
_NV_FUNCTION_ANNOTATION
constexpr target_description operator!(target_description a) {
return target_description(~a.targets & (all_devices | all_hosts));
}
}
using detail::target_description;
using detail::sm_selector;
// The predicates for basic host/device selection
constexpr target_description is_host =
target_description(detail::all_hosts);
constexpr target_description is_device =
target_description(detail::all_devices);
constexpr target_description any_target =
target_description(detail::all_hosts | detail::all_devices);
constexpr target_description no_target =
target_description(0);
// The public names for NVIDIA GPU architectures
constexpr sm_selector sm_35 = sm_selector::sm_35;
constexpr sm_selector sm_37 = sm_selector::sm_37;
constexpr sm_selector sm_50 = sm_selector::sm_50;
constexpr sm_selector sm_52 = sm_selector::sm_52;
constexpr sm_selector sm_53 = sm_selector::sm_53;
constexpr sm_selector sm_60 = sm_selector::sm_60;
constexpr sm_selector sm_61 = sm_selector::sm_61;
constexpr sm_selector sm_62 = sm_selector::sm_62;
constexpr sm_selector sm_70 = sm_selector::sm_70;
constexpr sm_selector sm_72 = sm_selector::sm_72;
constexpr sm_selector sm_75 = sm_selector::sm_75;
constexpr sm_selector sm_80 = sm_selector::sm_80;
constexpr sm_selector sm_86 = sm_selector::sm_86;
constexpr sm_selector sm_87 = sm_selector::sm_87;
constexpr sm_selector sm_90 = sm_selector::sm_90;
using detail::is_exactly;
using detail::provides;
}
}
#endif // C++11
#include "detail/__target_macros"
#endif // __NV_TARGET_H
|