1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
|
enable_language(CUDA)
set(THRUST_KNOWN_COMPUTE_ARCHS 35 37 50 52 53 60 61 62 70 72 75 80 86)
# Split CUDA_FLAGS into 3 parts:
#
# THRUST_CUDA_FLAGS_BASE: Common CUDA flags for all targets.
# THRUST_CUDA_FLAGS_RDC: Additional CUDA flags for targets compiled with RDC.
# THRUST_CUDA_FLAGS_NO_RDC: Additional CUDA flags for targets compiled without RDC.
#
# This is necessary because CUDA SMs 5.3, 6.2, and 7.2 do not support RDC, but
# we want to always build some targets (e.g. testing/cuda/*) with RDC.
# We work around this by building the "always RDC" targets without support for
# those SMs. This requires two sets of CUDA_FLAGS.
#
# Enabling any of those SMs along with the ENABLE_RDC options will result in a
# configuration error.
#
# Because of how CMake handles the CMAKE_CUDA_FLAGS variables, every target
# generated in a given directory will use the same value for CMAKE_CUDA_FLAGS,
# which is determined at the end of the directory's scope. This means caution
# should be used when trying to build different targets with different flags,
# since they might not behave as expected. This will improve with CMake 3.18,
# which add the DEVICE_LINK genex, fixing the issue with using per-target
# CUDA_FLAGS: https://gitlab.kitware.com/cmake/cmake/-/issues/18265
set(THRUST_CUDA_FLAGS_BASE "${CMAKE_CUDA_FLAGS}")
set(THRUST_CUDA_FLAGS_RDC)
set(THRUST_CUDA_FLAGS_NO_RDC)
# Archs that don't support RDC:
set(no_rdc_archs 53 62 72)
# Find the highest arch:
list(SORT THRUST_KNOWN_COMPUTE_ARCHS)
list(LENGTH THRUST_KNOWN_COMPUTE_ARCHS max_idx)
math(EXPR max_idx "${max_idx} - 1")
list(GET THRUST_KNOWN_COMPUTE_ARCHS ${max_idx} highest_arch)
option(THRUST_AUTO_DETECT_COMPUTE_ARCHS
"If ON, compute architectures for all GPUs in the current system are enabled and all other compute architectures are disabled."
OFF
)
if (THRUST_AUTO_DETECT_COMPUTE_ARCHS)
if ("NVCXX" STREQUAL "${CMAKE_CUDA_COMPILER_ID}")
message(STATUS "Thrust: Using NVC++ builtin automatic compute architecture detection.")
else()
set(detect_compute_archs_source ${Thrust_SOURCE_DIR}/cmake/detect_compute_archs.cu)
set(detect_compute_archs_exe ${PROJECT_BINARY_DIR}/detect_compute_archs)
set(detect_compute_archs_error_log ${PROJECT_BINARY_DIR}/detect_compute_archs.stderr.log)
execute_process(
COMMAND ${CMAKE_CUDA_COMPILER}
-std=c++11
-o ${detect_compute_archs_exe}
--run
${detect_compute_archs_source}
OUTPUT_VARIABLE detected_archs
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_FILE ${detect_compute_archs_error_log})
if ("NONE" STREQUAL "${detected_archs}")
set(detected_message " none")
else()
foreach (arch IN LISTS detected_archs)
string(APPEND detected_message " sm_${arch}")
endforeach()
endif()
message(STATUS "Thrust: Automatically detected compute architectures:${detected_message}")
endif()
endif()
set(option_init OFF)
if ("NVCXX" STREQUAL "${CMAKE_CUDA_COMPILER_ID}")
set(option_init ON)
endif()
option(THRUST_DISABLE_ARCH_BY_DEFAULT
"If ON, then all compute architectures are disabled on the initial CMake run."
${option_init}
)
set(option_init ON)
if (THRUST_DISABLE_ARCH_BY_DEFAULT OR THRUST_AUTO_DETECT_COMPUTE_ARCHS)
set(option_init OFF)
endif()
set(num_archs_enabled 0)
foreach (arch IN LISTS THRUST_KNOWN_COMPUTE_ARCHS)
set(this_option_init ${option_init})
if (${arch} IN_LIST detected_archs)
set(this_option_init ON)
endif()
option(THRUST_ENABLE_COMPUTE_${arch}
"Enable code generation for tests for sm_${arch}"
${this_option_init}
)
if (NOT THRUST_ENABLE_COMPUTE_${arch})
continue()
endif()
math(EXPR num_archs_enabled "${num_archs_enabled} + 1")
if ("NVCXX" STREQUAL "${CMAKE_CUDA_COMPILER_ID}")
if (NOT ${num_archs_enabled} EQUAL 1)
message(FATAL_ERROR
"NVCXX does not support compilation for multiple device architectures "
"at once."
)
endif()
set(arch_flag "-gpu=cc${arch}")
else()
set(arch_flag "-gencode arch=compute_${arch},code=sm_${arch}")
endif()
string(APPEND compute_message " sm_${arch}")
string(APPEND THRUST_CUDA_FLAGS_NO_RDC " ${arch_flag}")
if (NOT arch IN_LIST no_rdc_archs)
string(APPEND THRUST_CUDA_FLAGS_RDC " ${arch_flag}")
endif()
endforeach()
if (NOT "NVCXX" STREQUAL "${CMAKE_CUDA_COMPILER_ID}")
option(THRUST_ENABLE_COMPUTE_FUTURE
"Enable code generation for tests for compute_${highest_arch}"
${option_init}
)
if (THRUST_ENABLE_COMPUTE_FUTURE)
string(APPEND THRUST_CUDA_FLAGS_BASE
" -gencode arch=compute_${highest_arch},code=compute_${highest_arch}"
)
string(APPEND compute_message " compute_${highest_arch}")
endif()
endif()
message(STATUS "Thrust: Explicitly enabled compute architectures:${compute_message}")
# RDC is off by default in NVCC and on by default in NVC++. Turning off RDC
# isn't currently supported by NVC++. So, we default to RDC off for NVCC and
# RDC on for NVC++.
set(option_init OFF)
if ("NVCXX" STREQUAL "${CMAKE_CUDA_COMPILER_ID}")
set(option_init ON)
endif()
option(THRUST_ENABLE_TESTS_WITH_RDC
"Build all Thrust tests with RDC; tests that require RDC are not affected by this option."
${option_init}
)
option(THRUST_ENABLE_EXAMPLES_WITH_RDC
"Build all Thrust examples with RDC; examples which require RDC are not affected by this option."
${option_init}
)
# Check for RDC/SM compatibility and error/warn if necessary
foreach (sm IN LISTS no_rdc_archs)
set(sm_opt THRUST_ENABLE_COMPUTE_${sm})
if (${sm_opt})
foreach (opt IN ITEMS TESTS EXAMPLES)
set(rdc_opt THRUST_ENABLE_${opt}_WITH_RDC)
if (${rdc_opt})
message(FATAL_ERROR
"${rdc_opt} is incompatible with ${sm_opt}, since sm_${sm} does not "
"support RDC."
)
endif()
endforeach()
message(NOTICE
"sm_${sm} does not support RDC. Targets that require RDC will be built "
"without support for this architecture."
)
endif()
endforeach()
# By default RDC is not used:
set(CMAKE_CUDA_FLAGS "${THRUST_CUDA_FLAGS_BASE} ${THRUST_CUDA_FLAGS_NO_RDC}")
|