1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
|
// REQUIRES: nvptx-registered-target
//
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
// RUN: sm_75 -target-feature +ptx70 -fcuda-is-device -fnative-half-type -S \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX70_SM75 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
// RUN: sm_80 -target-feature +ptx70 -fcuda-is-device -fnative-half-type -S \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX70_SM80 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown \
// RUN: -target-cpu sm_80 -target-feature +ptx70 -fcuda-is-device \
// RUN: -fnative-half-type -S -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX70_SM80 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
// RUN: sm_86 -target-feature +ptx72 -fcuda-is-device -fnative-half-type -S \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX72_SM86 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown \
// RUN: -target-cpu sm_86 -target-feature +ptx72 -fcuda-is-device \
// RUN: -fnative-half-type -S -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX72_SM86 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
// RUN: sm_53 -target-feature +ptx42 -fcuda-is-device -fnative-half-type -S \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX42_SM53 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown \
// RUN: -target-cpu sm_53 -target-feature +ptx42 -fcuda-is-device \
// RUN: -fnative-half-type -S -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX42_SM53 %s
#define __device__ __attribute__((device))
__device__ void nvvm_ex2_sm75() {
#if __CUDA_ARCH__ >= 750
// CHECK_PTX70_SM75: call half @llvm.nvvm.ex2.approx.f16
__nvvm_ex2_approx_f16(0.1f16);
// CHECK_PTX70_SM75: call <2 x half> @llvm.nvvm.ex2.approx.f16x2
__nvvm_ex2_approx_f16x2({0.1f16, 0.7f16});
#endif
// CHECK: ret void
}
// CHECK-LABEL: nvvm_min_max_sm80
__device__ void nvvm_min_max_sm80() {
#if __CUDA_ARCH__ >= 800
// CHECK_PTX70_SM80: call half @llvm.nvvm.fmin.f16
__nvvm_fmin_f16(0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call half @llvm.nvvm.fmin.ftz.f16
__nvvm_fmin_ftz_f16(0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call half @llvm.nvvm.fmin.nan.f16
__nvvm_fmin_nan_f16(0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call half @llvm.nvvm.fmin.ftz.nan.f16
__nvvm_fmin_ftz_nan_f16(0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fmin.f16x2
__nvvm_fmin_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fmin.ftz.f16x2
__nvvm_fmin_ftz_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fmin.nan.f16x2
__nvvm_fmin_nan_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fmin.ftz.nan.f16x2
__nvvm_fmin_ftz_nan_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX70_SM80: call half @llvm.nvvm.fmax.f16
__nvvm_fmax_f16(0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call half @llvm.nvvm.fmax.ftz.f16
__nvvm_fmax_ftz_f16(0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call half @llvm.nvvm.fmax.nan.f16
__nvvm_fmax_nan_f16(0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call half @llvm.nvvm.fmax.ftz.nan.f16
__nvvm_fmax_ftz_nan_f16(0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fmax.f16x2
__nvvm_fmax_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fmax.ftz.f16x2
__nvvm_fmax_ftz_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fmax.nan.f16x2
__nvvm_fmax_nan_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fmax.ftz.nan.f16x2
__nvvm_fmax_ftz_nan_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
#endif
// CHECK: ret void
}
// CHECK-LABEL: nvvm_fma_f16_f16x2_sm80
__device__ void nvvm_fma_f16_f16x2_sm80() {
#if __CUDA_ARCH__ >= 800
// CHECK_PTX70_SM80: call half @llvm.nvvm.fma.rn.relu.f16
__nvvm_fma_rn_relu_f16(0.1f16, 0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call half @llvm.nvvm.fma.rn.ftz.relu.f16
__nvvm_fma_rn_ftz_relu_f16(0.1f16, 0.1f16, 0.1f16);
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fma.rn.relu.f16x2
__nvvm_fma_rn_relu_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
// CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.fma.rn.ftz.relu.f16x2
__nvvm_fma_rn_ftz_relu_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
#endif
// CHECK: ret void
}
// CHECK-LABEL: nvvm_fma_f16_f16x2_sm53
__device__ void nvvm_fma_f16_f16x2_sm53() {
#if __CUDA_ARCH__ >= 530
// CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.f16
__nvvm_fma_rn_f16(0.1f16, 0.1f16, 0.1f16);
// CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.ftz.f16
__nvvm_fma_rn_ftz_f16(0.1f16, 0.1f16, 0.1f16);
// CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.sat.f16
__nvvm_fma_rn_sat_f16(0.1f16, 0.1f16, 0.1f16);
// CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.ftz.sat.f16
__nvvm_fma_rn_ftz_sat_f16(0.1f16, 0.1f16, 0.1f16);
// CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.f16x2
__nvvm_fma_rn_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
// CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2
__nvvm_fma_rn_ftz_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
// CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.sat.f16x2
__nvvm_fma_rn_sat_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
// CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.sat.f16x2
__nvvm_fma_rn_ftz_sat_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
#endif
// CHECK: ret void
}
// CHECK-LABEL: nvvm_min_max_sm86
__device__ void nvvm_min_max_sm86() {
#if __CUDA_ARCH__ >= 860
// CHECK_PTX72_SM86: call half @llvm.nvvm.fmin.xorsign.abs.f16
__nvvm_fmin_xorsign_abs_f16(0.1f16, 0.1f16);
// CHECK_PTX72_SM86: call half @llvm.nvvm.fmin.ftz.xorsign.abs.f16
__nvvm_fmin_ftz_xorsign_abs_f16(0.1f16, 0.1f16);
// CHECK_PTX72_SM86: call half @llvm.nvvm.fmin.nan.xorsign.abs.f16
__nvvm_fmin_nan_xorsign_abs_f16(0.1f16, 0.1f16);
// CHECK_PTX72_SM86: call half @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16
__nvvm_fmin_ftz_nan_xorsign_abs_f16(0.1f16, 0.1f16);
// CHECK_PTX72_SM86: call <2 x half> @llvm.nvvm.fmin.xorsign.abs.f16x2
__nvvm_fmin_xorsign_abs_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX72_SM86: call <2 x half> @llvm.nvvm.fmin.ftz.xorsign.abs.f16x2
__nvvm_fmin_ftz_xorsign_abs_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX72_SM86: call <2 x half> @llvm.nvvm.fmin.nan.xorsign.abs.f16x2
__nvvm_fmin_nan_xorsign_abs_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX72_SM86: call <2 x half> @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16x2
__nvvm_fmin_ftz_nan_xorsign_abs_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX72_SM86: call half @llvm.nvvm.fmax.xorsign.abs.f16
__nvvm_fmax_xorsign_abs_f16(0.1f16, 0.1f16);
// CHECK_PTX72_SM86: call half @llvm.nvvm.fmax.ftz.xorsign.abs.f16
__nvvm_fmax_ftz_xorsign_abs_f16(0.1f16, 0.1f16);
// CHECK_PTX72_SM86: call half @llvm.nvvm.fmax.nan.xorsign.abs.f16
__nvvm_fmax_nan_xorsign_abs_f16(0.1f16, 0.1f16);
// CHECK_PTX72_SM86: call half @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16
__nvvm_fmax_ftz_nan_xorsign_abs_f16(0.1f16, 0.1f16);
// CHECK_PTX72_SM86: call <2 x half> @llvm.nvvm.fmax.xorsign.abs.f16x2
__nvvm_fmax_xorsign_abs_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX72_SM86: call <2 x half> @llvm.nvvm.fmax.ftz.xorsign.abs.f16x2
__nvvm_fmax_ftz_xorsign_abs_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX72_SM86: call <2 x half> @llvm.nvvm.fmax.nan.xorsign.abs.f16x2
__nvvm_fmax_nan_xorsign_abs_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
// CHECK_PTX72_SM86: call <2 x half> @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2
__nvvm_fmax_ftz_nan_xorsign_abs_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16});
#endif
// CHECK: ret void
}
|