File: link-builtin-bitcode-gpu-attrs-preserved.cu

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (48 lines) | stat: -rw-r--r-- 2,219 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// Verify how "+gfxN-insts" target features behave when linking a device library
// built the way rocm-device-libs should be built. For example, if a function in
// the device library carries "+gfx11-insts", that attribute should still be
// present after linking and must not be overwritten with the current target's settings.

// This is important because, at this time, many device-libs functions that are
// only available on some GPUs carry an attribute such as "+gfx11-insts" so that
// AMDGPURemoveIncompatibleFunctions can detect and remove them if needed.

// Build the fake device library in the way rocm-device-libs should be built.
//
// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa\
// RUN:   -mcode-object-version=none -emit-llvm-bc \
// RUN:   %S/Inputs/ocml-sample-target-attrs.cl -o %t.bc

// Check the default behavior
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \
// RUN:   -mlink-builtin-bitcode %t.bc \
// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,INTERNALIZE

// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1101 -fcuda-is-device \
// RUN:   -mlink-builtin-bitcode %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,INTERNALIZE

// Check the case where no internalization is performed
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
// RUN:   -fcuda-is-device -mlink-bitcode-file %t.bc -emit-llvm %s -o -  | FileCheck %s --check-prefixes=CHECK,NOINTERNALIZE

// Check the case where no internalization is performed, this time targeting gfx1101
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1101 \
// RUN:   -fcuda-is-device -mlink-bitcode-file %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,NOINTERNALIZE


// CHECK: define {{.*}} i64 @do_intrin_stuff() #[[ATTR:[0-9]+]]
// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="{{.*}}+gfx11-insts{{.*}}"
// NOINTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-features"="+gfx11-insts"

// The function qualifiers are spelled out locally as attribute macros; this
// file includes no headers that would otherwise provide them.
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))

// Return type used for do_intrin_stuff(); the FileCheck pattern above expects
// that function to be emitted with an i64 return type.
typedef unsigned long ulong;

extern "C" {
// Declaration only: no definition appears in this file. The FileCheck lines
// above expect a definition of this symbol in the emitted IR, presumably
// supplied by the bitcode file that the RUN lines link in.
__device__ ulong do_intrin_stuff(void);

// Kernel whose only job is to call do_intrin_stuff() and store the returned
// value through `out`, so that this translation unit references the function.
__global__ void kernel_f16(ulong *out) {
  const ulong result = do_intrin_stuff();
  out[0] = result;
}
}