File: link-builtin-bitcode-gpu-attrs-preserved.cu

package info (click to toggle)
llvm-toolchain-18 1%3A18.1.8-18
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,908,340 kB
  • sloc: cpp: 6,667,937; ansic: 1,440,452; asm: 883,619; python: 230,549; objc: 76,880; f90: 74,238; lisp: 35,989; pascal: 16,571; sh: 10,229; perl: 7,459; ml: 5,047; awk: 3,523; makefile: 2,987; javascript: 2,149; xml: 892; fortran: 649; cs: 573
file content (48 lines) | stat: -rw-r--r-- 2,219 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// Verify the behavior of the +gfxN-insts in the way that
// rocm-device-libs should be built with. e.g. If the device libraries has a function
// with "+gfx11-insts", that attribute should still be present after linking and not
// overwritten with the current target's settings.

// This is important because at this time, many device-libs functions that are only
// available on some GPUs put an attribute such as "+gfx11-insts" so that
// AMDGPURemoveIncompatibleFunctions can detect & remove them if needed.

// Build the fake device library in the way rocm-device-libs should be built.
//
// RUN: %clang_cc1 -x cl -triple amdgcn-amd-amdhsa\
// RUN:   -mcode-object-version=none -emit-llvm-bc \
// RUN:   %S/Inputs/ocml-sample-target-attrs.cl -o %t.bc

// Check the default behavior
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 -fcuda-is-device \
// RUN:   -mlink-builtin-bitcode %t.bc \
// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,INTERNALIZE

// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1101 -fcuda-is-device \
// RUN:   -mlink-builtin-bitcode %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,INTERNALIZE

// Check the case where no internalization is performed
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx803 \
// RUN:   -fcuda-is-device -mlink-bitcode-file %t.bc -emit-llvm %s -o -  | FileCheck %s --check-prefixes=CHECK,NOINTERNALIZE

// Check the case where no internalization is performed
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx1101 \
// RUN:   -fcuda-is-device -mlink-bitcode-file %t.bc -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,NOINTERNALIZE


// CHECK: define {{.*}} i64 @do_intrin_stuff() #[[ATTR:[0-9]+]]
// INTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-cpu"="gfx{{.*}}" "target-features"="{{.*}}+gfx11-insts{{.*}}"
// NOINTERNALIZE: attributes #[[ATTR]] = {{.*}} "target-features"="+gfx11-insts"

#define __device__ __attribute__((device))
#define __global__ __attribute__((global))

typedef unsigned long ulong;

extern "C" {
__device__ ulong do_intrin_stuff(void);

__global__ void kernel_f16(ulong* out) {
    *out = do_intrin_stuff();
  }
}