File: amdgpu-workgroup-size.cu

package info (click to toggle)
llvm-toolchain-14 1%3A14.0.6-16
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,496,368 kB
  • sloc: cpp: 5,593,980; ansic: 986,873; asm: 585,869; python: 184,223; objc: 72,530; lisp: 31,119; f90: 27,793; javascript: 9,780; pascal: 9,762; sh: 9,482; perl: 7,468; ml: 5,432; awk: 3,523; makefile: 2,547; xml: 953; cs: 573; fortran: 567
file content (25 lines) | stat: -rw-r--r-- 1,106 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
// RUN:     | FileCheck %s

#include "Inputs/cuda.h"

// CHECK-LABEL: test_get_workgroup_size
// CHECK: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 4
// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 6
// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 8
// CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
__device__ void test_get_workgroup_size(int d, int *out)
{
  switch (d) {
  case 0: *out = __builtin_amdgcn_workgroup_size_x(); break;
  case 1: *out = __builtin_amdgcn_workgroup_size_y(); break;
  case 2: *out = __builtin_amdgcn_workgroup_size_z(); break;
  default: *out = 0;
  }
}

// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}