File: promote-alloca-memset.ll

package info (click to toggle)
llvm-toolchain-17 1%3A17.0.6-22
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,799,624 kB
  • sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
file content (87 lines) | stat: -rw-r--r-- 3,710 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s

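; Checks that a supported memset (non-volatile, covering the whole alloca from
; its base pointer) doesn't block PromoteAlloca, and that unsupported memsets
; leave the alloca untouched.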

; A non-volatile memset that zero-fills the entire [6 x i64] alloca (48 bytes)
; must not prevent promotion: the expected output has no alloca and builds on
; a zeroinitializer <6 x i64> value instead.
define amdgpu_kernel void @memset_all_zero(i64 %val) {
; CHECK-LABEL: @memset_all_zero(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <6 x i64> zeroinitializer, i64 [[VAL:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <6 x i64> [[TMP0]], i64 [[VAL]], i64 1
; CHECK-NEXT:    ret void
;
entry:
  %buf = alloca [6 x i64], align 4, addrspace(5)
  ; Zero-fill all six elements.
  call void @llvm.memset.p5.i64(ptr addrspace(5) %buf, i8 0, i64 48, i1 false)
  ; Overwrite element 0, then read it back (the loaded value is unused).
  store i64 %val, ptr addrspace(5) %buf
  %elt0 = load i64, ptr addrspace(5) %buf
  ; Overwrite element 1 through a GEP.
  %elt1.ptr = getelementptr [6 x i64], ptr addrspace(5) %buf, i64 0, i64 1
  store i64 %val, ptr addrspace(5) %elt1.ptr
  ret void
}

; A non-volatile memset that fills the entire [4 x i64] alloca (32 bytes) with
; the byte value 5 must not prevent promotion. Each promoted i64 element is
; then the byte 0x05 repeated eight times:
; 0x0505050505050505 = 361700864190383365.
define amdgpu_kernel void @memset_all_5(i64 %val) {
; CHECK-LABEL: @memset_all_5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> <i64 361700864190383365, i64 361700864190383365, i64 361700864190383365, i64 361700864190383365>, i64 [[VAL:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i64 1
; CHECK-NEXT:    ret void
;
entry:
  %stack = alloca [4 x i64], align 4, addrspace(5)
  call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 5, i64 32, i1 false)
  store i64 %val, ptr addrspace(5) %stack
  %reload = load i64, ptr addrspace(5) %stack
  ; The GEP source type matches the allocated type; element 1 is at byte
  ; offset 8 of the alloca.
  %stack.1 = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
  store i64 %val, ptr addrspace(5) %stack.1
  ret void
}

; The memset covers the whole [4 x i64] alloca from its base pointer, but it
; is volatile (last argument is i1 true). The expected output keeps the
; alloca, the memset and the store unchanged.
define amdgpu_kernel void @memset_volatile_nopromote(i64 %val) {
; CHECK-LABEL: @memset_volatile_nopromote(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BUF:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT:    call void @llvm.memset.p5.i64(ptr addrspace(5) [[BUF]], i8 0, i64 32, i1 true)
; CHECK-NEXT:    store i64 [[VAL:%.*]], ptr addrspace(5) [[BUF]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %buf = alloca [4 x i64], align 4, addrspace(5)
  ; Volatile zero-fill of all 32 bytes.
  call void @llvm.memset.p5.i64(ptr addrspace(5) %buf, i8 0, i64 32, i1 true)
  store i64 %val, ptr addrspace(5) %buf
  ret void
}

; The memset writes 31 bytes, one byte short of the 32-byte [4 x i64] alloca,
; so it does not cover the whole allocation and the alloca must stay.
; The memset is deliberately non-volatile (i1 false): a volatile memset is
; rejected on its own (see @memset_volatile_nopromote), which would hide the
; size check this test is meant to exercise.
define amdgpu_kernel void @memset_badsize_nopromote(i64 %val) {
; CHECK-LABEL: @memset_badsize_nopromote(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT:    call void @llvm.memset.p5.i64(ptr addrspace(5) [[STACK]], i8 0, i64 31, i1 false)
; CHECK-NEXT:    store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %stack = alloca [4 x i64], align 4, addrspace(5)
  call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 31, i1 false)
  store i64 %val, ptr addrspace(5) %stack
  ret void
}

; The memset destination is a GEP to element 1 rather than the alloca itself,
; and it fills only the trailing 24 bytes, so the alloca must stay.
; The memset is deliberately non-volatile (i1 false): a volatile memset is
; rejected on its own (see @memset_volatile_nopromote), which would hide the
; offset-pointer check this test is meant to exercise.
define amdgpu_kernel void @memset_offset_ptr_nopromote(i64 %val) {
; CHECK-LABEL: @memset_offset_ptr_nopromote(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [4 x i64], ptr addrspace(5) [[STACK]], i64 0, i64 1
; CHECK-NEXT:    call void @llvm.memset.p5.i64(ptr addrspace(5) [[GEP]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    store i64 [[VAL:%.*]], ptr addrspace(5) [[STACK]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %stack = alloca [4 x i64], align 4, addrspace(5)
  %gep = getelementptr [4 x i64], ptr addrspace(5) %stack, i64 0, i64 1
  call void @llvm.memset.p5.i64(ptr addrspace(5) %gep, i8 0, i64 24, i1 false)
  store i64 %val, ptr addrspace(5) %stack
  ret void
}

; memset intrinsic overload used by every test above: destination pointer in
; addrspace(5), fill byte (i8), byte count (i64), and an immediate i1 that
; marks the call volatile when true.
declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)