File: kill-infinite-loop.ll

package info (click to toggle)
llvm-toolchain-11 1%3A11.0.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 995,808 kB
  • sloc: cpp: 4,767,656; ansic: 760,916; asm: 477,436; python: 170,940; objc: 69,804; lisp: 29,914; sh: 23,855; f90: 18,173; pascal: 7,551; perl: 7,471; ml: 5,603; awk: 3,489; makefile: 2,573; xml: 915; cs: 573; fortran: 503; javascript: 452
file content (86 lines) | stat: -rw-r--r-- 2,878 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
; Although it's modeled without any control flow in order to get better code
; out of the structurizer, @llvm.amdgcn.kill actually ends the thread that calls
; it with "false". If it's called in a provably infinite loop, we still
; need to successfully exit and export something, even if we can't know where
; to jump to in the LLVM IR. Therefore we insert a null export ourselves in
; this case right before the s_endpgm to avoid GPU hangs, which is what this
; test verifies.

; CHECK-LABEL: return_void
; Make sure that we remove the done bit from the original export
; CHECK: exp mrt0 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} vm
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @return_void(float %0) #0 {
; Pixel shader with two paths selected by comparing the input against 10.0:
; an export-and-return path (%end) and a loop with no exit edge (%loop) that
; calls the kill intrinsic on every iteration.
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

; This block only ever branches back to itself, so the IR has no path from
; here to a return.
loop:
  ; Call sites use the file's defined attribute group #0 (nounwind), matching
  ; the intrinsic declarations, instead of referencing an undefined group.
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ; Uncompressed f32 export. The two trailing i1 operands are both true;
  ; presumably these are the done and vm flags -- confirm against the
  ; intrinsic definition. The expected output for this function keeps "vm" but
  ; not "done" on this export.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #0
  ret void
}

; Check that we also remove the done bit from compressed exports correctly.
; CHECK-LABEL: return_void_compr
; CHECK: exp mrt0 v{{[0-9]+}}, off, v{{[0-9]+}}, off compr vm
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @return_void_compr(float %0) #0 {
; Same shape as the uncompressed variant, but the returning path uses the
; compressed export intrinsic: one path exports and returns (%end), the other
; spins forever in a loop that calls the kill intrinsic (%loop).
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

; Self-looping block with no exit edge.
loop:
  ; Call sites use the file's defined attribute group #0 (nounwind), matching
  ; the intrinsic declarations, instead of referencing an undefined group.
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ; Compressed export of two <2 x i16> zero vectors. The two trailing i1
  ; operands are both true; presumably done and vm -- confirm against the
  ; intrinsic definition. The expected output keeps "compr vm" without "done".
  call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #0
  ret void
}

; test the case where there's only a kill in an infinite loop
; CHECK-LABEL: only_kill
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; SIInsertSkips inserts an extra null export here, but it should be harmless.
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @only_kill() #0 {
; Degenerate case: the function has no returning path at all. The entry block
; falls straight into a self-loop whose only work is the kill intrinsic.
main_body:
  br label %loop

loop:
  ; Call site uses the file's defined attribute group #0 (nounwind), matching
  ; the intrinsic declaration, instead of referencing an undefined group.
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop
}

; Check that the epilog is the final block
; CHECK-LABEL: return_nonvoid
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
define amdgpu_ps float @return_nonvoid(float %0) #0 {
; Variant that returns a value instead of exporting: %end returns 0.0, while
; %loop is a self-loop that calls the kill intrinsic and never exits.
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

; Self-looping block with no exit edge.
loop:
  ; Call site uses the file's defined attribute group #0 (nounwind), matching
  ; the intrinsic declaration, instead of referencing an undefined group.
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ret float 0.
}

; Declarations of the AMDGPU intrinsics called by the test functions in this
; file. All of them share attribute group #0.
declare void @llvm.amdgcn.kill(i1) #0
; Uncompressed export: two immediate i32 operands, four float sources, and two
; immediate i1 flags.
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
; Compressed export: two immediate i32 operands, two <2 x i16> sources, and two
; immediate i1 flags.
declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0

; The only attribute group defined in this file.
attributes #0 = { nounwind }