File: reg-coalescer-sched-crash.ll

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.4-5
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,236,516 kB
  • sloc: cpp: 7,619,569; ansic: 1,433,956; asm: 1,058,748; python: 252,181; f90: 94,671; objc: 70,753; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,585; awk: 3,523; javascript: 2,272; xml: 892; fortran: 770
file content (103 lines) | stat: -rw-r--r-- 3,999 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s

; The register coalescer introduces a verifier error which later
; results in a crash during scheduling.

; Intrinsic called once, in the entry block of @reg_coalescer_breaks_dead;
; it takes no arguments and returns an i32.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Reduced regression kernel for the register-coalescer problem described in the
; comment at the top of this file. Control flow of the IR below:
;   bb   -> bb3 when the @llvm.amdgcn.workitem.id.x result equals 0, else bb4
;   bb3  -> bb6
;   bb6  -> bb6 (self-edge) or bb4, on a compare whose left operand is poison
;   bb4  -> bb15 (store) or bb16, selected by the i1 argument %c0
;   bb15 -> bb16, which is unreachable
; %arg3 is never referenced in the body.
; NOTE(review): the poison operands, the zero-operand adds and the unused %tmp
; look like leftovers of test reduction; they presumably have to stay exactly
; as written for the test to keep exercising the original code path - confirm
; before simplifying, and regenerate the check lines if the IR ever changes.
define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, i1 %c0) #1 {
; GFX6-LABEL: reg_coalescer_breaks_dead:
; GFX6:       ; %bb.0: ; %bb
; GFX6-NEXT:    v_mov_b32_e32 v1, 0
; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    s_and_saveexec_b64 s[0:1], vcc
; GFX6-NEXT:    s_cbranch_execz .LBB0_2
; GFX6-NEXT:  ; %bb.1: ; %bb3
; GFX6-NEXT:    s_load_dword s2, s[4:5], 0xb
; GFX6-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x9
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_ashr_i32 s3, s2, 31
; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], 3
; GFX6-NEXT:    s_add_u32 s2, s6, s2
; GFX6-NEXT:    s_addc_u32 s3, s7, s3
; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v2, s3
; GFX6-NEXT:  .LBB0_2: ; %bb4
; GFX6-NEXT:    s_or_b64 exec, exec, s[0:1]
; GFX6-NEXT:    s_load_dword s0, s[4:5], 0xe
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_bitcmp0_b32 s0, 0
; GFX6-NEXT:    s_cbranch_scc1 .LBB0_4
; GFX6-NEXT:  ; %bb.3: ; %bb15
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    ds_write_b64 v0, v[1:2]
; GFX6-NEXT:  .LBB0_4: ; %bb16
;
; GFX8-LABEL: reg_coalescer_breaks_dead:
; GFX8:       ; %bb.0: ; %bb
; GFX8-NEXT:    v_mov_b32_e32 v1, 0
; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT:    v_mov_b32_e32 v2, 0
; GFX8-NEXT:    s_and_saveexec_b64 s[0:1], vcc
; GFX8-NEXT:    s_cbranch_execz .LBB0_2
; GFX8-NEXT:  ; %bb.1: ; %bb3
; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX8-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_ashr_i32 s3, s2, 31
; GFX8-NEXT:    s_lshl_b64 s[2:3], s[2:3], 3
; GFX8-NEXT:    s_add_u32 s2, s6, s2
; GFX8-NEXT:    s_addc_u32 s3, s7, s3
; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v1, s2
; GFX8-NEXT:    v_mov_b32_e32 v2, s3
; GFX8-NEXT:  .LBB0_2: ; %bb4
; GFX8-NEXT:    s_or_b64 exec, exec, s[0:1]
; GFX8-NEXT:    s_load_dword s0, s[4:5], 0x38
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    s_bitcmp0_b32 s0, 0
; GFX8-NEXT:    s_cbranch_scc1 .LBB0_4
; GFX8-NEXT:  ; %bb.3: ; %bb15
; GFX8-NEXT:    s_mov_b32 m0, -1
; GFX8-NEXT:    ds_write_b64 v0, v[1:2]
; GFX8-NEXT:  .LBB0_4: ; %bb16
bb:
  ; bb6 (the only block with a load) is reachable solely through bb3, i.e. only
  ; when the intrinsic result compares equal to 0.
  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
  %cmp0 = icmp eq i32 %id.x, 0
  br i1 %cmp0, label %bb3, label %bb4

bb3:                                              ; preds = %bb
  ; %tmp has no users anywhere in this function.
  %tmp = ashr exact i32 poison, 8
  br label %bb6

bb4:                                              ; preds = %bb6, %bb
  ; %tmp5 is the zero vector when arriving directly from bb, or the sum
  ; produced in bb6 otherwise.
  %tmp5 = phi <2 x i32> [ zeroinitializer, %bb ], [ %tmp13, %bb6 ]
  br i1 %c0, label %bb15, label %bb16

bb6:                                              ; preds = %bb6, %bb3
  ; %tmp7 carries the running <2 x i32> sum around the bb6 self-edge.
  %tmp7 = phi <2 x i32> [ zeroinitializer, %bb3 ], [ %tmp13, %bb6 ]
  ; Both adds have a zero operand, so %tmp9 has the same value as %arg1.
  %tmp8 = add nsw i32 0, %arg1
  %tmp9 = add nsw i32 %tmp8, 0
  %tmp10 = sext i32 %tmp9 to i64
  ; Address of the <2 x i32> element at index %tmp10 from %arg; the address
  ; does not depend on anything that changes between iterations.
  %tmp11 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %arg, i64 %tmp10
  %tmp12 = load <2 x i32>, ptr addrspace(1) %tmp11, align 8
  %tmp13 = add <2 x i32> %tmp12, %tmp7
  ; The loop-back compare uses poison on the left-hand side rather than any
  ; value computed in the loop. The expected output above contains no backward
  ; branch for this block.
  %tmp14 = icmp slt i32 poison, %arg2
  br i1 %tmp14, label %bb6, label %bb4

bb15:                                             ; preds = %bb4
  ; The store address is a poison addrspace(3) pointer; the stored value is
  ; the phi from bb4. This is the ds_write_b64 in the expected output.
  store <2 x i32> %tmp5, ptr addrspace(3) poison, align 8
  br label %bb16

bb16:                                             ; preds = %bb15, %bb4
  ; No return: the kernel ends in unreachable.
  unreachable
}

; Attribute group #0 is referenced by the @llvm.amdgcn.workitem.id.x
; declaration; #1 is referenced by the @reg_coalescer_breaks_dead definition.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }