File: issue139317-bad-opsel-reg-sequence-fold.ll

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.6-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,245,028 kB
  • sloc: cpp: 7,619,726; ansic: 1,434,018; asm: 1,058,748; python: 252,740; f90: 94,671; objc: 70,685; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,675; awk: 3,523; javascript: 2,409; xml: 892; fortran: 770
file content (67 lines) | stat: -rw-r--r-- 3,101 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefix=GFX942 %s

; Check for correct folding of the constants produced by the
; stepvector into the fadd. The value should not get lost when folding
; through subregister extracts of reg_sequence.
;
; Kernel shape: the entry block converts the stepvector result to <8 x half>
; once, outside the loop. Each iteration of %.lr.ph loads an <8 x half>,
; adds that loop-invariant vector to it, stores the sum, and advances the
; element index by 8 while the new index is below zext(%arg2).
;
; In the expected output the addend shows up as packed f16 pairs
; (low half first): 0x42004000 = <2.0, 3.0>, 0x45004400 = <4.0, 5.0>,
; 0x47004600 = <6.0, 7.0>.
; NOTE(review): the remaining pair is presumably <0.0, 1.0>, expressed as the
; inline constant 1.0 plus op_sel/op_sel_hi modifiers - confirm the modifier
; semantics against the ISA documentation before editing that line.
define amdgpu_kernel void @stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38(ptr addrspace(1) readonly captures(none) %.global, ptr addrspace(1) writeonly captures(none) %.global1, i32 %arg2) {
; GFX942-LABEL: stepper_test_kernel_DType_I6A6AcB6A6AsA6A6A_68a5362b97a102776ef47f0e8e894a38:
; GFX942:       ; %bb.0: ; %bb
; GFX942-NEXT:    s_load_dword s6, s[4:5], 0x10
; GFX942-NEXT:    s_mov_b32 s7, 0
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    s_cmp_eq_u32 s6, 0
; GFX942-NEXT:    s_cbranch_scc1 .LBB0_3
; GFX942-NEXT:  ; %bb.1: ; %.lr.ph.preheader
; GFX942-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX942-NEXT:    s_mov_b32 s8, 0x47004600
; GFX942-NEXT:    s_mov_b32 s9, 0x45004400
; GFX942-NEXT:    s_mov_b32 s10, 0x42004000
; GFX942-NEXT:    s_mov_b64 s[4:5], 0
; GFX942-NEXT:    v_mov_b32_e32 v2, 0
; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
; GFX942-NEXT:  .LBB0_2: ; %.lr.ph
; GFX942-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    global_load_dwordx4 v[4:7], v2, s[2:3]
; GFX942-NEXT:    s_add_u32 s4, s4, 8
; GFX942-NEXT:    s_addc_u32 s5, s5, 0
; GFX942-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1]
; GFX942-NEXT:    s_waitcnt vmcnt(0)
; GFX942-NEXT:    v_pk_add_f16 v7, v7, s8
; GFX942-NEXT:    v_pk_add_f16 v6, v6, s9
; GFX942-NEXT:    v_pk_add_f16 v5, v5, s10
; GFX942-NEXT:    v_pk_add_f16 v4, v4, 1.0 op_sel:[0,1] op_sel_hi:[1,0]
; GFX942-NEXT:    global_store_dwordx4 v2, v[4:7], s[0:1]
; GFX942-NEXT:    s_add_u32 s0, s0, 16
; GFX942-NEXT:    s_addc_u32 s1, s1, 0
; GFX942-NEXT:    s_add_u32 s2, s2, 16
; GFX942-NEXT:    s_addc_u32 s3, s3, 0
; GFX942-NEXT:    s_cbranch_vccnz .LBB0_2
; GFX942-NEXT:  .LBB0_3: ; %._crit_edge
; GFX942-NEXT:    s_endpgm
bb:
  ; Loop-invariant addend: the stepvector lanes converted from i32 to half.
  %i = tail call <8 x i32> @llvm.stepvector.v8i32()
  %i3 = sitofp <8 x i32> %i to <8 x half>
  ; Trip bound widened to i64 so it can be compared with the i64 index below.
  %i4 = zext i32 %arg2 to i64
  ; Skip the loop entirely when %arg2 is zero.
  %.not = icmp eq i32 %arg2, 0
  br i1 %.not, label %._crit_edge, label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph, %bb
  ; %i5 is the current element index (in units of half); %i6 is the next one.
  %i5 = phi i64 [ %i6, %.lr.ph ], [ 0, %bb ]
  %i6 = add nuw nsw i64 %i5, 8
  ; NOTE(review): the load below goes through %.global1, which is declared
  ; writeonly, and the store goes through %.global, which is declared readonly.
  ; Presumably an artifact of test reduction - confirm before "fixing" it, since
  ; any change to the IR may alter the code sequence this test is pinning.
  %i7 = getelementptr inbounds nuw half, ptr addrspace(1) %.global1, i64 %i5
  ; align 2 is only element alignment, not alignment of the whole 16-byte vector.
  %i8 = load <8 x half>, ptr addrspace(1) %i7, align 2
  ; The fadd whose constant operand must be folded without losing lanes.
  %i9 = fadd <8 x half> %i8, %i3
  %i10 = getelementptr inbounds nuw half, ptr addrspace(1) %.global, i64 %i5
  store <8 x half> %i9, ptr addrspace(1) %i10, align 2
  ; Continue while the next index is still below the bound (unsigned compare).
  %i11 = icmp samesign ult i64 %i6, %i4
  br i1 %i11, label %.lr.ph, label %._crit_edge

._crit_edge:                                      ; preds = %.lr.ph, %bb
  ret void
}

; Intrinsic called in the kernel's entry block to build the vector that is
; converted to half and added inside the loop.
declare <8 x i32> @llvm.stepvector.v8i32() #0

; Attribute group #0, applied to the intrinsic declaration above.
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }