1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
# RUN: llc -march=hexagon -run-pass post-RA-sched %s -o - | FileCheck %s
# Test that the Post RA scheduler does not schedule back-to-back loads
# when there is another instruction to schedule. The scheduler avoids
# the back-to-back loads to reduce potential bank conflicts.
# CHECK: = L2_loadrigp
# CHECK: = A2_tfr
# CHECK: = L2_loadrigp
# CHECK: = L4_loadri_rr
# CHECK: = S2_tstbit_i
# CHECK: = L4_loadri_rr
--- |
; %s.0 is a struct of array fields; fields 12 and 13 (both [24 x i32]) are the
; ones @f0 indexes. %s.1 is a pair of i32 used only as an element type in %s.0.
%s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
%s.1 = type { i32, i32 }
; Globals touched by @f0: @g0 and @g1 are stored to; @g2, @g3 and @g4 are loaded.
@g0 = global i64 0
@g1 = global i64 0
@g2 = global i32 0
@g3 = global i32 0
@g4 = global i8 0
; Hexagon intrinsics called by @f0; all use attribute group #0.
declare i32 @llvm.hexagon.S2.cl0(i32) #0
declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) #0
declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) #0
declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) #0
declare i64 @llvm.hexagon.A2.vaddws(i64, i64) #0
declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #0
; IR reference for the machine function f0 below; the MIR memory operands
; refer to values defined here (@g0..@g4, %v14, %v17).
; Shape: an outer loop (b1..b5) that exits when the counter %v28 equals %a0,
; containing an inner loop (b2..b4) that exits when %v13 becomes zero.
define void @f0(i32 %a0) {
b0:
; Base pointer is a fixed integer address; it is indexed as [10 x %s.0] in b1.
%v0 = bitcast ptr inttoptr (i32 -121502345 to ptr) to ptr
br label %b1
b1: ; preds = %b5, %b0
; %v1 is the outer iteration counter, %v2 the array element index.
%v1 = phi i32 [ 0, %b0 ], [ %v28, %b5 ]
%v2 = phi i32 [ 0, %b0 ], [ %v27, %b5 ]
; Two loads from globals, ANDed to seed the inner loop's %v11.
%v3 = load i32, ptr @g2, align 4
%v4 = load i32, ptr @g3, align 8
%v5 = and i32 %v4, %v3
; Pointers to fields 12 and 13 of element %v2.
%v6 = getelementptr [10 x %s.0], ptr %v0, i32 0, i32 %v2
%v8 = getelementptr %s.0, ptr %v6, i32 0, i32 12
%v9 = getelementptr %s.0, ptr %v6, i32 0, i32 13
br label %b2
b2: ; preds = %b4, %b1
; %v10 carries the running i64 value, %v11 the i32 that drives loop exit.
%v10 = phi i64 [ %v24, %b4 ], [ 0, %b1 ]
%v11 = phi i32 [ %v13, %b4 ], [ %v5, %b1 ]
%v12 = tail call i32 @llvm.hexagon.S2.cl0(i32 %v11)
%v13 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %v11, i32 %v12)
; Two loads indexed by %v12, one from each field array.
%v14 = getelementptr [24 x i32], ptr %v8, i32 0, i32 %v12
%v15 = load i32, ptr %v14, align 4
%v16 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %v15, i32 %v15)
%v17 = getelementptr [24 x i32], ptr %v9, i32 0, i32 %v12
%v18 = load i32, ptr %v17, align 4
%v19 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %v16, i32 %v18, i32 %v18)
; Branch to b3 only when bit 0 of the byte loaded from @g4 is clear.
%v20 = load i8, ptr @g4, align 1
%v21 = and i8 %v20, 1
%v22 = icmp eq i8 %v21, 0
br i1 %v22, label %b3, label %b4
b3: ; preds = %b2
; Combine %v19 with the running value and store the result to @g0.
%v23 = tail call i64 @llvm.hexagon.A2.vaddws(i64 %v10, i64 %v19)
store i64 %v23, ptr @g0, align 8
br label %b4
b4: ; preds = %b3, %b2
; Running value is updated only if b3 ran; inner loop ends when %v13 is zero.
%v24 = phi i64 [ %v23, %b3 ], [ %v10, %b2 ]
%v25 = icmp eq i32 %v13, 0
br i1 %v25, label %b5, label %b2
b5: ; preds = %b4
; Next element index goes through modwrapu with 10; counter is incremented.
%v26 = add i32 %v2, 1
%v27 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %v26, i32 10)
%v28 = add i32 %v1, 1
%v29 = icmp eq i32 %v28, %a0
br i1 %v29, label %b6, label %b1
b6: ; preds = %b5
; The last %v19 from the inner loop is stored to @g1 on exit.
store i64 %v19, ptr @g1, align 8
ret void
}
attributes #0 = { nounwind readnone }
...
---
# Machine function for @f0; this body is the input to the post-RA scheduler
# pass named on the RUN line. Instruction order here is the pre-scheduling
# order and is what the test depends on. The CHECK lines at the top of the
# file expect a non-load instruction between each pair of adjacent loads:
#   bb.1: the two L2_loadrigp (from @g2 and @g3), with A2_tfr between them;
#   bb.2: the two L4_loadri_rr, with S2_tstbit_i between them.
name: f0
alignment: 16
tracksRegLiveness: true
registers:
liveins:
- { reg: '$r0', virtual-reg: '' }
fixedStack:
stack:
constants:
body: |
bb.0:
successors: %bb.1(0x80000000)
liveins: $r0:0x00000001
; $r2 holds the same constant address as %v0 in the IR; $r4 holds the
; value 10 used by A4_modwrapu in bb.3.
$r3 = A2_tfrsi 0
$r2 = A2_tfrsi -121502345
$r4 = A2_tfrsi 10
J2_loop0r %bb.1, killed $r0, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
bb.1 (machine-block-address-taken):
successors: %bb.2(0x80000000)
liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $sa0:0x00000004
; 1824 equals the byte size of %s.0 (with 4-byte i32), so $r5 is presumably
; the byte offset of element $r3 -- confirm against the opcode definition.
$r5 = M2_mpysip $r3, 1824
; First back-to-back load pair in the input; A2_tfr follows both loads here.
$r7 = L2_loadrigp @g2, implicit $gp :: (dereferenceable load (s32) from @g2)
$r8 = L2_loadrigp @g3, implicit killed $gp :: (dereferenceable load (s32) from @g3, align 8)
$r6 = A2_tfr $r5
$r7 = A2_and killed $r8, killed $r7
; 1248 and 1152 equal the byte offsets of %s.0 fields 13 and 12 (13*96, 12*96).
$r5 = M2_accii killed $r5, $r2, 1248
$r6 = M2_accii killed $r6, $r2, 1152
$d0 = A2_tfrpi 0
bb.2:
successors: %bb.3(0x04000000), %bb.2(0x7c000000)
liveins: $lc0:0x00000004, $r0:0x00000001, $r1:0x00000001, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r5:0x00000001, $r6:0x00000001, $r7:0x00000001, $sa0:0x00000004
$r8 = S2_cl0 $r7
$r12 = L2_loadrubgp @g4, implicit $gp :: (dereferenceable load (s8) from @g4)
$r7 = S2_setbit_r killed $r7, $r8
; Second back-to-back load pair in the input; the memory operands refer to
; IR pointers %v14 and %v17. S2_tstbit_i appears only further down here.
$r9 = L4_loadri_rr $r6, $r8, 2 :: (load (s32) from %ir.v14)
$r13 = L4_loadri_rr $r5, killed $r8, 2 :: (load (s32) from %ir.v17)
$d4 = M2_vmpy2s_s0 killed $r9, $r9, implicit-def dead $usr_ovf
$p0 = S2_tstbit_i killed $r12, 0
$d4 = M2_vmac2s_s0 killed $d4, killed $r13, $r13, implicit-def dead $usr_ovf
$p1 = C2_cmpeqi $r7, 0
$d6 = A2_vaddws $d0, $d4, implicit-def dead $usr_ovf
; The store to @g0 and the update of $d0 are predicated on $p0.
$d0 = A2_tfrpt $p0, killed $d0, implicit $d0
S4_pstorerdf_abs $p0, @g0, $d6, implicit killed $gp :: (store (s64) into @g0)
$d0 = A2_tfrpf killed $p0, killed $d6, implicit killed $d0
J2_jumpf killed $p1, %bb.2, implicit-def dead $pc
bb.3:
successors: %bb.4(0x04000000), %bb.1(0x7c000000)
liveins: $lc0:0x00000004, $r2:0x00000001, $r3:0x00000001, $r4:0x00000001, $r8:0x00000001, $r9:0x00000001, $sa0:0x00000004
; Outer-loop latch: bump $r3, pass it through A4_modwrapu with $r4, loop to bb.1.
$r3 = A2_addi killed $r3, 1
$r3 = A4_modwrapu killed $r3, $r4
ENDLOOP0 %bb.1, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
bb.4:
liveins: $r8:0x00000001, $r9:0x00000001
; Exit block: store $d4 to @g1 and return.
S2_storerdgp @g1, killed $d4, implicit killed $gp :: (store (s64) into @g1)
PS_jmpret killed $r31, implicit-def dead $pc
...
|