1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
; while_break: a single loop with two separate exit edges to %end, one from
; %else and one from %latch.
;   %z - bound tested in %latch; the loop is left when %ind < %z.
;   %v - initial value of the float carried around the loop.
;   %x - bound tested in %header; selects %if (%ind < %x) or %else.
;   %y - bound tested in %else; the loop is left from %else unless %ind < %y.
; Returns the carried float as it stood on whichever exit edge was taken.
define amdgpu_ps float @while_break(i32 %z, float %v, i32 %x, i32 %y) #0 {
; GCN-LABEL: while_break:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s1, -1
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %Flow2
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GCN-NEXT: s_and_b32 s2, exec_lo, s3
; GCN-NEXT: s_or_b32 s0, s2, s0
; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GCN-NEXT: s_cbranch_execz .LBB0_8
; GCN-NEXT: .LBB0_2: ; %header
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s1, s1, 1
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: v_cmp_ge_i32_e32 vcc_lo, s1, v2
; GCN-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GCN-NEXT: s_xor_b32 s3, exec_lo, s3
; GCN-NEXT: ; %bb.3: ; %else
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v3
; GCN-NEXT: s_and_b32 s2, vcc_lo, exec_lo
; GCN-NEXT: ; %bb.4: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_andn2_saveexec_b32 s3, s3
; GCN-NEXT: ; %bb.5: ; %if
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
; GCN-NEXT: s_or_b32 s2, s2, exec_lo
; GCN-NEXT: ; %bb.6: ; %Flow1
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GCN-NEXT: s_mov_b32 s3, -1
; GCN-NEXT: s_and_saveexec_b32 s4, s2
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.7: ; %latch
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v0
; GCN-NEXT: s_orn2_b32 s3, vcc_lo, exec_lo
; GCN-NEXT: s_branch .LBB0_1
; GCN-NEXT: .LBB0_8: ; %end
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: ; return to shader part epilog
entry:
br label %header
header:
; %v.1 is the float carried around the loop; %ind is the counter, starting at 0.
%v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
%ind = phi i32 [ 0, %entry], [ %ind.inc, %latch ]
; True edge goes to %if, false edge goes to %else.
%cc = icmp slt i32 %ind, %x
br i1 %cc, label %if, label %else
if:
; Reached when %ind < %x: add 1.0 to the carried value, then go to %latch.
%v.if = fadd float %v.1, 1.0
br label %latch
else:
; Reached when %ind >= %x: go to %latch if %ind < %y, otherwise leave the
; loop directly (first exit edge).
%cc2 = icmp slt i32 %ind, %y
br i1 %cc2, label %latch, label %end
latch:
; Merge the carried value from %if (updated) or %else (unchanged).
%v.2 = phi float [ %v.if, %if ], [ %v.1, %else ]
%ind.inc = add i32 %ind, 1
; The exit test reads the pre-increment %ind and leaves the loop when it is
; true (second exit edge); otherwise control returns to %header.
%cc3 = icmp slt i32 %ind, %z
br i1 %cc3, label %end, label %header
end:
; Pick the carried value that matches the exit edge taken.
%r = phi float [ %v.2, %latch ], [ %v.1, %else ]
ret float %r
}
; while_break2: same blocks as while_break, except that the successors of the
; %header branch are swapped (true edge to %else, false edge to %if), which
; gives a different DFS order.
;   %z - bound tested in %latch; the loop is left when %ind < %z.
;   %v - initial value of the float carried around the loop.
;   %x - bound tested in %header; selects %else (%ind < %x) or %if.
;   %y - bound tested in %else; the loop is left from %else unless %ind < %y.
; Returns the carried float as it stood on whichever exit edge was taken.
define amdgpu_ps float @while_break2(i32 %z, float %v, i32 %x, i32 %y) #0 {
; GCN-LABEL: while_break2:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_mov_b32 s1, -1
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_branch .LBB1_2
; GCN-NEXT: .LBB1_1: ; %Flow2
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GCN-NEXT: s_and_b32 s2, exec_lo, s3
; GCN-NEXT: s_or_b32 s0, s2, s0
; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
; GCN-NEXT: s_cbranch_execz .LBB1_8
; GCN-NEXT: .LBB1_2: ; %header
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s1, s1, 1
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: v_cmp_ge_i32_e32 vcc_lo, s1, v2
; GCN-NEXT: s_and_saveexec_b32 s3, vcc_lo
; GCN-NEXT: s_xor_b32 s3, exec_lo, s3
; GCN-NEXT: ; %bb.3: ; %if
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
; GCN-NEXT: s_mov_b32 s2, exec_lo
; GCN-NEXT: ; %bb.4: ; %Flow
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_andn2_saveexec_b32 s3, s3
; GCN-NEXT: ; %bb.5: ; %else
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v3
; GCN-NEXT: s_andn2_b32 s2, s2, exec_lo
; GCN-NEXT: s_and_b32 s4, vcc_lo, exec_lo
; GCN-NEXT: s_or_b32 s2, s2, s4
; GCN-NEXT: ; %bb.6: ; %Flow1
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GCN-NEXT: s_mov_b32 s3, -1
; GCN-NEXT: s_and_saveexec_b32 s4, s2
; GCN-NEXT: s_cbranch_execz .LBB1_1
; GCN-NEXT: ; %bb.7: ; %latch
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc_lo, s1, v0
; GCN-NEXT: s_orn2_b32 s3, vcc_lo, exec_lo
; GCN-NEXT: s_branch .LBB1_1
; GCN-NEXT: .LBB1_8: ; %end
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: ; return to shader part epilog
entry:
br label %header
header:
; %v.1 is the float carried around the loop; %ind is the counter, starting at 0.
%v.1 = phi float [ %v, %entry ], [ %v.2, %latch ]
%ind = phi i32 [ 0, %entry], [ %ind.inc, %latch ]
; Successors are swapped relative to while_break: true edge goes to %else,
; false edge goes to %if.
%cc = icmp slt i32 %ind, %x
br i1 %cc, label %else, label %if
if:
; Reached when %ind >= %x: add 1.0 to the carried value, then go to %latch.
%v.if = fadd float %v.1, 1.0
br label %latch
else:
; Reached when %ind < %x: go to %latch if %ind < %y, otherwise leave the
; loop directly (first exit edge).
%cc2 = icmp slt i32 %ind, %y
br i1 %cc2, label %latch, label %end
latch:
; Merge the carried value from %if (updated) or %else (unchanged).
%v.2 = phi float [ %v.if, %if ], [ %v.1, %else ]
%ind.inc = add i32 %ind, 1
; The exit test reads the pre-increment %ind and leaves the loop when it is
; true (second exit edge); otherwise control returns to %header.
%cc3 = icmp slt i32 %ind, %z
br i1 %cc3, label %end, label %header
end:
; Pick the carried value that matches the exit edge taken.
%r = phi float [ %v.2, %latch ], [ %v.1, %else ]
ret float %r
}
attributes #0 = { nounwind }
|