1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; Reduce a 64-bit sub of a constant to a 32-bit operation on the high half
; when the low 32 bits of the constant are known to be zero.
; sub i64:x, K if computeTrailingZeros(K) >= 32
; => build_pair (sub x.hi, K.hi), x.lo
; amdgpu_ps/inreg variant: subtract 1 << 50 from an i64.
; The constant's low 32 bits are zero, so the checks expect only the high
; half (s1) to change, via a single 32-bit add of 0xfffc0000 (-(1 << 18)).
define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_0(i64 inreg %reg) {
; GFX9-LABEL: s_sub_i64_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 0xfffc0000
; GFX9-NEXT: ; return to shader part epilog
%sub = sub i64 %reg, 1125899906842624 ; (1 << 50)
ret i64 %sub
}
; amdgpu_ps/inreg variant: subtract 1 << 32, the smallest constant whose low
; 32 bits are all zero. The checks expect a single 32-bit add of -1 to the
; high half (s1).
define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_1(i64 inreg %reg) {
; GFX9-LABEL: s_sub_i64_const_low_bits_known0_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, -1
; GFX9-NEXT: ; return to shader part epilog
%sub = sub i64 %reg, 4294967296 ; (1 << 32)
ret i64 %sub
}
; amdgpu_ps/inreg variant: subtract 1 << 33. The checks expect a single
; 32-bit add of -2 to the high half (s1).
define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_2(i64 inreg %reg) {
; GFX9-LABEL: s_sub_i64_const_low_bits_known0_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, -2
; GFX9-NEXT: ; return to shader part epilog
%sub = sub i64 %reg, 8589934592 ; (1 << 33)
ret i64 %sub
}
; amdgpu_ps/inreg variant: subtract the i64 sign-bit constant (1 << 63).
; The high half of the constant is 0x80000000, whose 32-bit negation is
; itself, so the checks expect an add of 0x80000000 to s1.
define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_3(i64 inreg %reg) {
; GFX9-LABEL: s_sub_i64_const_low_bits_known0_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 0x80000000
; GFX9-NEXT: ; return to shader part epilog
%sub = sub i64 %reg, -9223372036854775808 ; (1 << 63)
ret i64 %sub
}
; amdgpu_ps/inreg variant: subtract a constant whose high 32 bits are all
; ones and low 32 bits are all zero. Subtracting a high half of -1 is an
; add of 1, which is what the checks expect on s1.
define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_4(i64 inreg %reg) {
; GFX9-LABEL: s_sub_i64_const_low_bits_known0_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, 1
; GFX9-NEXT: ; return to shader part epilog
%sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000
ret i64 %sub
}
; Default calling convention variant of the 1 << 50 case. The checks expect
; only the high half (v1) to change, via one 32-bit add of 0xfffc0000.
define i64 @v_sub_i64_const_low_bits_known0_0(i64 %reg) {
; GFX9-LABEL: v_sub_i64_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 0xfffc0000, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%sub = sub i64 %reg, 1125899906842624 ; (1 << 50)
ret i64 %sub
}
; Default calling convention variant of the 1 << 32 case. The checks expect
; one 32-bit add of -1 to the high half (v1).
define i64 @v_sub_i64_const_low_bits_known0_1(i64 %reg) {
; GFX9-LABEL: v_sub_i64_const_low_bits_known0_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%sub = sub i64 %reg, 4294967296 ; (1 << 32)
ret i64 %sub
}
; Default calling convention variant of the 1 << 33 case. The checks expect
; one 32-bit add of -2 to the high half (v1).
define i64 @v_sub_i64_const_low_bits_known0_2(i64 %reg) {
; GFX9-LABEL: v_sub_i64_const_low_bits_known0_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, -2, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%sub = sub i64 %reg, 8589934592 ; (1 << 33)
ret i64 %sub
}
; Default calling convention variant of the sign-bit (1 << 63) case. The
; checks expect one 32-bit add of 0x80000000 to the high half (v1).
define i64 @v_sub_i64_const_low_bits_known0_3(i64 %reg) {
; GFX9-LABEL: v_sub_i64_const_low_bits_known0_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 0x80000000, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%sub = sub i64 %reg, -9223372036854775808 ; (1 << 63)
ret i64 %sub
}
; Default calling convention variant of the 0xffffffff00000000 case. The
; checks expect one 32-bit add of 1 to the high half (v1).
define i64 @v_sub_i64_const_low_bits_known0_4(i64 %reg) {
; GFX9-LABEL: v_sub_i64_const_low_bits_known0_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000
ret i64 %sub
}
; Negative test (amdgpu_ps/inreg): the constant 0xffffffff has zero high
; bits but nonzero low bits, so the low-bits-zero reduction does not apply.
; The checks expect the full 64-bit add/addc pair on s0 and s1.
define amdgpu_ps i64 @s_sub_i64_const_high_bits_known0_0(i64 inreg %reg) {
; GFX9-LABEL: s_sub_i64_const_high_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 s0, s0, 1
; GFX9-NEXT: s_addc_u32 s1, s1, -1
; GFX9-NEXT: ; return to shader part epilog
%sub = sub i64 %reg, 4294967295 ; (1 << 32) - 1 == 0xffffffff
ret i64 %sub
}
; Negative test (default calling convention): the constant 0xffffffff has
; zero high bits but nonzero low bits, so the low-bits-zero reduction does
; not apply. The checks expect the full 64-bit add/addc pair on v0 and v1.
define i64 @v_sub_i64_const_high_bits_known0_0(i64 %reg) {
; GFX9-LABEL: v_sub_i64_const_high_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%sub = sub i64 %reg, 4294967295 ; (1 << 32) - 1 == 0xffffffff
ret i64 %sub
}
; <2 x i64> with the same constant (1 << 32) in both elements, default
; calling convention. The checks expect one 32-bit add of -1 per element,
; applied to v1 and v3.
define <2 x i64> @v_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
; GFX9-LABEL: v_sub_v2i64_splat_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
%sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
ret <2 x i64> %sub
}
; <2 x i64> with different constants per element, both with zero low 32
; bits, default calling convention. The checks expect one 32-bit add per
; element: -1 on v1 and -2 on v3.
define <2 x i64> @v_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
; GFX9-LABEL: v_sub_v2i64_nonsplat_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
; GFX9-NEXT: v_add_u32_e32 v3, -2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
%sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
ret <2 x i64> %sub
}
; amdgpu_ps/inreg variant of the splat <2 x i64> case (1 << 32 in both
; elements). The checks expect one 32-bit add of -1 per element, applied to
; s1 and s3.
define amdgpu_ps <2 x i64> @s_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
; GFX9-LABEL: s_sub_v2i64_splat_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, -1
; GFX9-NEXT: s_add_i32 s3, s3, -1
; GFX9-NEXT: ; return to shader part epilog
%sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
ret <2 x i64> %sub
}
; amdgpu_ps/inreg variant of the non-splat <2 x i64> case. The checks
; expect one 32-bit add per element: -1 on s1 and -2 on s3.
define amdgpu_ps <2 x i64> @s_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
; GFX9-LABEL: s_sub_v2i64_nonsplat_const_low_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_i32 s1, s1, -1
; GFX9-NEXT: s_add_i32 s3, s3, -2
; GFX9-NEXT: ; return to shader part epilog
%sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
ret <2 x i64> %sub
}
; Non-constant subtrahend: a zero-extended i32 shifted left by 32, so its low
; 32 bits are zero even though its value is not a compile-time constant.
; The checks currently expect a full 64-bit sub/subb pair (including a
; subtract of 0 from the low half).
; We could reduce this to use a 32-bit sub if we use computeKnownBits
define i64 @v_sub_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
; GFX9-LABEL: v_sub_i64_variable_high_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
%in.high.bits = shl i64 %zext.offset.hi32, 32 ; value now occupies only bits 63:32
%sub = sub i64 %reg, %in.high.bits
ret i64 %sub
}
; amdgpu_ps/inreg variant of the non-constant case: the subtrahend is a
; zero-extended i32 shifted left by 32, so its low 32 bits are zero.
; The checks currently expect a full 64-bit sub/subb pair (including a
; subtract of 0 from the low half).
; We could reduce this to use a 32-bit sub if we use computeKnownBits
define amdgpu_ps i64 @s_sub_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
; GFX9-LABEL: s_sub_i64_variable_high_bits_known0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_sub_u32 s0, s0, 0
; GFX9-NEXT: s_subb_u32 s1, s1, s2
; GFX9-NEXT: ; return to shader part epilog
%zext.offset.hi32 = zext i32 %offset.hi32 to i64
%in.high.bits = shl i64 %zext.offset.hi32, 32 ; value now occupies only bits 63:32
%sub = sub i64 %reg, %in.high.bits
ret i64 %sub
}
|