1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @uadd_sat_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
; GFX9-LABEL: @uadd_sat_v2i16(
; GFX9-NEXT: bb:
; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
%add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
%ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
ret <2 x i16> %ins.1
}
define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @usub_sat_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @usub_sat_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
; GFX9-LABEL: @usub_sat_v2i16(
; GFX9-NEXT: bb:
; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
%add.0 = call i16 @llvm.usub.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.usub.sat.i16(i16 %arg0.1, i16 %arg1.1)
%ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
ret <2 x i16> %ins.1
}
define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @sadd_sat_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @sadd_sat_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
; GFX9-LABEL: @sadd_sat_v2i16(
; GFX9-NEXT: bb:
; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
%add.0 = call i16 @llvm.sadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.sadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
%ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
ret <2 x i16> %ins.1
}
define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @ssub_sat_v2i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @ssub_sat_v2i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT: ret <2 x i16> [[TMP0]]
;
; GFX9-LABEL: @ssub_sat_v2i16(
; GFX9-NEXT: bb:
; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX9-NEXT: ret <2 x i16> [[TMP0]]
;
bb:
%arg0.0 = extractelement <2 x i16> %arg0, i64 0
%arg0.1 = extractelement <2 x i16> %arg0, i64 1
%arg1.0 = extractelement <2 x i16> %arg1, i64 0
%arg1.1 = extractelement <2 x i16> %arg1, i64 1
%add.0 = call i16 @llvm.ssub.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.ssub.sat.i16(i16 %arg0.1, i16 %arg1.1)
%ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
ret <2 x i16> %ins.1
}
define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @uadd_sat_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT: ret <2 x i32> [[INS_1]]
;
bb:
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
%add.0 = call i32 @llvm.uadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
%add.1 = call i32 @llvm.uadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
%ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
ret <2 x i32> %ins.1
}
define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @usub_sat_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT: ret <2 x i32> [[INS_1]]
;
bb:
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
%add.0 = call i32 @llvm.usub.sat.i32(i32 %arg0.0, i32 %arg1.0)
%add.1 = call i32 @llvm.usub.sat.i32(i32 %arg0.1, i32 %arg1.1)
%ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
ret <2 x i32> %ins.1
}
define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @sadd_sat_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT: ret <2 x i32> [[INS_1]]
;
bb:
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
%add.0 = call i32 @llvm.sadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
%add.1 = call i32 @llvm.sadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
%ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
ret <2 x i32> %ins.1
}
define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @ssub_sat_v2i32(
; GCN-NEXT: bb:
; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT: ret <2 x i32> [[INS_1]]
;
bb:
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
%arg1.0 = extractelement <2 x i32> %arg1, i64 0
%arg1.1 = extractelement <2 x i32> %arg1, i64 1
%add.0 = call i32 @llvm.ssub.sat.i32(i32 %arg0.0, i32 %arg1.0)
%add.1 = call i32 @llvm.ssub.sat.i32(i32 %arg0.1, i32 %arg1.1)
%ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0
%ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
ret <2 x i32> %ins.1
}
define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v3i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT: ret <3 x i16> [[INS_2]]
;
; GFX8-LABEL: @uadd_sat_v3i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
; GFX8-NEXT: ret <3 x i16> [[INS_2]]
;
; GFX9-LABEL: @uadd_sat_v3i16(
; GFX9-NEXT: bb:
; GFX9-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
; GFX9-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX9-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
; GFX9-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
; GFX9-NEXT: ret <3 x i16> [[INS_2]]
;
bb:
%arg0.0 = extractelement <3 x i16> %arg0, i64 0
%arg0.1 = extractelement <3 x i16> %arg0, i64 1
%arg0.2 = extractelement <3 x i16> %arg0, i64 2
%arg1.0 = extractelement <3 x i16> %arg1, i64 0
%arg1.1 = extractelement <3 x i16> %arg1, i64 1
%arg1.2 = extractelement <3 x i16> %arg1, i64 2
%add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
%add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
%ins.0 = insertelement <3 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
%ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
ret <3 x i16> %ins.2
}
define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v4i16(
; GFX7-NEXT: bb:
; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
; GFX7-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
; GFX7-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
; GFX7-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
; GFX7-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
; GFX7-NEXT: ret <4 x i16> [[INS_3]]
;
; GFX8-LABEL: @uadd_sat_v4i16(
; GFX8-NEXT: bb:
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
;
; GFX9-LABEL: @uadd_sat_v4i16(
; GFX9-NEXT: bb:
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; GFX9-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX9-NEXT: ret <4 x i16> [[INS_31]]
;
bb:
%arg0.0 = extractelement <4 x i16> %arg0, i64 0
%arg0.1 = extractelement <4 x i16> %arg0, i64 1
%arg0.2 = extractelement <4 x i16> %arg0, i64 2
%arg0.3 = extractelement <4 x i16> %arg0, i64 3
%arg1.0 = extractelement <4 x i16> %arg1, i64 0
%arg1.1 = extractelement <4 x i16> %arg1, i64 1
%arg1.2 = extractelement <4 x i16> %arg1, i64 2
%arg1.3 = extractelement <4 x i16> %arg1, i64 3
%add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
%add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
%add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
%add.3 = call i16 @llvm.uadd.sat.i16(i16 %arg0.3, i16 %arg1.3)
%ins.0 = insertelement <4 x i16> poison, i16 %add.0, i64 0
%ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
%ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
%ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
ret <4 x i16> %ins.3
}
declare i16 @llvm.uadd.sat.i16(i16, i16) #0
declare i16 @llvm.usub.sat.i16(i16, i16) #0
declare i16 @llvm.sadd.sat.i16(i16, i16) #0
declare i16 @llvm.ssub.sat.i16(i16, i16) #0
declare i32 @llvm.uadd.sat.i32(i32, i32) #0
declare i32 @llvm.usub.sat.i32(i32, i32) #0
declare i32 @llvm.sadd.sat.i32(i32, i32) #0
declare i32 @llvm.ssub.sat.i32(i32, i32) #0
attributes #0 = { nounwind readnone speculatable willreturn }
|