File: xop-shifts.ll

package info (click to toggle)
llvm-toolchain-14 1%3A14.0.6-12
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,496,180 kB
  • sloc: cpp: 5,593,972; ansic: 986,872; asm: 585,869; python: 184,223; objc: 72,530; lisp: 31,119; f90: 27,793; javascript: 9,780; pascal: 9,762; sh: 9,482; perl: 7,468; ml: 5,432; awk: 3,523; makefile: 2,538; xml: 953; cs: 573; fortran: 567
file content (73 lines) | stat: -rw-r--r-- 3,558 bytes parent folder | download | duplicates (16)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop | FileCheck %s

;
; SimplifyDemandedVectorEltsForTargetNode Handling
;

define <16 x i8> @demandedelts_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: demandedelts_vpshab:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %shift = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %shuffle, <16 x i8> %a1)
  %res = shufflevector <16 x i8> %shift, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %res
}

define <4 x i32> @demandedelts_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: demandedelts_vpshld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> zeroinitializer
  %shift = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %shuffle)
  %result = shufflevector <4 x i32> %shift, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %result
}

;
; isBinOp Handling
;

define <8 x i16> @binop_shuffle_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: binop_shuffle_vpshaw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %shuffle0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  %shuffle1 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  %shift = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %shuffle0, <8 x i16> %shuffle1)
  %result = shufflevector <8 x i16> %shift, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %result
}

; TODO - canonicalizeShuffleWithBinOps - handle scaled shuffle masks.
define <2 x i64> @binop_shuffle_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: binop_shuffle_vpshlq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; CHECK-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT:    retq
  %shuffle0 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %shuffle1 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %shift = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %shuffle0, <2 x i64> %shuffle1)
  %result = shufflevector <2 x i64> %shift, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %result
}

declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone