File: rvv-vmerge-to-vmv.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
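
; With an all-ones mask, vp.merge(%mask, %y, %x, %vl) simply copies the first
; %vl elements of %y over %x, so it can be lowered to a tail-undisturbed
; vmv.v.v. The tests below check this fold across SEW/LMUL combinations.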

define <vscale x 1 x i8> @vpmerge_mf8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_mf8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %1 = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %x, i32 %vl)
  ret <vscale x 1 x i8> %1
}

define <vscale x 2 x i8> @vpmerge_mf4(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_mf4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %1 = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %x, i32 %vl)
  ret <vscale x 2 x i8> %1
}

define <vscale x 4 x i8> @vpmerge_mf2(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_mf2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %1 = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %x, i32 %vl)
  ret <vscale x 4 x i8> %1
}

define <vscale x 8 x i8> @vpmerge_m1(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_m1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %1 = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %x, i32 %vl)
  ret <vscale x 8 x i8> %1
}
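
; The same fold applies at LMUL >= 2; the source operand of vmv.v.v is now the
; first register of a larger register group (v10, v12, v16).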

define <vscale x 8 x i16> @vpmerge_m2(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_m2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %1 = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %x, i32 %vl)
  ret <vscale x 8 x i16> %1
}

define <vscale x 8 x i32> @vpmerge_m4(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_m4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %1 = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %x, i32 %vl)
  ret <vscale x 8 x i32> %1
}

define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y, i32 zeroext %vl) {
; CHECK-LABEL: vpmerge_m8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %1 = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %x, i32 %vl)
  ret <vscale x 8 x i64> %1
}

; Shouldn't be converted: the vmerge reintroduces elements from %false past the vle's AVL, and those would be lost if we converted it to vmv.v.v.
define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) {
; CHECK-LABEL: preserve_false:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v10, v9
; CHECK-NEXT:    vle32.v v10, (a0), v0.t
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v9, v10, v0
; CHECK-NEXT:    ret
  %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3)
  %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2)
  ret <vscale x 2 x i32> %res
}

; Can fold this because its AVL is known to be <= the AVL of %true, so no elements from %false need to be introduced past the AVL.
define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: preserve_false_avl_known_le:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0), v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
  %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1)
  ret <vscale x 2 x i32> %res
}