File: minbw-with-and-and-scalar-trunc.ll

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 1,998,492 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (43 lines) | stat: -rw-r--r-- 2,138 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s
; Zero-initialized array of 12 i64 values; @test loads elements 0, 3, 6 and 9.
@c = global [12 x i64] zeroinitializer

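; FIXME: after minbitwidth analysis and the conversion to i32, the `and` with
; 65535 is transformed to `and <4 x i16> %x, -1`, which must be dropped.
; NOTE(review): the expected output below contains no such `and`; confirm
; whether the FIXME above is still current.
; FIXME: need to adjust the cost of the final transformation, since the user is
; just a trunc to i16 (it must be free).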
define i16 @test() {
; CHECK-LABEL: define i16 @test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16>
; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], <i16 -1, i16 -1, i16 -1, i16 -1>
; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[TMP4]] to i32
; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[TMP5]] to i16
; CHECK-NEXT:    ret i16 [[T]]
;
; Scalar input for the vectorizer: four i64 loads from @c at element indices
; 0, 3, 6 and 9 (three i64 elements, i.e. 24 bytes, apart). Each loaded value
; is truncated to i32, masked to its low 16 bits, and has those 16 bits
; inverted; the four results are combined with a chain of unsigned-max calls
; and the final maximum is truncated to i16 and returned.
;
; The expected output above is a single strided load of four i64 values with
; a 24-byte stride, narrowed to <4 x i16>, inverted with an all-ones xor,
; reduced with an unsigned-max vector reduction, then zero-extended to i32
; and truncated back to i16 for the return.
entry:
  ; First value: @c[0].
  %0 = load i64, ptr @c, align 8
  %conv = trunc i64 %0 to i32
  ; Keep only the low 16 bits, then flip them.
  %conv3 = and i32 %conv, 65535
  %conv4 = xor i32 %conv3, 65535
  ; Second value: @c[3], same trunc/mask/invert sequence.
  %1 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 3), align 8
  %conv.1 = trunc i64 %1 to i32
  %conv3.1 = and i32 %conv.1, 65535
  %conv4.1 = xor i32 %conv3.1, 65535
  ; Running unsigned maximum of the first two values.
  %.conv4.1 = tail call i32 @llvm.umax.i32(i32 %conv4, i32 %conv4.1)
  ; Third value: @c[6].
  %2 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 6), align 8
  %conv.2 = trunc i64 %2 to i32
  %conv3.2 = and i32 %conv.2, 65535
  %conv4.2 = xor i32 %conv3.2, 65535
  ; Fold the third value into the running maximum.
  %.conv4.2 = tail call i32 @llvm.umax.i32(i32 %.conv4.1, i32 %conv4.2)
  ; Fourth value: @c[9].
  %3 = load i64, ptr getelementptr inbounds ([12 x i64], ptr @c, i64 0, i64 9), align 8
  %conv.3 = trunc i64 %3 to i32
  %conv3.3 = and i32 %conv.3, 65535
  %conv4.3 = xor i32 %conv3.3, 65535
  ; Final unsigned maximum over all four values.
  %.conv4.3 = tail call i32 @llvm.umax.i32(i32 %.conv4.2, i32 %conv4.3)
  ; The only user of the reduction result is this trunc to i16.
  %t = trunc i32 %.conv4.3 to i16
  ret i16 %t
}