File: pmull-ldr-merge.ll

package info (click to toggle)
llvm-toolchain-16 1%3A16.0.6-15~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,634,792 kB
  • sloc: cpp: 6,179,261; ansic: 1,216,205; asm: 741,319; python: 196,614; objc: 75,325; f90: 49,640; lisp: 32,396; pascal: 12,286; sh: 9,394; perl: 7,442; ml: 5,494; awk: 3,523; makefile: 2,723; javascript: 1,206; xml: 886; fortran: 581; cs: 573
file content (65 lines) | stat: -rw-r--r-- 2,637 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs  -mtriple=aarch64-linux-gnu -mattr=+aes -o - %s| FileCheck %s --check-prefixes=CHECK

; test1: both pmull64 operands are in scalar form (each is an i64 load).
; Tests that both operands are loaded into SIMD registers directly (the two
; `ldr d` instructions below) as opposed to being loaded into a GPR followed
; by a fmov.
;
; Arguments:
;   %0 - base pointer, indexed as an array of <2 x i64>; also the store target.
;   %1 - index of the <2 x i64> whose element 1 is the first pmull64 operand.
;   %2 - index of the <2 x i64> whose element 1 is the second pmull64 operand.
define void @test1(ptr %0, i64 %1, i64 %2) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, x2, lsl #4
; CHECK-NEXT:    add x9, x0, x1, lsl #4
; CHECK-NEXT:    ldr d0, [x8, #8]
; CHECK-NEXT:    ldr d1, [x9, #8]
; CHECK-NEXT:    pmull v0.1q, v1.1d, v0.1d
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  ; %4 is the address of vector %1; no later instruction in this function uses it.
  %4 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
  ; First operand: element 1 of vector %1 (the `#8` offset in the expected loads).
  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
  %6 = load i64, ptr %5, align 8
  ; Second operand: element 1 of vector %2.
  %7 = getelementptr inbounds <2 x i64>, ptr %0, i64 %2, i64 1
  %8 = load i64, ptr %7, align 8
  %9 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %6, i64 %8)
  ; The 128-bit result is written to the base pointer itself, not to %4.
  store <16 x i8> %9, ptr %0, align 16
  ret void
}

; test2: operand %8 is the higher half (element 1) of the v2i64 argument %3,
; and operand %7 is a scalar load.
; Tests that the loaded operand is brought into a SIMD register directly as
; opposed to being loaded into a GPR followed by a fmov; the expected output
; is an `ld1r { v1.2d }` feeding `pmull2`.
;
; Arguments:
;   %0 - base pointer, indexed as an array of <2 x i64>.
;   %1 - index of the <2 x i64> that is both read (element 1) and overwritten.
;   %2 - not referenced by the function body.
;   %3 - vector whose element 1 is the other pmull64 operand.
define void @test2(ptr %0, i64 %1, i64 %2, <2 x i64> %3) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, x1, lsl #4
; CHECK-NEXT:    add x9, x8, #8
; CHECK-NEXT:    ld1r { v1.2d }, [x9]
; CHECK-NEXT:    pmull2 v0.1q, v0.2d, v1.2d
; CHECK-NEXT:    str q0, [x8]
; CHECK-NEXT:    ret
  ; %5 is the address of vector %1; it is used only as the store destination.
  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
  ; Scalar operand: element 1 of vector %1.
  %6 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
  %7 = load i64, ptr %6, align 8
  ; Vector-derived operand: the high element of the by-value argument %3.
  %8 = extractelement <2 x i64> %3, i64 1
  %9 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %8, i64 %7)
  store <16 x i8> %9, ptr %5, align 16
  ret void
}

; test3: operand %7 is a scalar load, and operand %3 is an input parameter of
; function `test3`.
; Tests that %7 is loaded into a SIMD register directly (`ldr d1`). %3 is an
; i64 argument, and the expected output moves it across with `fmov d0, x3`.
;
; Arguments:
;   %0 - base pointer, indexed as an array of <2 x i64>.
;   %1 - index of the <2 x i64> that is both read (element 1) and overwritten.
;   %2 - not referenced by the function body.
;   %3 - i64 passed by value; second pmull64 operand.
define void @test3(ptr %0, i64 %1, i64 %2, i64 %3) {
; CHECK-LABEL: test3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, x1, lsl #4
; CHECK-NEXT:    fmov d0, x3
; CHECK-NEXT:    ldr d1, [x8, #8]
; CHECK-NEXT:    pmull v0.1q, v1.1d, v0.1d
; CHECK-NEXT:    str q0, [x8]
; CHECK-NEXT:    ret
  ; %5 is the address of vector %1; it is used only as the store destination.
  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
  ; Loaded operand: element 1 of vector %1.
  %6 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
  %7 = load i64, ptr %6, align 8
  %8 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %7, i64 %3)
  store <16 x i8> %8, ptr %5, align 16
  ret void
}

; Intrinsic exercised by every test in this file: takes two i64 operands and
; returns a 128-bit result typed as <16 x i8>. The expected output in the tests
; above shows calls to it selecting the AArch64 `pmull` / `pmull2` instructions.
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)