File: neon-intrinsics.ll

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.8-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,696 kB
  • sloc: cpp: 7,438,781; ansic: 1,393,871; asm: 1,012,926; python: 241,771; f90: 86,635; objc: 75,411; lisp: 42,144; pascal: 17,286; sh: 8,596; ml: 5,082; perl: 4,730; makefile: 3,591; awk: 3,523; javascript: 2,251; xml: 892; fortran: 672
file content (67 lines) | stat: -rw-r--r-- 3,631 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -mtriple=arm -S | FileCheck %s

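; instcombine can raise the alignment recorded on NEON load/store intrinsics
; to the known alignment of the pointer: the trailing i32 alignment operand
; of vld4/vst4, and the `align` attribute on the pointer operand of
; vld1x2/vst1x2.  Verify both forms here.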

@x = common global [8 x i32] zeroinitializer, align 32
@y = common global [8 x i32] zeroinitializer, align 16

; Exercises the vld4/vst4 forms, whose alignment is passed as a trailing i32
; operand.  The input calls pass an alignment of 1 for both @x (declared
; align 32) and @y (declared align 16).  The expected output has those
; operands rewritten to 32 and 16 respectively, with the pointer operands
; additionally marked nonnull.
define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT:    [[TMP1:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0(ptr nonnull @x, i32 32)
; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP1]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP1]], 1
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP1]], 2
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP1]], 3
; CHECK-NEXT:    call void @llvm.arm.neon.vst4.p0.v2i32(ptr nonnull @y, <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 16)
; CHECK-NEXT:    ret void
;
  ; Load four <2 x i32> vectors from @x, unpack each member of the returned
  ; struct, and store all four to @y.  Both calls start with alignment 1.
  %tmp1 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0(ptr @x, i32 1)
  %tmp2 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %tmp1, 0
  %tmp3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %tmp1, 1
  %tmp4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %tmp1, 2
  %tmp5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %tmp1, 3
  call void @llvm.arm.neon.vst4.p0.v2i32(ptr @y, <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
  ret void
}

; vld1x2 takes no alignment operand; alignment is expressed as an `align`
; attribute on the pointer operand of the call.  Here the call operand has no
; such attribute while the parameter %a is declared align 16.  The expected
; output carries align 16 on the call operand.
define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align(ptr align 16 %a) {
; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_no_align(
; CHECK-SAME: ptr align 16 [[A:%.*]]) {
; CHECK-NEXT:    [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 [[A]])
; CHECK-NEXT:    ret { <4 x i16>, <4 x i16> } [[TMP]]
;
  %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr %a)
  ret { <4 x i16>, <4 x i16> } %tmp
}

; The call operand is annotated align 8, which is lower than the align 16
; declared on the parameter %a.  The expected output has the call-site
; attribute raised to align 16.
define { <4 x i16>, <4 x i16> } @test_vld1x2_lower_align(ptr align 16 %a) {
; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_lower_align(
; CHECK-SAME: ptr align 16 [[A:%.*]]) {
; CHECK-NEXT:    [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 [[A]])
; CHECK-NEXT:    ret { <4 x i16>, <4 x i16> } [[TMP]]
;
  %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 8 %a)
  ret { <4 x i16>, <4 x i16> } %tmp
}

; The call operand is annotated align 16, which is higher than the align 8
; declared on the parameter %a.  The expected output keeps align 16 on the
; call operand, i.e. the existing call-site alignment is not lowered to the
; parameter's value.
define { <4 x i16>, <4 x i16> } @test_vld1x2_higher_align(ptr align 8 %a) {
; CHECK-LABEL: define { <4 x i16>, <4 x i16> } @test_vld1x2_higher_align(
; CHECK-SAME: ptr align 8 [[A:%.*]]) {
; CHECK-NEXT:    [[TMP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 [[A]])
; CHECK-NEXT:    ret { <4 x i16>, <4 x i16> } [[TMP]]
;
  %tmp = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld1x2.v4i16.p0(ptr align 16 %a)
  ret { <4 x i16>, <4 x i16> } %tmp
}

; Store counterpart of the vld1x2 no-align case.  The vst1x2 call passes %a
; without an `align` attribute while the parameter is declared align 16.  The
; expected output carries align 16 on the call's pointer operand; the two
; <4 x i16> data operands are passed through unchanged.
define void @test_vst1x2_no_align(ptr align 16 %a, <4 x i16> %b0, <4 x i16> %b1) {
; CHECK-LABEL: define void @test_vst1x2_no_align(
; CHECK-SAME: ptr align 16 [[A:%.*]], <4 x i16> [[B0:%.*]], <4 x i16> [[B1:%.*]]) {
; CHECK-NEXT:    call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr align 16 [[A]], <4 x i16> [[B0]], <4 x i16> [[B1]])
; CHECK-NEXT:    ret void
;
  call void @llvm.arm.neon.vst1x2.p0.v4i16(ptr %a, <4 x i16> %b0, <4 x i16> %b1)
  ret void
}