File: vector-getelementptr.ll

package info (click to toggle)
llvm-toolchain-17 1%3A17.0.6-22
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,799,624 kB
  • sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
file content (60 lines) | stat: -rw-r--r-- 3,317 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -mtriple=arm64-apple-ios -S -passes=slp-vectorizer < %s | FileCheck %s
; Vectorization requires a vector GEP + extracts, but the cost is offset by being able to efficiently vectorize the rest of the tree.

; Test input: four structurally identical scalar chains, one per i32 element
; 0..3 of %base1 and %base2. Each chain loads both elements, zero-extends them
; to i64, subtracts, and uses the difference as the index of an i32 GEP off
; %base_gep. The four resulting pointers are passed to @use_4.
; The CHECK lines below expect the loads, zexts and subs to each collapse into
; a single <4 x ...> operation, while the GEPs stay scalar and take their
; indices from extractelement.
define void @should_vectorize_gep(ptr %base1, ptr %base2, ptr %base_gep) {
; CHECK-LABEL: define void @should_vectorize_gep
; CHECK-SAME: (ptr [[BASE1:%.*]], ptr [[BASE2:%.*]], ptr [[BASE_GEP:%.*]]) {
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE1]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i32> [[TMP0]] to <4 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[BASE2]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i64> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[GETELEMENTPTR_RES_1:%.*]] = getelementptr i32, ptr [[BASE_GEP]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1
; CHECK-NEXT:    [[GETELEMENTPTR_RES_2:%.*]] = getelementptr i32, ptr [[BASE_GEP]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 2
; CHECK-NEXT:    [[GETELEMENTPTR_RES_3:%.*]] = getelementptr i32, ptr [[BASE_GEP]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP4]], i32 3
; CHECK-NEXT:    [[GETELEMENTPTR_RES_4:%.*]] = getelementptr i32, ptr [[BASE_GEP]], i64 [[TMP8]]
; CHECK-NEXT:    call void @use_4(ptr [[GETELEMENTPTR_RES_1]], ptr [[GETELEMENTPTR_RES_2]], ptr [[GETELEMENTPTR_RES_3]], ptr [[GETELEMENTPTR_RES_4]])
; CHECK-NEXT:    ret void
;
bb:
  ; Element 0: index = zext(*%base1) - zext(*%base2); read directly from the
  ; base pointers, so no address GEP is needed for this element.
  %load1 = load i32, ptr %base1, align 2
  %zext1 = zext i32 %load1 to i64
  %load2 = load i32, ptr %base2, align 2
  %zext2 = zext i32 %load2 to i64
  %sub = sub i64 %zext1, %zext2
  %getelementptr.res.1 = getelementptr i32, ptr %base_gep, i64 %sub
  ; Element 1: same computation on the i32 at offset 1 from each base.
  %getelementptr1 = getelementptr i32, ptr %base1, i64 1
  %getelementptr2 = getelementptr i32, ptr %base2, i64 1
  %load3 = load i32, ptr %getelementptr1, align 2
  %zext3 = zext i32 %load3 to i64
  %load4 = load i32, ptr %getelementptr2, align 2
  %zext4= zext i32 %load4 to i64
  %sub2 = sub i64 %zext3, %zext4
  %getelementptr.res.2 = getelementptr i32, ptr %base_gep, i64 %sub2
  ; Element 2: same computation on the i32 at offset 2 from each base.
  %getelementptr3 = getelementptr i32, ptr %base1, i64 2
  %getelementptr4 = getelementptr i32, ptr %base2, i64 2
  %load5 = load i32, ptr %getelementptr3, align 2
  %zext5 = zext i32 %load5 to i64
  %load6 = load i32, ptr %getelementptr4, align 2
  %zext6 = zext i32 %load6 to i64
  %sub3 = sub i64 %zext5, %zext6
  %getelementptr.res.3 = getelementptr i32, ptr %base_gep, i64 %sub3
  ; Element 3: same computation on the i32 at offset 3 from each base.
  %getelementptr5 = getelementptr i32, ptr %base1, i64 3
  %getelementptr6 = getelementptr i32, ptr %base2, i64 3
  %load7 = load i32, ptr %getelementptr5, align 2
  %zext7 = zext i32 %load7 to i64
  %load8 = load i32, ptr %getelementptr6, align 2
  %zext8 = zext i32 %load8 to i64
  %sub4 = sub i64 %zext7, %zext8
  %getelementptr.res.4 = getelementptr i32, ptr %base_gep, i64 %sub4
  ; All four computed pointers are consumed by a single call, which keeps every
  ; scalar chain above live.
  call void @use_4(ptr %getelementptr.res.1, ptr %getelementptr.res.2, ptr %getelementptr.res.3, ptr %getelementptr.res.4)
  ret void
}

; Declaration only (no body in this file). @should_vectorize_gep calls it with
; its four GEP results.
declare void @use_4(ptr, ptr, ptr, ptr)