File: slice-width.ll

package info (click to toggle)
llvm-toolchain-15 1%3A15.0.6-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,554,644 kB
  • sloc: cpp: 5,922,452; ansic: 1,012,136; asm: 674,362; python: 191,568; objc: 73,855; f90: 42,327; lisp: 31,913; pascal: 11,973; javascript: 10,144; sh: 9,421; perl: 7,447; ml: 5,527; awk: 3,523; makefile: 2,520; xml: 885; cs: 573; fortran: 567
file content (159 lines) | stat: -rw-r--r-- 6,589 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"

declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind

; This tests that allocas are not split into slices that are not byte width multiple
define void @no_split_on_non_byte_width(i32) {
; CHECK-LABEL: @no_split_on_non_byte_width(
; CHECK-NEXT:    [[ARG_SROA_0:%.*]] = alloca i8, align 8
; CHECK-NEXT:    [[ARG_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0:%.*]] to i8
; CHECK-NEXT:    store i8 [[ARG_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT:    [[ARG_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[TMP0]], 8
; CHECK-NEXT:    [[ARG_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[ARG_SROA_3_0_EXTRACT_SHIFT]] to i24
; CHECK-NEXT:    br label [[LOAD_I32:%.*]]
; CHECK:       load_i32:
; CHECK-NEXT:    [[ARG_SROA_0_0_ARG_SROA_0_0_R01:%.*]] = load i8, ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT:    br label [[LOAD_I1:%.*]]
; CHECK:       load_i1:
; CHECK-NEXT:    [[ARG_SROA_0_0_ARG_SROA_0_0_T1:%.*]] = load i1, ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT:    ret void
;
  %arg = alloca i32 , align 8
  store i32 %0, ptr %arg
  br label %load_i32

load_i32:
  %r0 = load i32, ptr %arg
  br label %load_i1

load_i1:
  %t1 = load i1, ptr %arg
  ret void
}

; PR18726: Check that we use memcpy and memset to fill out padding when we have
; a slice with a simple single type whose store size is smaller than the slice
; size.

%union.Foo = type { x86_fp80, i64, i64 }

@foo_copy_source = external constant %union.Foo
@i64_sink = global i64 0

define void @memcpy_fp80_padding() {
; CHECK-LABEL: @memcpy_fp80_padding(
; CHECK-NEXT:    [[X_SROA_0:%.*]] = alloca x86_fp80, align 16
; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[X_SROA_0]], ptr align 16 @foo_copy_source, i32 16, i1 false)
; CHECK-NEXT:    [[X_SROA_1_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 16), align 16
; CHECK-NEXT:    [[X_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 24), align 8
; CHECK-NEXT:    store i64 [[X_SROA_1_0_COPYLOAD]], ptr @i64_sink, align 4
; CHECK-NEXT:    ret void
;
  %x = alloca %union.Foo

  ; Copy from a global.
  call void @llvm.memcpy.p0.p0.i32(ptr align 16 %x, ptr align 16 @foo_copy_source, i32 32, i1 false)

  ; Access a slice of the alloca to trigger SROA.
  %mid_p = getelementptr %union.Foo, ptr %x, i32 0, i32 1
  %elt = load i64, ptr %mid_p
  store i64 %elt, ptr @i64_sink
  ret void
}

define void @memset_fp80_padding() {
; CHECK-LABEL: @memset_fp80_padding(
; CHECK-NEXT:    [[X_SROA_0:%.*]] = alloca x86_fp80, align 16
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 16 [[X_SROA_0]], i8 -1, i32 16, i1 false)
; CHECK-NEXT:    store i64 -1, ptr @i64_sink, align 4
; CHECK-NEXT:    ret void
;
  %x = alloca %union.Foo

  ; Set to all ones.
  call void @llvm.memset.p0.i32(ptr align 16 %x, i8 -1, i32 32, i1 false)

  ; Access a slice of the alloca to trigger SROA.
  %mid_p = getelementptr %union.Foo, ptr %x, i32 0, i32 1
  %elt = load i64, ptr %mid_p
  store i64 %elt, ptr @i64_sink
  ret void
}

%S.vec3float = type { float, float, float }
%U.vec3float = type { <4 x float> }

declare i32 @memcpy_vec3float_helper(ptr)

; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
; vector store, hence accidentally putting gibberish onto the stack.
define i32 @memcpy_vec3float_widening(ptr %x) {
; CHECK-LABEL: @memcpy_vec3float_widening(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT:    [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT:    [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef
; CHECK-NEXT:    [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4
; CHECK-NEXT:    [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT:    store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4
; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @memcpy_vec3float_helper(ptr [[TMP2]])
; CHECK-NEXT:    ret i32 [[RESULT]]
;
entry:
  ; Create a temporary variable %tmp1 and copy %x[0] into it
  %tmp1 = alloca %S.vec3float, align 4
  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %tmp1, ptr align 4 %x, i32 12, i1 false)

  ; The following block does nothing; but appears to confuse SROA
  %unused3 = load <4 x float>, ptr %tmp1, align 1

  ; Create a second temporary and copy %tmp1 into it
  %tmp2 = alloca %S.vec3float, align 4
  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %tmp2, ptr align 4 %tmp1, i32 12, i1 false)

  %result = call i32 @memcpy_vec3float_helper(ptr %tmp2)
  ret i32 %result
}

; Don't crash on length that is constant expression.

define void @PR50888() {
; CHECK-LABEL: @PR50888(
; CHECK-NEXT:    [[ARRAY:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[ARRAY]], i8 0, i64 ptrtoint (ptr @PR50888 to i64), i1 false)
; CHECK-NEXT:    ret void
;
  %array = alloca i8
  call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 ptrtoint (ptr @PR50888 to i64), i1 false)
  ret void
}

; Don't crash on out-of-bounds length.

define void @PR50910() {
; CHECK-LABEL: @PR50910(
; CHECK-NEXT:    [[T1:%.*]] = alloca i8, i64 1, align 8
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[T1]], i8 0, i64 1, i1 false)
; CHECK-NEXT:    ret void
;
  %t1 = alloca i8, i64 1, align 8
  call void @llvm.memset.p0.i64(ptr align 8 %t1, i8 0, i64 4294967296, i1 false)
  ret void
}

define i1 @presplit_overlarge_load() {
; CHECK-LABEL: @presplit_overlarge_load(
; CHECK-NEXT:    [[A_SROA_0:%.*]] = alloca i8, align 2
; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_L11:%.*]] = load i8, ptr [[A_SROA_0]], align 2
; CHECK-NEXT:    [[A_SROA_0_0_A_SROA_0_0_L2:%.*]] = load i1, ptr [[A_SROA_0]], align 2
; CHECK-NEXT:    ret i1 [[A_SROA_0_0_A_SROA_0_0_L2]]
;
  %A = alloca i16
  %L1 = load i32, ptr %A
  %L2 = load i1, ptr %A
  ret i1 %L2
}