1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=sroa -S | FileCheck %s
; Data layout components that matter for the alignments expected in this file:
;   e          - little-endian
;   p:64:64:64 - 64-bit pointers in the default address space
;   i64:32:64  - i64 has a 32-bit ABI alignment (64-bit preferred), which is why
;                stores to an i64 global with no explicit alignment show up as
;                "align 4" in the expected output
;   f80:128    - x86_fp80 is 128-bit (16-byte) aligned
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
; Memory intrinsics used by the tests below. The trailing .i32 / .i64 in each
; name is the type of the length operand.
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
; This tests that allocas are not split into slices whose width is not a multiple of the byte width.
define void @no_split_on_non_byte_width(i32) {
; CHECK-LABEL: @no_split_on_non_byte_width(
; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8
; CHECK-NEXT: [[ARG_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0:%.*]] to i8
; CHECK-NEXT: store i8 [[ARG_SROA_0_0_EXTRACT_TRUNC]], ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT: [[ARG_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[TMP0]], 8
; CHECK-NEXT: [[ARG_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[ARG_SROA_3_0_EXTRACT_SHIFT]] to i24
; CHECK-NEXT: br label [[LOAD_I32:%.*]]
; CHECK: load_i32:
; CHECK-NEXT: [[ARG_SROA_0_0_ARG_SROA_0_0_R01:%.*]] = load i8, ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT: br label [[LOAD_I1:%.*]]
; CHECK: load_i1:
; CHECK-NEXT: [[ARG_SROA_0_0_ARG_SROA_0_0_T1:%.*]] = load i1, ptr [[ARG_SROA_0]], align 8
; CHECK-NEXT: ret void
;
; The unnamed i32 argument (%0) is stored to a 4-byte alloca and then read back
; twice from offset 0: once as a full i32 and once as an i1. In the expected
; output above the low byte survives as an i8 alloca and the upper 24 bits are
; peeled off with lshr/trunc; no slice narrower than a byte is created.
%arg = alloca i32 , align 8
store i32 %0, ptr %arg
br label %load_i32
load_i32:
; Full-width load of the alloca; its result is never used.
%r0 = load i32, ptr %arg
br label %load_i1
load_i1:
; Sub-byte load from the same address. It must remain an i1 load from the
; whole-byte slice rather than getting a 1-bit slice of its own.
%t1 = load i1, ptr %arg
ret void
}
; PR18726: Check that we use memcpy and memset to fill out padding when we have
; a slice with a simple single type whose store size is smaller than the slice
; size.
; With this module's data layout the x86_fp80 field starts at byte offset 0 and
; the two i64 fields sit at byte offsets 16 and 24 (these are the getelementptr
; offsets expected in @memcpy_fp80_padding), so the bytes between the end of
; the x86_fp80 value and offset 16 are padding.
%union.Foo = type { x86_fp80, i64, i64 }
; Source operand of the memcpy in @memcpy_fp80_padding; declared external, so
; its contents are not known in this module.
@foo_copy_source = external constant %union.Foo
; Destination for the i64 loaded in the fp80 padding tests -- presumably there
; to give that load an observable use.
@i64_sink = global i64 0
define void @memcpy_fp80_padding() {
; CHECK-LABEL: @memcpy_fp80_padding(
; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[X_SROA_0]], ptr align 16 @foo_copy_source, i32 16, i1 false)
; CHECK-NEXT: [[X_SROA_1_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 16), align 16
; CHECK-NEXT: [[X_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 24), align 8
; CHECK-NEXT: store i64 [[X_SROA_1_0_COPYLOAD]], ptr @i64_sink, align 4
; CHECK-NEXT: ret void
;
; A whole %union.Foo (32 bytes) is copied into %x, but only the middle i64
; field is read afterwards. In the expected output above, the x86_fp80 slice is
; still filled by a 16-byte memcpy (so its padding bytes are copied too), while
; the two i64 fields are copied with plain i64 loads from the source global.
%x = alloca %union.Foo
; Copy from a global.
call void @llvm.memcpy.p0.p0.i32(ptr align 16 %x, ptr align 16 @foo_copy_source, i32 32, i1 false)
; Access a slice of the alloca to trigger SROA.
%mid_p = getelementptr %union.Foo, ptr %x, i32 0, i32 1
%elt = load i64, ptr %mid_p
store i64 %elt, ptr @i64_sink
ret void
}
define void @memset_fp80_padding() {
; CHECK-LABEL: @memset_fp80_padding(
; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16
; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[X_SROA_0]], i8 -1, i32 16, i1 false)
; CHECK-NEXT: store i64 -1, ptr @i64_sink, align 4
; CHECK-NEXT: ret void
;
; Same shape as @memcpy_fp80_padding, but the alloca is filled by a memset of
; 0xFF bytes. In the expected output above, the x86_fp80 slice keeps a 16-byte
; memset (covering its padding), and the i64 read of the middle field is
; replaced by the constant -1.
%x = alloca %union.Foo
; Set to all ones.
call void @llvm.memset.p0.i32(ptr align 16 %x, i8 -1, i32 32, i1 false)
; Access a slice of the alloca to trigger SROA.
%mid_p = getelementptr %union.Foo, ptr %x, i32 0, i32 1
%elt = load i64, ptr %mid_p
store i64 %elt, ptr @i64_sink
ret void
}
; Three consecutive floats: 12 bytes, matching the 12-byte memcpy lengths used
; in @memcpy_vec3float_widening.
%S.vec3float = type { float, float, float }
; NOTE(review): %U.vec3float is not referenced anywhere else in the visible
; part of this file.
%U.vec3float = type { <4 x float> }
; External callee that receives the pointer to the final copy of the struct.
declare i32 @memcpy_vec3float_helper(ptr)
; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
; vector store, hence accidentally putting gibberish onto the stack.
define i32 @memcpy_vec3float_widening(ptr %x) {
; CHECK-LABEL: @memcpy_vec3float_widening(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef
; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4
; CHECK-NEXT: ret i32 [[RESULT]]
;
; In the expected output above, the copy into %tmp2 ends up as a 12-byte
; <3 x float> store; %tmp2 itself stays an alloca because its address is
; passed to the helper call.
entry:
; Create a temporary variable %tmp1 and copy %x[0] into it
%tmp1 = alloca %S.vec3float, align 4
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %tmp1, ptr align 4 %x, i32 12, i1 false)
; The following load is never used, but it reads a 16-byte <4 x float> from the
; 12-byte %tmp1; this over-wide access is what appears to confuse SROA.
%unused3 = load <4 x float>, ptr %tmp1, align 1
; Create a second temporary and copy %tmp1 into it
%tmp2 = alloca %S.vec3float, align 4
call void @llvm.memcpy.p0.p0.i32(ptr align 4 %tmp2, ptr align 4 %tmp1, i32 12, i1 false)
%result = call i32 @memcpy_vec3float_helper(ptr %tmp2)
ret i32 %result
}
; Don't crash on a memset whose length is a constant expression.
define void @PR50888() {
; CHECK-LABEL: @PR50888(
; CHECK-NEXT: [[ARRAY:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[ARRAY]], i8 0, i64 ptrtoint (ptr @PR50888 to i64), i1 false)
; CHECK-NEXT: ret void
;
; %array is a single byte with no explicit alignment.
%array = alloca i8
; The length operand is the constant expression ptrtoint(@PR50888), i.e. a
; constant that is not a literal integer. The call also claims align 16 on the
; destination; the expected output above shows it as align 1, matching the
; alloca.
call void @llvm.memset.p0.i64(ptr align 16 %array, i8 0, i64 ptrtoint (ptr @PR50888 to i64), i1 false)
ret void
}
; Don't crash on a memset whose length is out of bounds for the alloca.
define void @PR50910() {
; CHECK-LABEL: @PR50910(
; CHECK-NEXT: [[T1:%.*]] = alloca i8, i64 1, align 8
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[T1]], i8 0, i64 1, i1 false)
; CHECK-NEXT: ret void
;
; One-byte alloca written with an explicit element count of 1.
%t1 = alloca i8, i64 1, align 8
; The requested length is 4294967296 (2^32) bytes, far larger than the 1-byte
; alloca. In the expected output above the length is reduced to 1.
call void @llvm.memset.p0.i64(ptr align 8 %t1, i8 0, i64 4294967296, i1 false)
ret void
}
; Loads an i32 (4 bytes) from an i16 (2-byte) alloca, so the load is wider than
; the allocation, and then loads an i1 from the same address and returns it.
; NOTE(review): judging by the function name this targets SROA's load/store
; pre-splitting with an over-large load -- confirm against the originating bug.
define i1 @presplit_overlarge_load() {
; CHECK-LABEL: @presplit_overlarge_load(
; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i8, align 2
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L11:%.*]] = load i8, ptr [[A_SROA_0]], align 2
; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L2:%.*]] = load i1, ptr [[A_SROA_0]], align 2
; CHECK-NEXT: ret i1 [[A_SROA_0_0_A_SROA_0_0_L2]]
;
; In the expected output above the alloca is narrowed to a single byte, the
; over-wide load becomes an i8 load, and the i1 load is kept.
%A = alloca i16
; Over-wide load; its result is never used.
%L1 = load i32, ptr %A
%L2 = load i1, ptr %A
ret i1 %L2
}
|