; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
; Four 4-byte accesses of different types (i32, <2 x i16>, float, <4 x i8>) are
; loaded from byte offsets 0, 4, 8 and 12 of %ptr1 (addrspace 1) and stored to
; the same offsets of %ptr2 (addrspace 2). Every access is 4-byte aligned.
; The recorded assertions are identical to the input IR, i.e. the expected
; output keeps all four loads and all four stores separate.
; NOTE(review): the function name says "merge" but the recorded output shows no
; merged access; confirm whether this is an intentional baseline.
define void @merge_i32_v2i16_f32_v4i8(ptr addrspace(1) %ptr1, ptr addrspace(2) %ptr2) {
; CHECK-LABEL: define void @merge_i32_v2i16_f32_v4i8(
; CHECK-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PTR1]], i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr addrspace(1) [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(1) [[PTR1]], i64 1
; CHECK-NEXT: [[LOAD2:%.*]] = load <2 x i16>, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 2
; CHECK-NEXT: [[LOAD3:%.*]] = load float, ptr addrspace(1) [[GEP3]], align 4
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(1) [[PTR1]], i64 3
; CHECK-NEXT: [[LOAD4:%.*]] = load <4 x i8>, ptr addrspace(1) [[GEP4]], align 4
; CHECK-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
; CHECK-NEXT: store i32 [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
; CHECK-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(2) [[PTR2]], i64 1
; CHECK-NEXT: store <2 x i16> [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
; CHECK-NEXT: [[STORE_GEP3:%.*]] = getelementptr inbounds float, ptr addrspace(2) [[PTR2]], i64 2
; CHECK-NEXT: store float [[LOAD3]], ptr addrspace(2) [[STORE_GEP3]], align 4
; CHECK-NEXT: [[STORE_GEP4:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(2) [[PTR2]], i64 3
; CHECK-NEXT: store <4 x i8> [[LOAD4]], ptr addrspace(2) [[STORE_GEP4]], align 4
; CHECK-NEXT: ret void
;
; Loads: each address is indexed by its own 4-byte element type, so indices
; 0..3 land on byte offsets 0, 4, 8, 12 from %ptr1.
%gep1 = getelementptr inbounds i32, ptr addrspace(1) %ptr1, i64 0
%load1 = load i32, ptr addrspace(1) %gep1, align 4
%gep2 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %ptr1, i64 1
%load2 = load <2 x i16>, ptr addrspace(1) %gep2, align 4
%gep3 = getelementptr inbounds float, ptr addrspace(1) %ptr1, i64 2
%load3 = load float, ptr addrspace(1) %gep3, align 4
%gep4 = getelementptr inbounds <4 x i8>, ptr addrspace(1) %ptr1, i64 3
%load4 = load <4 x i8>, ptr addrspace(1) %gep4, align 4
; Stores: each loaded value is written back with its original type to the
; matching byte offset from %ptr2.
%store.gep1 = getelementptr inbounds i32, ptr addrspace(2) %ptr2, i64 0
store i32 %load1, ptr addrspace(2) %store.gep1, align 4
%store.gep2 = getelementptr inbounds <2 x i16>, ptr addrspace(2) %ptr2, i64 1
store <2 x i16> %load2, ptr addrspace(2) %store.gep2, align 4
%store.gep3 = getelementptr inbounds float, ptr addrspace(2) %ptr2, i64 2
store float %load3, ptr addrspace(2) %store.gep3, align 4
%store.gep4 = getelementptr inbounds <4 x i8>, ptr addrspace(2) %ptr2, i64 3
store <4 x i8> %load4, ptr addrspace(2) %store.gep4, align 4
ret void
}
; A float at byte offset 0 and a <2 x half> at byte offset 4 are loaded from
; %ptr1 (addrspace 1) and stored to the same offsets of %ptr2 (addrspace 2),
; all with 4-byte alignment.
; The recorded assertions are identical to the input IR, i.e. the expected
; output keeps both loads and both stores separate.
; NOTE(review): the function name says "merge" but the recorded output shows no
; merged access; confirm whether this is an intentional baseline.
define void @merge_f32_v2f16_type(ptr addrspace(1) %ptr1, ptr addrspace(2) %ptr2) {
; CHECK-LABEL: define void @merge_f32_v2f16_type(
; CHECK-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr addrspace(1) [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[PTR1]], i64 1
; CHECK-NEXT: [[LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
; CHECK-NEXT: store float [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
; CHECK-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(2) [[PTR2]], i64 1
; CHECK-NEXT: store <2 x half> [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
; CHECK-NEXT: ret void
;
; Loads: float at index 0, then <2 x half> at index 1 of a 4-byte element type.
%gep1 = getelementptr inbounds float, ptr addrspace(1) %ptr1, i64 0
%load1 = load float, ptr addrspace(1) %gep1, align 4
%gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %ptr1, i64 1
%load2 = load <2 x half>, ptr addrspace(1) %gep2, align 4
; The first store address is computed with an i32 element type although the
; stored value is a float; with index 0 the resulting offset is 0 either way.
%store.gep1 = getelementptr inbounds i32, ptr addrspace(2) %ptr2, i64 0
store float %load1, ptr addrspace(2) %store.gep1, align 4
%store.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(2) %ptr2, i64 1
store <2 x half> %load2, ptr addrspace(2) %store.gep2, align 4
ret void
}
; A 2-byte bfloat at byte offset 0 and a 4-byte <2 x half> at byte offset 4 are
; loaded from %ptr1 (addrspace 1) and stored to the same offsets of %ptr2
; (addrspace 2). Bytes 2..3 are not covered by either access, so the two
; accesses are not adjacent in memory.
; The recorded assertions are identical to the input IR, i.e. the expected
; output keeps both loads and both stores separate.
; NOTE(review): the function name says "merge" but the recorded output shows no
; merged access; confirm whether this is an intentional baseline.
define void @merge_v2f16_bfloat_type(ptr addrspace(1) %ptr1, ptr addrspace(2) %ptr2) {
; CHECK-LABEL: define void @merge_v2f16_bfloat_type(
; CHECK-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds bfloat, ptr addrspace(1) [[PTR1]], i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load bfloat, ptr addrspace(1) [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[PTR1]], i64 1
; CHECK-NEXT: [[LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
; CHECK-NEXT: store bfloat [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
; CHECK-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(2) [[PTR2]], i64 1
; CHECK-NEXT: store <2 x half> [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
; CHECK-NEXT: ret void
;
; Loads: the bfloat access is only 2 bytes wide but is marked align 4.
%gep1 = getelementptr inbounds bfloat, ptr addrspace(1) %ptr1, i64 0
%load1 = load bfloat, ptr addrspace(1) %gep1, align 4
%gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %ptr1, i64 1
%load2 = load <2 x half>, ptr addrspace(1) %gep2, align 4
; The first store address is computed with an i32 element type although the
; stored value is a bfloat; with index 0 the resulting offset is 0 either way.
%store.gep1 = getelementptr inbounds i32, ptr addrspace(2) %ptr2, i64 0
store bfloat %load1, ptr addrspace(2) %store.gep1, align 4
%store.gep2 = getelementptr inbounds <2 x half>, ptr addrspace(2) %ptr2, i64 1
store <2 x half> %load2, ptr addrspace(2) %store.gep2, align 4
ret void
}
; Negative test: the loaded and stored values are pointers that belong to two
; different address spaces (ptr addrspace(1) and ptr addrspace(2)).
; The recorded assertions are identical to the input IR, i.e. the expected
; output keeps both loads and both stores separate, matching the "no_merge"
; name.
; NOTE(review): the byte offset of index 1 below depends on the size of
; ptr addrspace(2) in the target data layout, which this file does not spell
; out (only -mtriple is given) -- confirm against the amdgcn data layout.
define void @no_merge_mixed_ptr_addrspaces(ptr addrspace(1) %ptr1, ptr addrspace(2) %ptr2) {
; CHECK-LABEL: define void @no_merge_mixed_ptr_addrspaces(
; CHECK-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR1]], i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(1) [[PTR1]], i64 1
; CHECK-NEXT: [[LOAD2:%.*]] = load ptr addrspace(2), ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
; CHECK-NEXT: store ptr addrspace(1) [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
; CHECK-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(2) [[PTR2]], i64 1
; CHECK-NEXT: store ptr addrspace(2) [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
; CHECK-NEXT: ret void
;
; Loads: a ptr addrspace(1) value at index 0 and a ptr addrspace(2) value at
; index 1, both read through %ptr1.
%gep1 = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) %ptr1, i64 0
%load1 = load ptr addrspace(1), ptr addrspace(1) %gep1, align 4
%gep2 = getelementptr inbounds ptr addrspace(2), ptr addrspace(1) %ptr1, i64 1
%load2 = load ptr addrspace(2), ptr addrspace(1) %gep2, align 4
; The first store address is computed with an i32 element type although the
; stored value is a pointer; with index 0 the resulting offset is 0 either way.
%store.gep1 = getelementptr inbounds i32, ptr addrspace(2) %ptr2, i64 0
store ptr addrspace(1) %load1, ptr addrspace(2) %store.gep1, align 4
%store.gep2 = getelementptr inbounds ptr addrspace(2), ptr addrspace(2) %ptr2, i64 1
store ptr addrspace(2) %load2, ptr addrspace(2) %store.gep2, align 4
ret void
}
|