; Cost-model checks for fixed-length vector operations when SVE is enabled,
; run across a range of -aarch64-sve-vector-bits-min values.
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -D#VBITS=128
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=128 | FileCheck %s -D#VBITS=128
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=256 | FileCheck %s -D#VBITS=256
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=384 | FileCheck %s -D#VBITS=256
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=512 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=640 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=768 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=896 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1024 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1152 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1280 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1408 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1536 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1664 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1792 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1920 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=2048 | FileCheck %s -D#VBITS=2048
; VBITS represents the useful bit size of a vector register from the code
; generator's point of view. It is clamped to power-of-2 values because
; only power-of-2 vector lengths are considered legal, regardless of the
; user specified vector length.
target triple = "aarch64-unknown-linux-gnu"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; Ensure the cost of legalisation is removed as the vector length grows.
; NOTE: The checks below assume a base cost of 1 per legal vector register
; for both add and fadd.
; Vector add/fadd costs with SVE enabled. Each expected cost is the number of
; VBITS-wide registers needed to hold the vector type.
define void @add() #0 {
; CHECK-LABEL: function 'add'

; i32 element vectors of increasing total width (128 to 2048 bits).
; CHECK: cost of [[#div(127,VBITS)+1]] for instruction: %add128 = add <4 x i32> undef, undef
  %add128 = add <4 x i32> undef, undef
; CHECK: cost of [[#div(255,VBITS)+1]] for instruction: %add256 = add <8 x i32> undef, undef
  %add256 = add <8 x i32> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512 = add <16 x i32> undef, undef
  %add512 = add <16 x i32> undef, undef
; CHECK: cost of [[#div(1023,VBITS)+1]] for instruction: %add1024 = add <32 x i32> undef, undef
  %add1024 = add <32 x i32> undef, undef
; CHECK: cost of [[#div(2047,VBITS)+1]] for instruction: %add2048 = add <64 x i32> undef, undef
  %add2048 = add <64 x i32> undef, undef

; A fixed 512-bit vector width, covering each integer and floating-point
; element type in turn.
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i8 = add <64 x i8> undef, undef
  %add512.i8 = add <64 x i8> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i16 = add <32 x i16> undef, undef
  %add512.i16 = add <32 x i16> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i32 = add <16 x i32> undef, undef
  %add512.i32 = add <16 x i32> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i64 = add <8 x i64> undef, undef
  %add512.i64 = add <8 x i64> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f16 = fadd <32 x half> undef, undef
  %add512.f16 = fadd <32 x half> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f32 = fadd <16 x float> undef, undef
  %add512.f32 = fadd <16 x float> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f64 = fadd <8 x double> undef, undef
  %add512.f64 = fadd <8 x double> undef, undef

  ret void
}
; Assuming base_cost = 2
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
; For fixed-length vectors of 128 bits or more, the cost is multiplied by 8
; when the element type is i8 and by 4 when the element type is i16.
; Signed division costs for fixed-length vectors, with SVE enabled through
; attribute group #0.
define void @sdiv() #0 {
; CHECK-LABEL: function 'sdiv'
; Vectors narrower than 128 bits: the expected costs are plain constants that
; do not depend on VBITS.
; CHECK: cost of 5 for instruction: %sdiv16.i8 = sdiv <2 x i8> undef, undef
%sdiv16.i8 = sdiv <2 x i8> undef, undef
; CHECK: cost of 8 for instruction: %sdiv32.i8 = sdiv <4 x i8> undef, undef
%sdiv32.i8 = sdiv <4 x i8> undef, undef
; CHECK: cost of 5 for instruction: %sdiv32.i16 = sdiv <2 x i16> undef, undef
%sdiv32.i16 = sdiv <2 x i16> undef, undef
; CHECK: cost of 8 for instruction: %sdiv64.i8 = sdiv <8 x i8> undef, undef
%sdiv64.i8 = sdiv <8 x i8> undef, undef
; CHECK: cost of 5 for instruction: %sdiv64.i16 = sdiv <4 x i16> undef, undef
%sdiv64.i16 = sdiv <4 x i16> undef, undef
; CHECK: cost of 1 for instruction: %sdiv64.i32 = sdiv <2 x i32> undef, undef
%sdiv64.i32 = sdiv <2 x i32> undef, undef
; 128-bit vectors: the expected cost is (div(127, VBITS)+1) * 2, with an extra
; factor of 8 for i8 elements and 4 for i16 elements.
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 8)]] for instruction: %sdiv128.i8 = sdiv <16 x i8> undef, undef
%sdiv128.i8 = sdiv <16 x i8> undef, undef
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 4)]] for instruction: %sdiv128.i16 = sdiv <8 x i16> undef, undef
%sdiv128.i16 = sdiv <8 x i16> undef, undef
; CHECK: cost of [[#mul(div(128-1, VBITS)+1, 2)]] for instruction: %sdiv128.i64 = sdiv <2 x i64> undef, undef
%sdiv128.i64 = sdiv <2 x i64> undef, undef
; 512-bit vectors: the same formulas with 512 in place of 128, so the expected
; cost shrinks as VBITS grows.
; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 8)]] for instruction: %sdiv512.i8 = sdiv <64 x i8> undef, undef
%sdiv512.i8 = sdiv <64 x i8> undef, undef
; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 4)]] for instruction: %sdiv512.i16 = sdiv <32 x i16> undef, undef
%sdiv512.i16 = sdiv <32 x i16> undef, undef
; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %sdiv512.i32 = sdiv <16 x i32> undef, undef
%sdiv512.i32 = sdiv <16 x i32> undef, undef
; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %sdiv512.i64 = sdiv <8 x i64> undef, undef
%sdiv512.i64 = sdiv <8 x i64> undef, undef
ret void
}
; Assuming base_cost = 2
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
; For fixed-length vectors of 128 bits or more, the cost is multiplied by 8
; when the element type is i8 and by 4 when the element type is i16.
; Unsigned division costs for fixed-length vectors, with SVE enabled through
; attribute group #0.
define void @udiv() #0 {
; CHECK-LABEL: function 'udiv'
; Vectors narrower than 128 bits: the expected costs are plain constants that
; do not depend on VBITS.
; CHECK: cost of 5 for instruction: %udiv16.i8 = udiv <2 x i8> undef, undef
%udiv16.i8 = udiv <2 x i8> undef, undef
; CHECK: cost of 8 for instruction: %udiv32.i8 = udiv <4 x i8> undef, undef
%udiv32.i8 = udiv <4 x i8> undef, undef
; CHECK: cost of 5 for instruction: %udiv32.i16 = udiv <2 x i16> undef, undef
%udiv32.i16 = udiv <2 x i16> undef, undef
; CHECK: cost of 8 for instruction: %udiv64.i8 = udiv <8 x i8> undef, undef
%udiv64.i8 = udiv <8 x i8> undef, undef
; CHECK: cost of 5 for instruction: %udiv64.i16 = udiv <4 x i16> undef, undef
%udiv64.i16 = udiv <4 x i16> undef, undef
; CHECK: cost of 1 for instruction: %udiv64.i32 = udiv <2 x i32> undef, undef
%udiv64.i32 = udiv <2 x i32> undef, undef
; 128-bit vectors: the expected cost is (div(127, VBITS)+1) * 2, with an extra
; factor of 8 for i8 elements and 4 for i16 elements.
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 8)]] for instruction: %udiv128.i8 = udiv <16 x i8> undef, undef
%udiv128.i8 = udiv <16 x i8> undef, undef
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 4)]] for instruction: %udiv128.i16 = udiv <8 x i16> undef, undef
%udiv128.i16 = udiv <8 x i16> undef, undef
; CHECK: cost of [[#mul(div(128-1, VBITS)+1, 2)]] for instruction: %udiv128.i64 = udiv <2 x i64> undef, undef
%udiv128.i64 = udiv <2 x i64> undef, undef
; 512-bit vectors: the same formulas with 512 in place of 128, so the expected
; cost shrinks as VBITS grows.
; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 8)]] for instruction: %udiv512.i8 = udiv <64 x i8> undef, undef
%udiv512.i8 = udiv <64 x i8> undef, undef
; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 4)]] for instruction: %udiv512.i16 = udiv <32 x i16> undef, undef
%udiv512.i16 = udiv <32 x i16> undef, undef
; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %udiv512.i32 = udiv <16 x i32> undef, undef
%udiv512.i32 = udiv <16 x i32> undef, undef
; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %udiv512.i64 = udiv <8 x i64> undef, undef
%udiv512.i64 = udiv <8 x i64> undef, undef
ret void
}
; The expected cost is ((vec_len-1)/VBITS)+1, i.e. one per legal vector register.
; i64 vector multiply costs with SVE enabled through attribute group #0.
define void @mul() #0 {
; Anchor the directives below to this function's section of the output, as the
; other functions in this file do, so they cannot match lines printed for an
; earlier function.
; CHECK-LABEL: function 'mul'
; CHECK: cost of [[#div(128-1, VBITS)+1]] for instruction: %mul128.i64 = mul <2 x i64> undef, undef
%mul128.i64 = mul <2 x i64> undef, undef
; CHECK: cost of [[#div(512-1, VBITS)+1]] for instruction: %mul512.i64 = mul <8 x i64> undef, undef
%mul512.i64 = mul <8 x i64> undef, undef
ret void
}
attributes #0 = { "target-features"="+sve" }
|