1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
|
// This file is a part of Julia. License is MIT: https://julialang.org/license
//===----------------------------------------------------------------------===//
//
// The ABI implementation used for AArch64 targets.
//
//===----------------------------------------------------------------------===//
//
// The Procedure Call Standard can be found here:
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
//
//===----------------------------------------------------------------------===//
struct ABI_AArch64Layout : AbiLayout {
Type *get_llvm_vectype(jl_datatype_t *dt) const
{
    // Returns the normalized LLVM short-vector type for `dt`, or nullptr if
    // `dt` is not a homogeneous tuple of `VecElement`s of a legal total size.
    //
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
    if (dt->layout == NULL)
        return nullptr;
    const size_t field_count = dt->layout->nfields;
    assert(field_count > 0);
    // Types with fewer than two fields are not treated as short vectors.
    if (field_count < 2)
        return nullptr;
    static Type *T_vec64 = VectorType::get(T_int32, 2);
    static Type *T_vec128 = VectorType::get(T_int32, 4);
    // Short vector should be either 8 bytes or 16 bytes.
    // Note that there are only two distinct fundamental types for
    // short vectors so we normalize them to <2 x i32> and <4 x i32>
    const auto total_size = jl_datatype_size(dt);
    Type *vectype;
    if (total_size == 8)
        vectype = T_vec64;
    else if (total_size == 16)
        vectype = T_vec128;
    else
        return nullptr;
    // Since `dt` is pointer free and has no padding and is 8 or 16 in size,
    // the first field type must be concrete, immutable with no padding and
    // we don't need to check if its size is legal since it is included in
    // the homogeneity check below.
    jl_datatype_t *elem_ty = (jl_datatype_t*)jl_field_type(dt, 0);
    // The element should be a `VecElement` type ...
    if (elem_ty->name != jl_vecelement_typename)
        return nullptr;
    // ... and the type it wraps should be a primitive type (no fields).
    jl_datatype_t *wrapped_ty = (jl_datatype_t*)jl_field_type(elem_ty, 0);
    if (wrapped_ty->layout->nfields)
        return nullptr;
    // Every remaining field must have exactly the same type as the first.
    for (size_t idx = 1; idx < field_count; idx++) {
        if (jl_field_type(dt, idx) != (jl_value_t*)elem_ty)
            return nullptr; // Not homogeneous
    }
    return vectype;
}
#define jl_is_floattype(v) jl_subtype(v,(jl_value_t*)jl_floatingpoint_type)
Type *get_llvm_fptype(jl_datatype_t *dt) const
{
    // Returns the LLVM floating-point type matching the size of `dt` when
    // `dt` is a subtype of the floating-point abstract type, else nullptr.
    //
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
    Type *candidate = nullptr;
    // Pick the candidate by size first since that is cheaper than the
    // subtype query below.
    switch (jl_datatype_size(dt)) {
    case 2:
        candidate = T_float16;
        break;
    case 4:
        candidate = T_float32;
        break;
    case 8:
        candidate = T_float64;
        break;
    case 16:
        candidate = T_float128;
        break;
    default:
        // No floating-point type of this size.
        return nullptr;
    }
    // `jl_floatingpoint_type` is tested for null before it is used in the
    // subtype query.
    if (!jl_floatingpoint_type)
        return nullptr;
    if (!jl_is_floattype((jl_value_t*)dt))
        return nullptr;
    return candidate;
}
Type *get_llvm_fp_or_vectype(jl_datatype_t *dt) const
{
    // Returns the LLVM floating-point type (for a type without fields) or
    // the normalized short-vector type (for a type with fields) that `dt`
    // maps to, or nullptr if `dt` is neither.
    //
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    //
    // A type without a layout cannot be inspected any further; report it as
    // "neither" instead of dereferencing a null layout pointer. This matches
    // the null-layout handling in `get_llvm_vectype` and `isHFAorHVA`.
    if (dt->mutabl || dt->layout == NULL ||
        dt->layout->npointers || dt->layout->haspadding)
        return nullptr;
    return dt->layout->nfields ? get_llvm_vectype(dt) : get_llvm_fptype(dt);
}
struct ElementType {
    // Fundamental element type recorded so far; null until one is recorded.
    Type *type = nullptr;
    // Size recorded together with `type`; 0 until one is recorded.
    size_t sz = 0;
    ElementType() = default;
};
// Whether a type is a homogeneous floating-point aggregate (HFA) or a
// homogeneous short-vector aggregate (HVA). On success the element type and
// size are stored in `ele` and `nele` is advanced by the number of elements.
// A Homogeneous Aggregate is a Composite Type where all of the Fundamental
// Data Types of the members that compose the type are the same.
// Note that it is the fundamental types that are important and not the member
// types.
bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) const
{
    // Assume:
    //     dt is a pointerfree type, (all members are isbits)
    //     dsz == jl_datatype_size(dt) > 0
    //     0 <= nele <= 3
    //     dt has no padding
    // We ignore zero sized member here. This isn't really consistent with
    // GCC for zero-sized array members. GCC seems to treat structs with
    // zero sized array members as non-HFA and non-HVA. Clang (3.7 and 3.8)
    // handles this slightly differently.
    // Ref https://llvm.org/bugs/show_bug.cgi?id=26162
    size_t member_count;
    while ((member_count = jl_datatype_nfields(dt)) != 0) {
        // Composite type: locate the first member with a non-zero size.
        size_t first = 0;
        size_t first_sz = 0;
        for (; first < member_count; first++) {
            first_sz = jl_field_size(dt, first);
            if (first_sz)
                break;
        }
        assert(first < member_count);
        // That member accounts for the whole size, i.e. it is the only
        // non zero sized member: unwrap it and start over on the member.
        if (first_sz == dsz) {
            dt = (jl_datatype_t*)jl_field_type(dt, first);
            continue;
        }
        // The composite as a whole may be a short vector, which counts as
        // one element.
        if (Type *vectype = get_llvm_vectype(dt)) {
            const bool size_mismatch = ele.sz && dsz != ele.sz;
            const bool type_mismatch = ele.type && ele.type != vectype;
            if (size_mismatch || type_mismatch)
                return false;
            ele.type = vectype;
            ele.sz = dsz;
            nele++;
            return true;
        }
        // Otherwise every non zero sized member has to be homogeneous with
        // what has been recorded in `ele` so far.
        for (size_t k = first; k < member_count; k++) {
            const size_t member_sz = jl_field_size(dt, k);
            if (member_sz == 0)
                continue;
            jl_datatype_t *member_ty = (jl_datatype_t*)jl_field_type(dt, k);
            // Check element count.
            // This needs to be done after the zero size member check
            if (nele > 3)
                return false;
            if (!isHFAorHVA(member_ty, member_sz, nele, ele))
                return false;
        }
        return true;
    }
    // For bitstypes: the size has to agree with the recorded element size
    // (if any) and the type has to be a floating-point type that agrees with
    // the recorded element type (if any).
    if (ele.sz && dsz != ele.sz)
        return false;
    Type *fptype = get_llvm_fptype(dt);
    if (!fptype)
        return false;
    if (ele.type && ele.type != fptype)
        return false;
    ele.type = fptype;
    ele.sz = dsz;
    nele++;
    return true;
}
Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele) const
{
    // Entry point of the HFA/HVA check. Returns the element type and stores
    // the element count in `nele` when `dt` is an HFA or HVA, and returns
    // null otherwise. `nele` is reset to 0 once the quick rejections below
    // have passed; it is left untouched if one of them fires.
    //
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    //
    // A Homogeneous Floating-point Aggregate (HFA) is a Homogeneous Aggregate
    // with a Fundamental Data Type that is a Floating-Point type and at most
    // four uniquely addressable members.
    // A Homogeneous Short-Vector Aggregate (HVA) is a Homogeneous Aggregate
    // with a Fundamental Data Type that is a Short-Vector type and at most
    // four uniquely addressable members.
    const size_t total_size = jl_datatype_size(dt);
    // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
    if (total_size > 64)
        return nullptr;
    if (!dt->layout)
        return nullptr;
    if (dt->layout->npointers || dt->layout->haspadding)
        return nullptr;
    nele = 0;
    ElementType found;
    return isHFAorHVA(dt, total_size, nele, found) ? found.type : nullptr;
}
bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
{
    // Decides whether an argument of type `dt` is replaced by a pointer to a
    // caller-allocated copy. `ab` is not used by this implementation.
    //
    // B.2
    // If the argument type is an HFA or an HVA, then the argument is used
    // unmodified.
    size_t nele_ignored; // only the returned type matters here
    const bool homogeneous = isHFAorHVA(dt, nele_ignored) != nullptr;
    // B.3
    // If the argument type is a Composite Type that is larger than 16 bytes,
    // then the argument is copied to memory allocated by the caller and the
    // argument is replaced by a pointer to the copy.
    // We only check for the total size and not whether it is a composite type
    // since there's no corresponding C type and we just treat such large
    // bitstype as a composite type of the right size.
    //
    // B.4
    // If the argument type is a Composite Type then the size of the argument
    // is rounded up to the nearest multiple of 8 bytes.
    return !homogeneous && jl_datatype_size(dt) > 16;
}
// Determine which kind of register the argument will be passed in and
// if the argument has to be passed on stack (including by reference).
//
// If the argument should be passed in SIMD and floating-point registers,
// we may need to rewrite the argument types to [n x ftype].
// If the argument should be passed in general purpose registers, we may need
// to rewrite the argument types to [n x i64].
//
// If the argument has to be passed on stack, we need to use sret.
//
// All the out parameters must be initialized by the caller to `false`
// (`0` for `rewrite_len`); this function only ever sets them.
Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
                   size_t *rewrite_len) const
{
    // Based on section 5.4 C of the Procedure Call Standard.
    // Returns the element type of the array the argument should be rewritten
    // to, with the array length stored in `*rewrite_len`, or null when no
    // rewrite is requested. `*fpreg` and `*onstack` are only ever set to
    // `true` here, never cleared.

    // C.1
    // If the argument is a Half-, Single-, Double- or Quad- precision
    // Floating-point or Short Vector Type and the NSRN is less than 8, then
    // the argument is allocated to the least significant bits of register
    // v[NSRN]. The NSRN is incremented by one. The argument has now been
    // allocated.
    if (get_llvm_fp_or_vectype(dt)) {
        *fpreg = true;
        return nullptr;
    }

    // C.2
    // If the argument is an HFA or an HVA and there are sufficient
    // unallocated SIMD and Floating-point registers (NSRN + number of
    // members <= 8), then the argument is allocated to SIMD and
    // Floating-point Registers (with one register per member of the HFA
    // or HVA). The NSRN is incremented by the number of registers used.
    // The argument has now been allocated.
    if (Type *eltype = isHFAorHVA(dt, *rewrite_len)) {
        // HFA and HVA have <= 4 members
        assert(*rewrite_len > 0 && *rewrite_len <= 4);
        *fpreg = true;
        // Rewrite to [n x eltype] where n is the number of fundamental types.
        return eltype;
    }

    const auto nbytes = jl_datatype_size(dt);

    // Check if the argument needs to be passed by reference. This should be
    // done before starting step C but we do this here to avoid checking for
    // HFA and HVA twice. We don't check whether it is a composite type.
    // See `needPassByRef` above.
    if (nbytes > 16) {
        *onstack = true;
        return nullptr;
    }

    // C.3
    // If the argument is an HFA or an HVA then the NSRN is set to 8 and the
    // size of the argument is rounded up to the nearest multiple of 8 bytes.
    // C.4
    // If the argument is an HFA, an HVA, a Quad-precision Floating-point or
    // Short Vector Type then the NSAA is rounded up to the larger of 8 or
    // the Natural Alignment of the argument's type.
    // C.5
    // If the argument is a Half- or Single- precision Floating Point type,
    // then the size of the argument is set to 8 bytes. The effect is as if
    // the argument had been copied to the least significant bits of a 64-bit
    // register and the remaining bits filled with unspecified values.
    // C.6
    // If the argument is an HFA, an HVA, a Half-, Single-, Double- or
    // Quad- precision Floating-point or Short Vector Type, then the argument
    // is copied to memory at the adjusted NSAA. The NSAA is incremented
    // by the size of the argument. The argument has now been allocated.
    // <already included in the C.2 case above>

    // C.7
    // If the argument is an Integral or Pointer Type, the size of the
    // argument is less than or equal to 8 bytes and the NGRN is less than 8,
    // the argument is copied to the least significant bits in x[NGRN].
    // The NGRN is incremented by one. The argument has now been allocated.
    // Here we treat any bitstype of the right size as integers or pointers
    // This is needed for types like Cstring which should be treated as
    // pointers. We don't need to worry about floating points here since they
    // are handled above.
    // (The 16 byte case of C.9 below is folded into this check.)
    const bool fieldless_immutable =
        jl_is_immutable(dt) && jl_datatype_nfields(dt) == 0;
    if (fieldless_immutable) {
        switch (nbytes) {
        case 1:
        case 2:
        case 4:
        case 8:
        case 16:
            return nullptr;
        default:
            break;
        }
    }

    // C.8
    // If the argument has an alignment of 16 then the NGRN is rounded up to
    // the next even number.
    // C.9
    // If the argument is an Integral Type, the size of the argument is equal
    // to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
    // and x[NGRN+1]. x[NGRN] shall contain the lower addressed double-word
    // of the memory representation of the argument. The NGRN is incremented
    // by two. The argument has now been allocated.
    // <merged into C.7 above>

    // C.10
    // If the argument is a Composite Type and the size in double-words of
    // the argument is not more than 8 minus NGRN, then the argument is
    // copied into consecutive general-purpose registers, starting at x[NGRN].
    // The argument is passed as though it had been loaded into the registers
    // from a double-word-aligned address with an appropriate sequence of LDR
    // instructions loading consecutive registers from memory (the contents of
    // any unused parts of the registers are unspecified by this standard).
    // The NGRN is incremented by the number of registers used. The argument
    // has now been allocated.
    // We don't check for composite types here since the ones that have
    // corresponding C types are already handled and we just treat the ones
    // with weird size as a black box composite type.
    // The type fits in at most 2 x 8 bytes here since anything larger than
    // 16 bytes has already been classified as `onstack` above.
    assert(nbytes <= 16); // Should be pass by reference otherwise
    *rewrite_len = (nbytes + 7) >> 3;

    // C.11
    // The NGRN is set to 8.
    // C.12
    // The NSAA is rounded up to the larger of 8 or the Natural Alignment
    // of the argument's type.
    // C.13
    // If the argument is a composite type then the argument is copied to
    // memory at the adjusted NSAA. The NSAA is incremented by the size of
    // the argument. The argument has now been allocated.
    // <handled by C.10 above>
    // C.14
    // If the size of the argument is less than 8 bytes then the size of the
    // argument is set to 8 bytes. The effect is as if the argument was
    // copied to the least significant bits of a 64-bit register and the
    // remaining bits filled with unspecified values.
    // C.15
    // The argument is copied to memory at the adjusted NSAA. The NSAA is
    // incremented by the size of the argument. The argument has now been
    // allocated.
    // <handled by C.10 above>

    // 0-size types (Nothing) won't be rewritten and that is what we want
    if (nbytes == 0)
        return nullptr;
    // Rewrite to [n x Int64] where n is the **size in dword**
    return T_int64;
}
bool use_sret(jl_datatype_t *dt) override
{
    // Section 5.5
    // If the type, T, of the result of a function is such that
    //
    //     void func(T arg)
    //
    // would require that arg be passed as a value in a register (or set of
    // registers) according to the rules in section 5.4 Parameter Passing,
    // then the result is returned in the same registers as would be used for
    // such an argument.
    //
    // So classify the type as if it were an argument and use sret exactly
    // when that classification reports `onstack`.
    bool in_fpreg = false;
    bool on_stack = false;
    size_t array_len = 0;
    // Only the `onstack` flag is needed; the rewrite type is discarded.
    (void)classify_arg(dt, &in_fpreg, &on_stack, &array_len);
    return on_stack;
}
Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
{
    // Returns the LLVM type `dt` should be lowered to, or null when there is
    // no preference. `isret` is not consulted by this implementation.
    //
    // Floating-point and short-vector types are used as they are.
    Type *direct = get_llvm_fp_or_vectype(dt);
    if (direct)
        return direct;
    bool in_fpreg = false;
    bool on_stack = false;
    size_t array_len = 0;
    Type *elem_ty = classify_arg(dt, &in_fpreg, &on_stack, &array_len);
    // No element type means `classify_arg` requested no rewrite.
    if (!elem_ty)
        return nullptr;
    // Otherwise rewrite to an array of `array_len` elements.
    return ArrayType::get(elem_ty, array_len);
}
};
|