1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -fenable-matrix %s -emit-llvm -triple x86_64-unknown-linux -disable-llvm-passes -o - -std=c++11 | FileCheck %s
using i8x3 = _BitInt(8) __attribute__((ext_vector_type(3)));
using i8x3x3 = _BitInt(8) __attribute__((matrix_type(3, 3)));
using i32x3 = _BitInt(32) __attribute__((ext_vector_type(3)));
using i32x3x3 = _BitInt(32) __attribute__((matrix_type(3, 3)));
using i512x3 = _BitInt(512) __attribute__((ext_vector_type(3)));
using i512x3x3 = _BitInt(512) __attribute__((matrix_type(3, 3)));
using i4x3 = _BitInt(4) __attribute__((ext_vector_type(3)));
using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3)));
// CHECK-LABEL: define dso_local i32 @_Z2v1Dv3_DB8_(
// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i8>, align 4
// CHECK-NEXT: [[A:%.*]] = alloca <3 x i8>, align 4
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i8>, align 4
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i8>, ptr [[A]], align 4
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i8> [[LOADVECN]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i8> [[A1]], <3 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i8> [[LOADVECN2]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i8>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC5:%.*]] = shufflevector <4 x i8> [[LOADVECN4]], <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = add <3 x i8> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
// CHECK-NEXT: store <3 x i8> [[ADD]], ptr [[RETVAL]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
// CHECK-NEXT: ret i32 [[TMP0]]
//
i8x3 v1(i8x3 a) {
return a + a;
}
// CHECK-LABEL: define dso_local noundef <3 x i32> @_Z2v2Dv3_DB32_(
// CHECK-SAME: <3 x i32> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i32>, align 16
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[A]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[EXTRACTVEC1:%.*]] = shufflevector <4 x i32> [[LOADVECN]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i32> [[LOADVECN2]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = add <3 x i32> [[EXTRACTVEC1]], [[EXTRACTVEC3]]
// CHECK-NEXT: ret <3 x i32> [[ADD]]
//
i32x3 v2(i32x3 a) {
return a + a;
}
// CHECK-LABEL: define dso_local noundef <3 x i512> @_Z2v3Dv3_DB512_(
// CHECK-SAME: ptr noundef byval(<3 x i512>) align 256 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i512>, align 256
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i512>, ptr [[TMP0]], align 256
// CHECK-NEXT: [[A:%.*]] = shufflevector <4 x i512> [[LOADVECN]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i512> [[A]], <3 x i512> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i512> [[EXTRACTVEC]], ptr [[A_ADDR]], align 256
// CHECK-NEXT: [[LOADVECN1:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
// CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <4 x i512> [[LOADVECN1]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[LOADVECN3:%.*]] = load <4 x i512>, ptr [[A_ADDR]], align 256
// CHECK-NEXT: [[EXTRACTVEC4:%.*]] = shufflevector <4 x i512> [[LOADVECN3]], <4 x i512> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = add <3 x i512> [[EXTRACTVEC2]], [[EXTRACTVEC4]]
// CHECK-NEXT: ret <3 x i512> [[ADD]]
//
i512x3 v3(i512x3 a) {
return a + a;
}
// CHECK-LABEL: define dso_local i32 @_Z2v4Dv3_DB4_(
// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[A:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC5:%.*]] = shufflevector <4 x i4> [[LOADVECN4]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = add <3 x i4> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
// CHECK-NEXT: store <3 x i4> [[ADD]], ptr [[RETVAL]], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 2, i1 false)
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL_COERCE]], align 4
// CHECK-NEXT: ret i32 [[TMP0]]
//
i4x3 v4(i4x3 a) {
return a + a;
}
// CHECK-LABEL: define dso_local noundef <9 x i8> @_Z2m1u11matrix_typeILm3ELm3EDB8_E(
// CHECK-SAME: <9 x i8> noundef [[A:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i8], align 1
// CHECK-NEXT: store <9 x i8> [[A]], ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP0:%.*]] = load <9 x i8>, ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i8>, ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = add <9 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT: ret <9 x i8> [[TMP2]]
//
i8x3x3 m1(i8x3x3 a) {
return a + a;
}
// CHECK-LABEL: define dso_local noundef <9 x i32> @_Z2m2u11matrix_typeILm3ELm3EDB32_E(
// CHECK-SAME: <9 x i32> noundef [[A:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i32], align 4
// CHECK-NEXT: store <9 x i32> [[A]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load <9 x i32>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = add <9 x i32> [[TMP0]], [[TMP1]]
// CHECK-NEXT: ret <9 x i32> [[TMP2]]
//
i32x3x3 m2(i32x3x3 a) {
return a + a;
}
// CHECK-LABEL: define dso_local noundef <9 x i512> @_Z2m3u11matrix_typeILm3ELm3EDB512_E(
// CHECK-SAME: <9 x i512> noundef [[A:%.*]]) #[[ATTR6:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i512], align 8
// CHECK-NEXT: store <9 x i512> [[A]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load <9 x i512>, ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i512>, ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = add <9 x i512> [[TMP0]], [[TMP1]]
// CHECK-NEXT: ret <9 x i512> [[TMP2]]
//
i512x3x3 m3(i512x3x3 a) {
return a + a;
}
// CHECK-LABEL: define dso_local noundef <9 x i4> @_Z2m4u11matrix_typeILm3ELm3EDB4_E(
// CHECK-SAME: <9 x i4> noundef [[A:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i4], align 1
// CHECK-NEXT: store <9 x i4> [[A]], ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP0:%.*]] = load <9 x i4>, ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i4>, ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = add <9 x i4> [[TMP0]], [[TMP1]]
// CHECK-NEXT: ret <9 x i4> [[TMP2]]
//
i4x3x3 m4(i4x3x3 a) {
return a + a;
}
|