commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e
Author: Craig Topper <craig.topper@intel.com>
Date: Thu Mar 8 00:21:17 2018 +0000
[X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates.
These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a general-protection (GP) fault if the data isn't aligned.
I believe these were introduced in r312450.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8
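For context, a minimal IR sketch of the kind of code that reaches these patterns: a 16-byte vector load that is only known to be 8-byte aligned, widened into a 256-bit vector whose upper half is zero. It is modeled on the merge-consecutive-loads tests updated below, but the function name, alignment, and shuffle step are illustrative reconstructions rather than copies of the test bodies. Before this fix, isel could fold such a load into vmovaps, which faults on a misaligned address; with the fix it selects vmovups.

; Hedged sketch -- function name and the explicit align 8 are illustrative.
define <4 x double> @widen_2f64_with_zero_upper(<2 x double>* %ptr) {
  ; 16-byte vector load that is only 8-byte aligned.
  %val = load <2 x double>, <2 x double>* %ptr, align 8
  ; Widen to <4 x double> with the upper two lanes zero; this is the
  ; insert_subvector-into-zero shape matched by subvector_zero_lowering.
  %wide = shufflevector <2 x double> %val, <2 x double> zeroinitializer,
                        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %wide
}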
diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
index db3dfe56531..50c7763a2c3 100644
--- a/lib/Target/X86/X86InstrVecCompiler.td
+++ b/lib/Target/X86/X86InstrVecCompiler.td
@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in {
// will zero the upper bits.
// TODO: Is there a safe way to detect whether the producing instruction
// already zeroed the upper bits?
-multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
- ValueType DstTy, ValueType SrcTy,
- ValueType ZeroTy, PatFrag memop,
- SubRegIndex SubIdx> {
+multiclass subvector_zero_lowering<string MoveStr, string LoadStr,
+ RegisterClass RC, ValueType DstTy,
+ ValueType SrcTy, ValueType ZeroTy,
+ PatFrag memop, SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
(SrcTy (bitconvert (memop addr:$src))),
(iPTR 0))),
(SUBREG_TO_REG (i64 0),
- (!cast<Instruction>("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>;
+ (!cast<Instruction>("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>;
}
let Predicates = [HasAVX, NoVLX] in {
- defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64,
- sub_xmm>;
- defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64,
- sub_xmm>;
-}
-
-let Predicates = [HasVLX] in {
- defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32,
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32,
loadv2f64, sub_xmm>;
- defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32,
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32,
loadv4f32, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32,
- loadv2i64, sub_xmm>;
-
- defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32,
- loadv2f64, sub_xmm>;
- defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32,
- loadv4f32, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32,
loadv2i64, sub_xmm>;
+}
- defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32,
- loadv4f64, sub_ymm>;
- defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32,
- loadv8f32, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32,
- loadv4i64, sub_ymm>;
+let Predicates = [HasVLX] in {
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64,
+ v2f64, v8i32, loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32,
+ v4f32, v8i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64,
+ v2i64, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32,
+ v4i32, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16,
+ v8i16, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8,
+ v16i8, v8i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64,
+ v2f64, v16i32, loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32,
+ v4f32, v16i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64,
+ v2i64, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32,
+ v4i32, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16,
+ v8i16, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8,
+ v16i8, v16i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64,
+ v4f64, v16i32, loadv4f64, sub_ymm>;
+ defm : subvector_zero_lowering<"APSZ256", "UPDZ256", VR256X, v16f32,
+ v8f32, v16i32, loadv8f32, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64,
+ v4i64, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32,
+ v8i32, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16,
+ v16i16, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8,
+ v32i8, v16i32, loadv4i64, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64,
- sub_xmm>;
- defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64,
- sub_xmm>;
-
- defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32,
- loadv4f64, sub_ymm>;
- defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32,
- loadv8f32, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32,
- loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64,
+ v16i32,loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32,
+ v16i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8,
+ v16i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64,
+ v16i32, loadv4f64, sub_ymm>;
+ defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32,
+ v16i32, loadv8f32, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8,
+ v16i32, loadv4i64, sub_ymm>;
}
// List of opcodes that guaranteed to zero the upper elements of vector regs.
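The test updates below exercise the fixed patterns. As a rough integer-side companion to the sketch above (again with an illustrative function name and alignment, not taken from the test files), the same multiclass now drives the unaligned load forms for the vector-integer types as well; depending on the execution domain chosen by the backend, the checked output uses vmovups or vmovdqu instead of the aligned move.

; Hedged sketch -- illustrative only, not one of the updated test functions.
define <4 x i64> @widen_2i64_with_zero_upper(<2 x i64>* %ptr) {
  ; 16-byte integer vector load that is only 8-byte aligned.
  %val = load <2 x i64>, <2 x i64>* %ptr, align 8
  ; Zero-extend into the upper 128 bits of a 256-bit vector.
  %wide = shufflevector <2 x i64> %val, <2 x i64> zeroinitializer,
                        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %wide
}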
diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll
index 6ecd8116443..0f2cf594b1c 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll
@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_2z:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_2f64_2z:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
%val0 = load <2 x double>, <2 x double>* %ptr0
@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_45zz:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_f64_45zz:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 4
%ptr1 = getelementptr inbounds double, double* %ptr, i64 5
@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_2i64_3z:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 48(%rdi), %xmm0
+; AVX-NEXT: vmovups 48(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4i64_2i64_3z:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 48(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 48(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
%val0 = load <2 x i64>, <2 x i64>* %ptr0
@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_i64_23zz:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 16(%rdi), %xmm0
+; AVX-NEXT: vmovups 16(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4i64_i64_23zz:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 16(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 16(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
%ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll
index 62102eb382c..3c6eaf65292 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL: # %bb.0:
-; ALL-NEXT: vmovaps 8(%rdi), %xmm0
+; ALL-NEXT: vmovups 8(%rdi), %xmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0
+; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
%ptr1 = getelementptr inbounds double, double* %ptr, i64 2
@@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL: # %bb.0:
-; ALL-NEXT: vmovaps 40(%rdi), %xmm0
+; ALL-NEXT: vmovups 40(%rdi), %xmm0
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovaps 40(%eax), %xmm0
+; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl