; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck --check-prefix=X86 %s
; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse2 | FileCheck --check-prefix=X64 %s
; If there is no explicit MMX type usage, always promote to XMM.
; test0: load a <1 x i64>, bitcast to <2 x i32>, splat the high lane, and
; store it back. No MMX intrinsic appears anywhere in the function, so per
; the policy in the file header the whole sequence must be lowered using
; XMM (SSE) instructions only — the CHECK lines verify no %mm register or
; movd/movq-to-MMX transfer is emitted.
define void @test0(ptr %x) nounwind {
; X86-LABEL: test0:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: movlps %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: test0:
; X64: ## %bb.0: ## %entry
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT: movq %xmm0, (%rdi)
; X64-NEXT: retq
entry:
; Splat element 1 of the <2 x i32> view of the loaded i64.
%tmp2 = load <1 x i64>, ptr %x
%tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32>
%tmp9 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
%tmp10 = bitcast <2 x i32> %tmp9 to <1 x i64>
store <1 x i64> %tmp10, ptr %x
ret void
}
; test1: the call to @llvm.x86.mmx.maskmovq takes <1 x i64> (MMX) operands,
; so this function MUST use %mm registers despite the promote-to-XMM policy.
; The and/add chain operates on constants (zeroinitializer and'ed with a
; constant, then a constant added), so it folds to the single 64-bit
; immediate 0x7070606040400000 loaded from a constant pool in the CHECK
; lines. maskmovq implicitly stores relative to %edi/%rdi, hence the
; xorl %edi, %edi (and the push/pop of %edi on 32-bit, where it is
; callee-saved).
define void @test1() nounwind {
; X86-LABEL: test1:
; X86: ## %bb.0: ## %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pxor %mm0, %mm0
; X86-NEXT: movq {{\.?LCPI[0-9]+_[0-9]+}}, %mm1 ## mm1 = 0x7070606040400000
; X86-NEXT: xorl %edi, %edi
; X86-NEXT: maskmovq %mm1, %mm0
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: test1:
; X64: ## %bb.0: ## %entry
; X64-NEXT: pxor %mm0, %mm0
; X64-NEXT: movq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %mm1 ## mm1 = 0x7070606040400000
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: maskmovq %mm1, %mm0
; X64-NEXT: retq
entry:
%tmp528 = bitcast <8 x i8> zeroinitializer to <2 x i32>
%tmp529 = and <2 x i32> %tmp528, bitcast (<4 x i16> < i16 -32640, i16 16448, i16 8224, i16 4112 > to <2 x i32>)
%tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16>
%tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 >
%tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8>
%tmp556 = bitcast <8 x i8> %tmp555 to <1 x i64>
%tmp557 = bitcast <8 x i8> zeroinitializer to <1 x i64>
tail call void @llvm.x86.mmx.maskmovq( <1 x i64> %tmp557, <1 x i64> %tmp556, ptr null)
ret void
}
; Global <2 x i32> operated on in place by test2 below.
@tmp_V2i = common global <2 x i32> zeroinitializer
; test2: load the global, splat lane 0 of the <2 x i32>, store back.
; Like test0 there is no MMX intrinsic, so the lowering must stay in XMM
; registers; the CHECK lines also cover the Darwin global-access sequences
; (non-lazy pointer on 32-bit, GOTPCREL on 64-bit).
define void @test2() nounwind {
; X86-LABEL: test2:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl L_tmp_V2i$non_lazy_ptr, %eax
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X86-NEXT: movlps %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: test2:
; X64: ## %bb.0: ## %entry
; X64-NEXT: movq _tmp_V2i@GOTPCREL(%rip), %rax
; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: movq %xmm0, (%rax)
; X64-NEXT: retq
entry:
%0 = load <2 x i32>, ptr @tmp_V2i, align 8
%1 = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
store <2 x i32> %1, ptr @tmp_V2i, align 8
ret void
}
; pr35869: regression test (llvm.org PR35869). A chain of explicit MMX
; intrinsics — punpcklbw, pcmpgt.w, punpckhwd, punpcklwd — feeds the SSE
; cvtpi2ps intrinsic, which takes an MMX operand and an XMM accumulator,
; so the lowering must mix %mm and %xmm registers without losing the
; intermediate MMX values. The shufflevector between the two cvtpi2ps
; calls duplicates the low two float lanes (movlhps in the CHECK lines).
; Identical codegen is expected on both targets apart from ret{l,q}.
define <4 x float> @pr35869() nounwind {
; X86-LABEL: pr35869:
; X86: ## %bb.0:
; X86-NEXT: movl $64, %eax
; X86-NEXT: movd %eax, %mm0
; X86-NEXT: pxor %mm1, %mm1
; X86-NEXT: punpcklbw %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
; X86-NEXT: pcmpgtw %mm0, %mm1
; X86-NEXT: movq %mm0, %mm2
; X86-NEXT: punpckhwd %mm1, %mm2 ## mm2 = mm2[2],mm1[2],mm2[3],mm1[3]
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: cvtpi2ps %mm2, %xmm0
; X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; X86-NEXT: punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT: cvtpi2ps %mm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: pr35869:
; X64: ## %bb.0:
; X64-NEXT: movl $64, %eax
; X64-NEXT: movd %eax, %mm0
; X64-NEXT: pxor %mm1, %mm1
; X64-NEXT: punpcklbw %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
; X64-NEXT: pcmpgtw %mm0, %mm1
; X64-NEXT: movq %mm0, %mm2
; X64-NEXT: punpckhwd %mm1, %mm2 ## mm2 = mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cvtpi2ps %mm2, %xmm0
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT: cvtpi2ps %mm0, %xmm0
; X64-NEXT: retq
%1 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> bitcast (<8 x i8> <i8 64, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> to <1 x i64>), <1 x i64> bitcast (<8 x i8> zeroinitializer to <1 x i64>))
%2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> bitcast (<4 x i16> zeroinitializer to <1 x i64>), <1 x i64> %1)
%3 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %1, <1 x i64> %2)
%4 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> zeroinitializer, <1 x i64> %3)
%5 = shufflevector <4 x float> %4, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%6 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %1, <1 x i64> %2)
%7 = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %5, <1 x i64> %6)
ret <4 x float> %7
}
; Declarations of the MMX/SSE intrinsics exercised by the tests above.
declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr)
declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>)
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)