; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
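
; The product %idxY * %ref_frame_stride feeds both load addresses below, and
; the bitcast/shufflevector/extractelement chain selects exactly the low
; 32 bits of the first load, so after DAG combining the function should
; reduce to one address computation and a single 32-bit load, as the
; assertions verify.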
define i32 @t(ptr %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
; X86-LABEL: t:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%eax,%ecx), %eax
; X86-NEXT: retl
;
; X64-LABEL: t:
; X64: ## %bb.0: ## %entry
; X64-NEXT: imull %ecx, %esi
; X64-NEXT: addl %edx, %esi
; X64-NEXT: movslq %esi, %rax
; X64-NEXT: movl (%rdi,%rax), %eax
; X64-NEXT: retq
entry:
%tmp7 = mul i32 %idxY, %ref_frame_stride ; <i32> [#uses=2]
%tmp9 = add i32 %tmp7, %idxX ; <i32> [#uses=1]
%tmp11 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp9 ; <ptr> [#uses=1]
%tmp13 = load i32, ptr %tmp11, align 4 ; <i32> [#uses=1]
%tmp18 = add i32 %idxX, 4 ; <i32> [#uses=1]
%tmp20.sum = add i32 %tmp18, %tmp7 ; <i32> [#uses=1]
%tmp21 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp20.sum ; <ptr> [#uses=1]
%tmp23 = load i16, ptr %tmp21, align 2 ; <i16> [#uses=1]
%tmp2425 = zext i16 %tmp23 to i64 ; <i64> [#uses=1]
%tmp26 = shl i64 %tmp2425, 32 ; <i64> [#uses=1]
%tmp2728 = zext i32 %tmp13 to i64 ; <i64> [#uses=1]
%tmp29 = or i64 %tmp26, %tmp2728 ; <i64> [#uses=1]
%tmp3454 = bitcast i64 %tmp29 to double ; <double> [#uses=1]
%tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0 ; <<2 x double>> [#uses=1]
%tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1 ; <<2 x double>> [#uses=1]
%tmp42 = bitcast <2 x double> %tmp36 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
%tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp48 = extractelement <4 x i32> %tmp47, i32 0 ; <i32> [#uses=1]
ret i32 %tmp48
}

; Test CSE for SDAG nodes with multiple results (UMUL_LOHI).
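; square_high returns the high 96 bits of x*x. Splitting the zero-extended
; input as x = a + b*2^64 (on x86-64, a is the low 64 bits in %rdi and b is
; the high 32 bits from %esi) gives x^2 = a^2 + 2*a*b*2^64 + b^2*2^128.
; Both halves of the widening a*b product come from a single UMUL_LOHI node,
; so with CSE the X64 output should contain exactly three multiplies
; (a*b, a*a, b*b); 2*a*b is formed by adding the a*b halves in twice rather
; than multiplying a second time.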
define i96 @square_high(i96 %x) nounwind {
; X86-LABEL: square_high:
; X86: ## %bb.0: ## %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %edi
; X86-NEXT: addl %eax, %ebx
; X86-NEXT: movl %edx, %ebp
; X86-NEXT: adcl $0, %ebp
; X86-NEXT: addl %eax, %ebx
; X86-NEXT: adcl %edx, %ebp
; X86-NEXT: setb %al
; X86-NEXT: movzbl %al, %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, (%esp) ## 4-byte Spill
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl %esi, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movl %eax, %edi
; X86-NEXT: addl (%esp), %edi ## 4-byte Folded Reload
; X86-NEXT: adcl $0, %esi
; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: adcl %edi, %ecx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: adcl $0, %eax
; X86-NEXT: setb %dl
; X86-NEXT: addl %ebp, %ebx
; X86-NEXT: adcl %ecx, %edi
; X86-NEXT: movzbl %dl, %ecx
; X86-NEXT: adcl %eax, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %eax
; X86-NEXT: addl %eax, %esi
; X86-NEXT: adcl %edx, %ecx
; X86-NEXT: movl %edi, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: square_high:
; X64: ## %bb.0: ## %entry
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %rdi
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: movq %rax, %r8
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: mulq %rdi
; X64-NEXT: addq %r8, %rdx
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: adcq $0, %rax
; X64-NEXT: addq %rdx, %r8
; X64-NEXT: adcq %rsi, %rax
; X64-NEXT: imulq %rcx, %rcx
; X64-NEXT: addq %rax, %rcx
; X64-NEXT: shrdq $32, %rcx, %r8
; X64-NEXT: shrq $32, %rcx
; X64-NEXT: movq %r8, %rax
; X64-NEXT: movq %rcx, %rdx
; X64-NEXT: retq
entry:
%conv = zext i96 %x to i192
%mul = mul nuw i192 %conv, %conv
%shr = lshr i192 %mul, 96
%conv2 = trunc i192 %shr to i96
ret i96 %conv2
}