1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
|
/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Debug-only cross-check, compiled out by the "#if 0" above.
   When enabled, this wrapper takes the public name __muldf3, calls
   __muldf3_c (presumably a C reference implementation; it is not defined
   in this file) and then __muldf3_asm on the same operands, and branches
   to abort unless both calls return the same r0/r1 pair or both results
   look like NaNs.  The #define at the end renames the real routine below
   to __muldf3_asm.
   Instructions following a ".d" branch are delay-slot instructions and
   execute before the branch target (per the ARC ISA).  */
.global __muldf3
.balign 4
__muldf3:
/* Save return address and incoming r2, r3, r0; r1 is pushed in the delay
   slot of the call.  Stack after the pushes:
   [sp,0]=r1 [sp,4]=r0 [sp,8]=r3 [sp,12]=r2 [sp,16]=blink.  */
push_s blink
push_s r2
push_s r3
push_s r0
bl.d __muldf3_c
push_s r1
/* Reload the saved r2/r3, then reuse their stack slots to hold the r0/r1
   returned by the first call.  */
ld_s r2,[sp,12]
ld_s r3,[sp,8]
st_s r0,[sp,12]
st_s r1,[sp,8]
/* Restore the saved r1 and (in the delay slot) r0, then call the assembly
   implementation with the same operands.  */
pop_s r1
bl.d __muldf3_asm
pop_s r0
/* Pop the first call's result into r3 (its r1) and r2 (its r0).  */
pop_s r3
pop_s r2
pop_s blink
/* Return if r0 == r2 and r1 == r3, i.e. both results are bit-identical.  */
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
/* Otherwise accept the mismatch only if every bit of 0x7ff80000 is set in
   both DBL0H and DBL1H.  */
and r12,DBL0H,DBL1H
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
jeq_s [blink]
b abort
#define __muldf3 __muldf3_asm
#endif /* DEBUG */
__muldf3_support: /* This label makes debugger output saner. */
/* FUNC and the DBL0L/DBL0H/DBL1L/DBL1H register names come from the
   included arc-ieee-754.h, which is not visible here; presumably they are
   the low/high words of the first and second double operand.  */
.balign 4
FUNC(__muldf3)
.Ldenorm_2:
/* Reached from .Ldenorm_dbl1 when DBL1H, with bit 31 masked off, is zero,
   so any significand bits of that operand are in DBL1L.  This code
   normalizes the operand, subtracts the shift count from the exponent
   held in DBL0H, and falls through into __muldf3 to redo the
   multiplication.  */
breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
/* Delay slot: r12 = number of redundant sign bits in DBL1L, flags set
   from DBL1L (norm semantics per the ARC ISA).  */
norm.f r12,DBL1L
/* r12 = left-shift count: 21 when the "mi" condition holds, otherwise
   the norm result plus 22.  */
mov.mi r12,21
add.pl r12,r12,22
/* r11 = -r12, used below as the complementary right-shift / rotate
   count (assumes the shifter only uses the low five bits of the count -
   TODO confirm against the ISA manual).  */
neg r11,r12
/* Move the shift count up to bit 20, the bottom of the field selected by
   the 0x7ff00000 mask used in __muldf3.  */
asl_s r12,r12,20
/* DBL1H = part of DBL1L that moves into the high word; Z is set if that
   part is zero.  DBL1L is rotated by the same count.  */
lsr.f DBL1H,DBL1L,r11
ror DBL1L,DBL1L,r11
/* Charge the normalization shift against the other operand.  */
sub_s DBL0H,DBL0H,r12
/* If the lsr result was zero, the rotated word becomes the high word.  */
mov.eq DBL1H,DBL1L
/* Remove from the low word the bits that now live in DBL1H.  */
sub_l DBL1L,DBL1L,DBL1H
/* Fall through. */
.global __muldf3
/* __muldf3: multiply the double held in DBL0H:DBL0L by the double held in
   DBL1H:DBL1L.  Every return path in this file writes DBL0H and DBL0L and
   then jumps through blink, so the product is returned in DBL0H:DBL0L.
   Scratch registers written here and in the helper paths: r4-r12 and the
   multiplier result registers mlo/mhi.
   The 64x64-bit significand product is built from four mulu64 partial
   products and accumulated into r7:r5:r4 (plus r8 for the lowest word).
   Instructions following a ".d" branch are delay-slot instructions (per
   the ARC ISA).  */
.balign 4
__muldf3:
/* Partial product 1: low word * low word.  */
mulu64 DBL0L,DBL1L
/* r9 = 0x7ff00000 (exponent mask), loaded from the literal .L7ff00000.
   NOTE(review): the offset 0x68 is a hard-coded literal standing in for
   the expression in the trailing comment; it appears to depend on the
   exact size of the code between here and .L7ff00000, so any change in
   that code needs this offset re-checked.  */
ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)]
/* r6 = low 20 bits of DBL0H with bit 20 (implicit leading one) set.  */
bmsk r6,DBL0H,19
bset r6,r6,20
/* r11 = exponent field of operand 0; zero means denormal or zero.  */
and r11,DBL0H,r9
breq.d r11,0,.Ldenorm_dbl0
/* Delay slot: r12 = exponent field of operand 1.  */
and r12,DBL1H,r9
breq.d r12,0,.Ldenorm_dbl1
/* Delay slot: r8 = least significant word of the product.  */
mov r8,mlo
mov r4,mhi
/* Partial product 2: high significand of operand 0 * low word of
   operand 1.  */
mulu64 r6,DBL1L
/* An all-ones exponent field in either operand means infinity or NaN.
   The delay slots build r10 = low 20 bits of DBL1H with bit 20 set.  */
breq.d r11,r9,.Linf_nan
bmsk r10,DBL1H,19
breq.d r12,r9,.Linf_nan
bset r10,r10,20
/* Accumulate partial product 2 into r5:r4.  */
add.f r4,r4,mlo
adc r5,mhi,0
/* Partial product 3: high significand of operand 1 * low word of
   operand 0.  */
mulu64 r10,DBL0L
add_s r12,r12,r11 ; add exponents
add.f r4,r4,mlo
adc r5,r5,mhi
/* Partial product 4: high significand * high significand.  */
mulu64 r6,r10
/* Fold a non-zero lowest product word into bit 0 of r4 as a sticky bit;
   meanwhile derive constants from r9.  */
tst r8,r8
bclr r8,r9,30 ; 0x3ff00000
bset.ne r4,r4,0 ; put least significant word into sticky bit
bclr r6,r9,20 ; 0x7fe00000
add.f r5,r5,mlo
adc r7,mhi,0 ; fraction product in r7:r5:r4
/* r10 = r7 >> 9; Z is set when no bit at position 9 or above is set in
   r7.  In that case r8 becomes r9 - r8.  */
lsr.f r10,r7,9
rsub.eq r8,r8,r9 ; 0x40000000
sub r12,r12,r8 ; subtract bias + implicit 1
/* Unsigned r12 >= 0x7fe00000 means the exponent is out of range in
   either direction; .Linf_denorm sorts out overflow from underflow.  */
brhs.d r12,r6,.Linf_denorm
/* Delay slot: r10 = 12 - r10, the left-shift count for .Lshift_frac.  */
rsub r10,r10,12
.Lshift_frac:
/* Common tail, entered by fall-through from the main path and by branches
   from .Linf_denorm and .Lshift32_frac.
   In:  r7:r5:r4 = fraction product, r10 = left-shift count,
        r12 = exponent contribution for the high word,
        DBL0H/DBL1H = words whose bit 31 XOR gives the result sign.
   Out: DBL0H:DBL0L = rounded result; returns through blink.  */
/* r8 = -r10, used as the complementary right-shift count (assumes the
   shifter uses only the low five bits of the count - TODO confirm).  */
neg r8,r10
/* r6 = bits of r4 that fall below the result; DBL0L = bits of r4 that
   stay in the low result word.  */
asl r6,r4,r10
lsr DBL0L,r4,r8
/* Carry = top bit of r6 (the round bit); Z = all lower bits are zero.
   If Z, test bit 0 of DBL0L; if that bit is clear as well, cmp.eq
   executes and its flag result replaces the carry from add.f, so an
   exact tie with an even low word is not rounded up.  */
add.f 0,r6,r6
btst.eq DBL0L,0
cmp.eq r4,r4 ; round to nearest / round to even
/* Merge in the bits contributed by r5 and add the rounding carry.  */
asl r4,r5,r10
lsr r5,r5,r8
adc.f DBL0L,DBL0L,r4
/* N = XOR of the operand sign bits.  The adc below still consumes the
   carry produced by adc.f above, so xor.f is relied on not to change C
   (presumably logical ops only update Z and N - confirm in the ISA).  */
xor.f 0,DBL0H,DBL1H
asl r7,r7,r10
add_s r12,r12,r5
/* High word = exponent contribution + shifted fraction + carry.  */
adc DBL0H,r12,r7
j_s.d [blink]
/* Delay slot: set the sign bit when the sign XOR was negative.  */
bset.mi DBL0H,DBL0H,31
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* If one number is denormal, subtract some from the exponent of the other
one (if the other exponent is too small, return 0), and normalize the
denormal. Then re-run the computation. */
.Lret0_2:
/* Zero result for the .Ldenorm_2 path (taken when DBL1L is zero there).
   The shift pair clears every bit of DBL0H except bit 31, and the low
   word is zeroed in the delay slot of the return.  */
lsr_s DBL0H,DBL0H,31
asl_s DBL0H,DBL0H,31
j_s.d [blink]
mov_s DBL0L,0
.balign 4
.Ldenorm_dbl0:
/* Operand 0 has a zero exponent field.  Swap the two operands word by
   word through r12 so that the operand with the zero exponent field is in
   DBL1, recompute r11 as the exponent field of the new DBL0H, and fall
   through into .Ldenorm_dbl1.  */
mov_s r12,DBL0L
mov_s DBL0L,DBL1L
mov_s DBL1L,r12
mov_s r12,DBL0H
mov_s DBL0H,DBL1H
mov_s DBL1H,r12
and r11,DBL0H,r9
.Ldenorm_dbl1:
/* Operand 1 (DBL1H:DBL1L) has a zero exponent field; r11 holds the
   exponent field of operand 0 and r9 the mask 0x7ff00000.  This code
   normalizes operand 1, compensates in DBL0H, and restarts __muldf3.  */
/* Operand 0 has an all-ones exponent field: infinity / NaN handling.  */
brhs r11,r9,.Linf_nan
/* Branch to the zero result when 0x3ca00001 >= r11 (unsigned), i.e. the
   other exponent is too small.  */
brhs 0x3ca00001,r11,.Lret0
/* Net effect of these three instructions: subtract bit 31 of DBL1H from
   DBL0H and clear bit 31 of DBL1H.  Z (from bmsk.f) is set when the
   remaining 31 bits of DBL1H are zero.  */
sub_s DBL0H,DBL0H,DBL1H
bmsk.f DBL1H,DBL1H,30
add_s DBL0H,DBL0H,DBL1H
/* High word empty: the significand is in DBL1L only, handled by
   .Ldenorm_2.  */
beq.d .Ldenorm_2
/* Delay slot: r12 = number of redundant sign bits in DBL1H (per the ARC
   ISA definition of norm).  */
norm r12,DBL1H
/* r12 = left-shift count for the operand; r5 = that count moved up to
   bit 20.  */
sub_s r12,r12,10
asl r5,r12,20
asl_s DBL1H,DBL1H,r12
/* Charge the normalization shift against the other operand.  */
sub DBL0H,DBL0H,r5
/* r6 = bits of DBL1L that move up into the high word (right shift by
   the negated count; assumes only the low five bits of the count are
   used - TODO confirm).  */
neg r5,r12
lsr r6,DBL1L,r5
asl_s DBL1L,DBL1L,r12
b.d __muldf3
/* Delay slot: merge the carried-over bits into DBL1H.  */
add_s DBL1H,DBL1H,r6
/* Zero result with sign: DBL0H ends up holding only bit 31 of
   (DBL0H XOR DBL1H), and DBL0L is cleared in the delay slot of the
   return.  Reached from .Ldenorm_dbl1 and from .Lshift32_frac.  */
.Lret0: xor_s DBL0H,DBL0H,DBL1H
/* DBL1H = the XOR with bit 31 cleared; XOR-ing it back in leaves just
   bit 31.  */
bclr DBL1H,DBL0H,31
xor_s DBL0H,DBL0H,DBL1H
j_s.d [blink]
mov_s DBL0L,0
.balign 4
.Linf_nan:
/* At least one operand has an all-ones exponent field (infinity or NaN).
   Result: DBL0L = 0 and DBL0H = the larger of the two high words with
   their sign bits cleared, unless either operand is zero, in which case
   DBL0H is all ones.  Bit 31 is then set from the XOR of the operand
   signs.
   NOTE(review): because DBL0L is always forced to 0 and only the high
   words are compared, a NaN whose fraction bits are all in its low word
   appears to come out as an infinity - confirm whether that is
   acceptable for this target.  */
/* r12 = DBL1H without sign; DBL1H = sign XOR (in bit 31).  */
bclr r12,DBL1H,31
xor_s DBL1H,DBL1H,DBL0H
bclr_s DBL0H,DBL0H,31
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
/* Z = operand 0 is zero (sign already cleared).  */
or.f 0,DBL0H,DBL0L
mov_s DBL0L,0
/* Only if operand 0 was non-zero: Z = operand 1 is zero.  */
or.ne.f DBL1L,DBL1L,r12
/* DBL0L is 0 here, so this sets DBL0H to all ones.  */
not_s DBL0H,DBL0L ; inf * 0 -> NaN
/* Neither operand was zero: use the larger high word instead.  */
mov.ne DBL0H,r8
tst_s DBL1H,DBL1H
j_s.d [blink]
/* Delay slot: apply the sign computed above.  */
bset.mi DBL0H,DBL0H,31
/* We have checked for infinity / NaN input before, and transformed
denormalized inputs into normalized inputs. Thus, the worst-case
out-of-range exponents are:
1 + 1 - 0x400 == 0xc02 : maximum underflow
0x7fe + 0x7fe - 0x3ff == 0xbfd : maximum overflow
N.B. 0x7e and 0x7f are also values for overflow.
If (r12 <= -54), we have an underflow to zero. */
.balign 4
.Linf_denorm:
/* Entered from the main path when r12 (the exponent contribution, held
   from bit 20 upwards) is unsigned >= 0x7fe00000.  Separates overflow
   (-> .Linf) from underflow, where the fraction has to be shifted
   further right.  r10 holds the left-shift count computed by the main
   path.  */
/* Top nibble of r12 below 0xc: treat as overflow.  */
lsr r6,r12,28
brlo.d r6,0xc,.Linf
/* Delay slot: r6 = r12 shifted right arithmetically by 20, i.e. the
   exponent as a signed number.  */
asr r6,r12,20
/* Add the exponent to the shift count; flags reflect the sum.  */
add.f r10,r10,r6
/* Still a positive left shift: use the common tail with r12 = 0 (set
   in the delay slot).  */
brgt.d r10,0,.Lshift_frac
mov_s r12,0
/* Sum exactly zero (Z from add.f above; this relies on brgt.d and mov_s
   leaving the flags alone): no shifting, only rounding.  The delay slot
   adds 32 to r10 for the fall-through into .Lshift32_frac.  */
beq.d .Lround_frac
add r10,r10,32
.Lshift32_frac:
/* Shift the fraction r7:r5:r4 right by one whole word: r4 <- r5,
   r5 <- r7, r7 <- 0.  If the discarded r4 was non-zero, bit 1 of the new
   r4 is set as a sticky bit.  r10 has already had 32 added by the
   instruction that precedes entry to this label.  */
tst r4,r4
mov r4,r5
bset.ne r4,r4,1
mov r5,r7
/* r10 >= 1: finish with a normal left shift in .Lshift_frac.  */
brge.d r10,1,.Lshift_frac
/* Delay slot: clear the vacated top word.  */
mov r7,0
/* r10 == 0: no further shifting, only rounding.  */
breq.d r10,0,.Lround_frac
/* Delay slot: add 32 to r10 for another whole-word step.  */
add r10,r10,32
/* Repeat while the adjusted count is above 21; otherwise return zero.  */
brgt r10,21,.Lshift32_frac
b_s .Lret0
.Lround_frac:
/* Rounding for the case where no bit shift is needed: the result words
   are r7 (high) and r5 (low), and r4 holds the bits below them.  No sign
   bit is set on this path.  */
/* Carry = bit 31 of r4 (round bit); Z = the remaining bits of r4 are
   zero.  If Z, Z is replaced by "bit 0 of r5 is clear".  */
add.f 0,r4,r4
btst.eq r5,0
mov_s DBL0L,r5
mov_s DBL0H,r7
/* NOTE(review): as written, the carry is added only when Z is still set
   here, i.e. when the bits below the round bit are zero and r5 is even.
   That looks different from the round-to-nearest-even sequence in
   .Lshift_frac - confirm the intended condition.  */
adc.eq.f DBL0L,DBL0L,0
j_s.d [blink]
/* Delay slot: propagate a carry out of the low word.  */
adc.eq DBL0H,DBL0H,0
/* Overflow: return infinity.  DBL0L = 0, DBL0H = r9 (0x7ff00000), and
   bit 31 is set when the XOR of the operand high words is negative.  */
.Linf: mov_s DBL0L,0
/* N = XOR of the sign bits.  */
xor.f DBL1H,DBL1H,DBL0H
mov_s DBL0H,r9
j_s.d [blink]
/* Delay slot: apply the sign.  */
bset.mi DBL0H,DBL0H,31
/* ENDFUNC is a macro from the included header (not visible here);
   presumably it emits the size / type directives for __muldf3.  */
ENDFUNC(__muldf3)
.balign 4
/* Literal holding the exponent mask 0x7ff00000.  Per the trailing comment
   on the "ld.as r9,[pcl,...]" at the start of __muldf3, that load is meant
   to fetch this word, using a word-scaled offset from pcl; keep the
   literal 4-byte aligned and at the distance that load expects.  */
.L7ff00000:
.long 0x7ff00000
|