File: log.S

package info (click to toggle)
libffm 0.28-6
links: PTS
area: main
in suites: etch, etch-m68k
size: 216 kB
ctags: 184
sloc: asm: 3,028; makefile: 94; ansic: 12; sh: 2
file content (303 lines) | stat: -rw-r--r-- 5,958 bytes
parent folder | download | duplicates (4)
/*
   libffm	- Free, pretty fast replacement for some math (libm) routines 
			on Linux/AXP, optimized for the 21164

   Copyright (C) 1998  Joachim Wesner <joachim.wesner@frankfurt.netsurf.de>
                  and  Kazushige Goto <goto@statabo.rim.or.jp>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library (see file COPYING.LIB); if not, write 
   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, 
   MA 02139, USA.
*/
/*
   Fast log approximation(s) including range reduction by Joachim Wesner,
   joachim.wesner@frankfurt.netsurf.de, see also mc 8/1991 p. 78-93.
   July 12 1998 JW.
*/

/* improved and re-scheduled by Kazushige Goto <goto@statabo.rim.or.jp> */

/* 
   now including illegal/denormal argument handling by Kazushige Goto
   (some fixes by Joachim Wesner), Nov 1998
*/

	.set noat			/* $28 is used explicitly as a pointer below */
	.set noreorder			/* the code below is hand-scheduled */

#ifdef __ELF__
	.section .rodata
#else
	.rdata
#endif

/*
   Constant table, addressed through $28 with fixed byte offsets
   (8 bytes per entry, so R[n] lives at offset 8*n):

     R[0]        sqrt(0.5), threshold used during range reduction
     R[1]        1.0, also the final scale factor for log2
     R[2]        ln(2),     final scale factor for log
     R[3]        log10(2),  final scale factor for log10
     R[4]..R[11] polynomial coefficients; with z the reduced argument and
                 y = z*z the evaluation code combines them as
                   R[5]*y^7 + R[9]*y^6 + R[4]*y^5 + R[8]*y^4
                 + R[6]*y^3 + R[10]*y^2 + R[7]*y  + R[11]
                 and multiplies the sum by z.

   Do not reorder entries: every user refers to them by offset.
*/
	.align 5
R:
	.t_floating 7.07106781186547572737e-1	# R[0]  SQRT05
	.t_floating 1.00000000000000000000e0	# R[1]  1.0
	.t_floating 6.93147180559945286227e-1	# R[2]  LOG2
	.t_floating 3.01029995663981198017e-1	# R[3]  LOG10_2

	/* Derived from Chebyshev Approx. */
	/* rel. error 1.5e-18             */
	.t_floating 2.62332293764593771357e-1	# R[4]  coefficient of y^5
	.t_floating 2.13182032931477888349e-1	# R[5]  coefficient of y^7
	.t_floating 4.12198585366190917156e-1	# R[6]  coefficient of y^3
	.t_floating 9.61796693925982992823e-1	# R[7]  coefficient of y^1
	.t_floating 3.20598577851792521098e-1	# R[8]  coefficient of y^4
	.t_floating 2.20962847584267652046e-1	# R[9]  coefficient of y^6
	.t_floating 5.77078016348200772967e-1	# R[10] coefficient of y^2
	.t_floating 2.88539008177792677401e0	# R[11] constant term

.text

	.align 5
	.globl log2
	.ent log2
/*
   log2: base-2 logarithm of the T-floating value in $f16.  The result is
   produced in $f0 by the shared code at $continue (inside log).

   This entry only screens the argument and prepares the registers that
   $continue expects:
     $1    raw 64-bit image of x, read back from the stack slot
     $3    address of the final scale factor, here R[1] = 1.0
     $28   address of the constant table R
     $f1   R[1] = 1.0
     $f22  R[0] = sqrt(0.5)
     $f30  sign and exponent of x combined with a zero fraction
   The 16-byte frame allocated here is released by whichever path returns.
*/
log2:
	lda	$30,  -16($30)		# 16-byte scratch frame
	ldgp	$29,.-log2($27)
	stt	$f16,   0($30)		# spill x so its bits can be read as an integer
	.frame	$30,16,$26,0

#ifdef PROF
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue	1

	lda	$28,  R			# $28 = base of constant table
	cpyse	$f16, $f31, $f30	# $f30 = sign/exponent of x, fraction 0
	unop
	fblt	$f16, $NaN		# x < 0

	fbeq	$f16, $MInf		# x is +0 or -0

	ldq	$1,     0($30)		# integer image of x
	ldt	$f22, 0($28)		# R[0]
	ldt	$f1,  8($28)		# R[1]

	lda	$3,   8($28)		# LOG_X: scale factor is R[1] = 1.0
	br	$31, $continue
	.end log2

	.align 5
	.globl log10
	.ent log10
/*
   log10: base-10 logarithm of the T-floating value in $f16.  The result
   is produced in $f0 by the shared code at $continue (inside log).

   This entry only screens the argument and prepares the registers that
   $continue expects:
     $1    raw 64-bit image of x, read back from the stack slot
     $3    address of the final scale factor, here R[3] = log10(2)
     $28   address of the constant table R
     $f1   R[1] = 1.0
     $f22  R[0] = sqrt(0.5)
     $f30  sign and exponent of x combined with a zero fraction
   The 16-byte frame allocated here is released by whichever path returns.
*/
log10:
	lda	$30,  -16($30)		# 16-byte scratch frame
	ldgp	$29,.-log10($27)
	stt	$f16,   0($30)		# spill x so its bits can be read as an integer
	.frame	$30,16,$26,0

#ifdef PROF
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue	1

	lda	$28,  R			# $28 = base of constant table
	cpyse	$f16, $f31, $f30	# $f30 = sign/exponent of x, fraction 0
	unop
	fblt	$f16, $NaN		# x < 0
 
	nop
	fbeq	$f16, $MInf		# x is +0 or -0

	ldq	$1,     0($30)		# integer image of x
	ldt	$f22, 0($28)		# R[0]
	ldt	$f1,  8($28)		# R[1]
	lda	$3,  24($28)		# LOG_X: scale factor is R[3] = log10(2)

	br	$31, $continue
	.end log10

	.align 5
	.globl log
	.ent log
/*
   log: natural logarithm of the T-floating value in $f16, result in $f0.

   The entry screens the argument, prepares the registers listed below and
   then falls straight through into $continue, which log2 and log10 reach
   with a branch:
     $1    raw 64-bit image of x, read back from the stack slot
     $3    address of the final scale factor, here R[2] = ln(2)
     $28   address of the constant table R
     $f1   R[1] = 1.0
     $f22  R[0] = sqrt(0.5)
     $f30  sign and exponent of x combined with a zero fraction
   The 16-byte frame allocated here is released by whichever path returns.
*/
log:
	lda	$30,   -16($30)		# 16-byte scratch frame
	ldgp	$29,.-log($27)
	stt	$f16,    0($30)		# spill x so its bits can be read as an integer
	.frame	$30,16,$26,0

#ifdef PROF
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue	1

	lda	$28,  R			# $28 = base of constant table
	cpyse	$f16, $f31, $f30	# $f30 = sign/exponent of x, fraction 0
	fblt	$f16, $NaN		# x < 0
	fbeq	$f16, $MInf		# x is +0 or -0

	ldq	$1,     0($30)		# integer image of x
	ldt	$f22, 0($28)		# R[0]
	ldt	$f1,  8($28)		# R[1]
	lda	$3,  16($28)		# LOG_X: scale factor is R[2] = ln(2)
	.align 4	

/*
   Shared evaluation path for log2, log10 and log.  Expected on entry:
     $1    raw bits of x             $3    address of the scale factor
     $28   address of R              $f16  x
     $f1   R[1] = 1.0                $f22  R[0] = sqrt(0.5)
     $f30  sign/exponent of x combined with a zero fraction

   Outline:
     1. m    = fraction of x under the exponent of sqrt(0.5), so 0.5 <= m < 1
        iexp = exponent field - 1022
     2. when m < sqrt(0.5):  z = (2m - 1)/(2m + 1) and iexp is decremented,
        otherwise            z = (m - 1)/(m + 1)
     3. with y = z*z, p = polynomial in y built from R[4]..R[11]
     4. result = (p*z + iexp) * scale factor loaded through $3
   The stack frame is released before the final arithmetic; slot 8($30) is
   only used to move iexp into a floating-point register.
*/
$continue:
	srl	$1, 52, $4		# $4 = bits 63..52 of x
	cpyse	$f22, $f16, $f11	# copy E: m = fraction of x, sign/exponent of R[0]
	lda	$5, 0x7ff		# all-ones exponent pattern
	fbeq	$f30, $SubNormal	# exponent field is 0 -> denormal

	srl	$1, 52, $1
	cmpeq	$4, $5, $6		# exponent field == 0x7ff ?
	lda	$1,-1022($1)		# iexp = exponent field - 1022
	bne	$6, $NaN		# Inf/NaN encodings leave through $NaN
	.align 4

/* $SubNormal re-enters here with m in $f11 and iexp in $1 */
$SubContinue:
	cmptlt	$f11, $f22, $f10	# non-zero when m < sqrt(0.5)
	subt	$f11, $f1,  $f29	# num = m - R[1]
	addt	$f11, $f1,  $f30	# den = m + R[1]

	fbeq	$f10, $34		# skip the adjustment unless m < sqrt(0.5)
	addt	$f29, $f11, $f29	# num += m   (2m - 1)
	subl	$1,1,$1			# iexp--
	addt	$f30, $f11, $f30	# den += m   (2m + 1)
$34:
	divt	$f29, $f30, $f18	# z = num / den
	stq	$1,8($30)		# park iexp for the integer -> FP transfer
	.align 4

/*
   Original note: "wait, wait, wait, for a long time".  The quotient in
   $f18 is first needed by the mult at "y = x * x"; the coefficient loads
   are placed in between.
*/

	ldt	$f23,  32($28)		# R[4]
	ldt	$f27,  64($28)		# R[8]
	ldt	$f24,  40($28)		# R[5]
	ldt	$f25,  48($28)		# R[6]
	ldt	$f26,  56($28)		# R[7]
	ldt	$f28,  72($28)		# R[9]
	ldt	$f29,  80($28)		# R[10]
	ldt	$f30,  88($28)		# R[11]

	ldt	$f15,  8($30)		# load iexp
	addq	$30, 16, $30		# release the frame; no stack access follows
	mult	$f18, $f18, $f19	# y  = z  * z

	mult	$f19, $f19, $f20	# y2 = y  * y
	mult	$f23, $f19, $f23	# t2 = R[4] * y
	mult	$f24, $f19, $f24	# t1 = R[5] * y
	mult	$f25, $f19, $f25	# t3 = R[6] * y

	mult	$f20, $f20, $f21	# y4 = y2 * y2
	mult	$f26, $f19, $f26	# t4 = R[7] * y
	addt	$f23, $f27, $f23	# t2 += R[8]
	addt	$f24, $f28, $f24	# t1 += R[9]

	addt	$f25, $f29, $f25	# t3 += R[10]
	addt	$f26, $f30, $f26	# t4 += R[11]

	mult	$f21, $f20, $f22	# y6 = y4 * y2   ($f22 no longer holds R[0])
	ldt	$f1,  0($3)		# scale factor selected by the entry point
	mult	$f23, $f21, $f23	# t2 *= y4
	mult	$f24, $f22, $f24	# t1 *= y6
	mult	$f25, $f20, $f25	# t3 *= y2

	cvtqt	$f15, $f0		# int -> float
	addt	$f24, $f23, $f23	# sum  = t1 + t2   (kept in $f23)
	addt	$f23, $f25, $f23	# sum += t3
	addt	$f23, $f26, $f23	# sum += t4

	mult	$f23, $f18, $f23	# sum *= z

	addt	$f23, $f0,  $f0		# sum + iexp
	mult	$f0,  $f1,  $f0		# apply the scale factor

	ret	$31,($26),1
	.align 4

# Exceptional Handling
#     log(x<0, -Inf) = NaN
#     log(+Inf) = +Inf
#     log(+0)   = -Inf
#     log(-0)   = NaN
#     log(NaN)  = NaN

/*
   Exit for arguments that have no finite logarithm.  Reached
     - from the entry points when x < 0,
     - from $MInf when x is -0,
     - from $continue when the exponent field of x is 0x7ff (Inf or NaN).
   On every one of these paths the slot at 0($30) still holds the
   original argument, so it is re-examined here: exactly +Inf
   (0x7ff0000000000000) is handed back unchanged, which gives
   log(+Inf) = +Inf as listed in the table above; every other
   argument yields the all-ones NaN pattern.
   Clobbers $0, $1, $22; releases the 16-byte frame and returns.
*/
$NaN:
	ldq	$0,  0($30)		# raw bits of the original argument
	lda	$1,  0x7ff
	lda	$22, -1			# all-ones bit pattern = NaN
	sll	$1,  52, $1		# 0x7ff0000000000000 = +Inf
	cmpeq	$0,  $1, $1		# $1 = 1 only when x is exactly +Inf
	cmoveq	$1,  $22, $0		# anything else: result becomes NaN
	stq	$0,  0($30)
	ldt	$f0, 0($30)		# move the chosen pattern into the FP result
	addq	$30, 16, $30
	ret
	.align 4

/*
   Zero argument; the entry points branch here when fbeq sees x == 0,
   which covers both +0 and -0.  The sign is taken from the integer image
   of x in the stack slot: -0 is forwarded to $NaN, +0 returns -Inf.
   Clobbers $0; releases the 16-byte frame and returns.
*/
$MInf:
	ldq	$0, 0($30)		### -0 ?  (sign bit makes the quadword negative)
	blt	$0,$NaN			### log(-0) -> NaN
	lda	$0, 0xfff
	sll	$0, 52, $0		# 0xfff0000000000000 = -Inf
	stq	$0,  0($30)
	ldt	$f0, 0($30)		# move the bit pattern into the FP result
	addq	$30, 16, $30
	ret
	.align 4

/*
   Denormal argument: exponent field 0 with a non-zero fraction (zero has
   already been sent to $MInf).  Entered from $continue with the raw bits
   of x still in $1.

   The fraction is shifted left until bit 52 becomes set, while $1 (iexp,
   starting at -1021) is reduced by the total shift count.  Coarse steps
   of 20, 20, 12 and 4 bits are tried first, each only when the bits it
   would shift out of the fraction field are known to be zero; $SubLoop
   finishes one bit at a time.  The shifted pattern is stored back so its
   fraction can be combined with the exponent of R[0], exactly as the
   normal path does, and control returns to $SubContinue.

   Uses $0, $22, $23, $25 as scratch and overwrites the slot at 0($30).
*/
$SubNormal:
	ldah	$23, 0x0010
	mov	$1, $22			# $22 = raw bits of x (only fraction bits set)
	sll	$23, 32, $23		# generate 0x00100000 00000000
	lda	$1,-1021		### initial iexp, adjusted by every shift below
	.align 4

	zap	$22,  0x0f, $25		# keep bits 63..32
	bne	$25, $SubNext1		# something there: a 20-bit shift is too much
	sll	$22, 20, $22
	subq	$1, 20, $1

	zap	$22, 0x0f, $25		# same test after the first coarse shift
	bne	$25, $SubNext1
	sll	$22, 20, $22
	subq	$1, 20, $1
	.align 4

$SubNext1:
	zap	$22, 0x1f, $25		# keep bits 63..40
	bne	$25, $SubNext2
	sll	$22, 12, $22
	subq	$1, 12, $1

$SubNext2:
	zap	$22, 0x3f, $25		# keep bits 63..48
	bne	$25, $SubLoop
	sll	$22, 4, $22
	subq	$1, 4, $1
	.align 4

$SubLoop:
	sll	$22,   1, $22		# fp << 1
	and	$22, $23, $0		# has the leading one reached bit 52 ?
	
	subq	$1,   1, $1		# exp--
	beq	$0, $SubLoop

	stq	$22,   0($30)		# normalised bits replace x in the stack slot
	nop
	nop
	nop

	ldt	$f16,  0($30)
	cpyse	$f22, $f16, $f11	# copy E: m = shifted fraction, sign/exponent of R[0]
	br	$SubContinue
	.end	log