File: log.S

package info (click to toggle)
libffm 0.21-2
links: PTS
area: main
in suites: slink
size: 172 kB
ctags: 94
sloc: asm: 1,176; makefile: 76; ansic: 10; sh: 2
file content (202 lines) | stat: -rw-r--r-- 4,468 bytes
/*
   libffm	- Free, pretty fast replacement for some math (libm) routines 
			on Linux/AXP, optimized for the 21164

   Copyright (C) 1998  Joachim Wesner <joachim.wesner@frankfurt.netsurf.de>
                  and  Kazushige Goto <goto@statabo.rim.or.jp>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library (see file COPYING.LIB); if not, write 
   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, 
   MA 02139, USA.
*/
/*
   Fast log approximation(s) including range reduction by Joachim Wesner,
   joachim.wesner@frankfurt.netsurf.de, see also mc 8/1991 p. 78-93.
   July 12 1998 JW.

   Note: illegal arguments (e.g. zero or negative x) and NaNs receive no
   special handling yet; the result for such inputs is not meaningful.
*/

/* improved and re-scheduled by Kazushige Goto <goto@statabo.rim.or.jp> */

/*
   Assembler modes (see the GNU as Alpha notes for exact semantics):
     noat      - the assembler may not use $28 ($at) behind our back;
                 the PROF hooks below use $28 explicitly.
     noreorder - keep the instruction order exactly as written; the
                 code is scheduled by hand.
*/
	.set noat
	.set noreorder

/* The constant table is read-only data. */
#ifdef __ELF__
	.section .rodata
#else
	.rdata
#endif

/*
   R: table of 12 doubles (8 bytes each), addressed by byte offset from R.
   R[0]..R[3] are range-reduction and scaling constants, R[4]..R[11] are
   the polynomial coefficients.  With y = z*z the shared code evaluates

     z * ( R[11]     + R[7]*y   + R[10]*y^2 + R[6]*y^3
         + R[8]*y^4  + R[4]*y^5 + R[9]*y^6  + R[5]*y^7 )

   The coefficients are stored in the order the code loads them, not in
   order of power.  The low-order ones are close to 2/((2k+1)*ln 2).
*/
	.align 5
R:
	.t_floating 7.07106781186547572737e-1	# R[0]  @0  SQRT05: sqrt(0.5), mantissa threshold
	.t_floating 1.00000000000000000000e0	# R[1]  @8  1.0: used for m-1 / m+1, and as the log2 scale
	.t_floating 6.93147180559945286227e-1	# R[2]  @16 LOG2: ln(2), scale used by log
	.t_floating 3.01029995663981198017e-1	# R[3]  @24 LOG10_2: log10(2), scale used by log10

	/* Derived from Chebyshev Approx. */
	/* rel. error 1.5e-18             */
	.t_floating 2.62332293764593771357e-1	# R[4]  @32 coefficient of y^5
	.t_floating 2.13182032931477888349e-1	# R[5]  @40 coefficient of y^7
	.t_floating 4.12198585366190917156e-1	# R[6]  @48 coefficient of y^3
	.t_floating 9.61796693925982992823e-1	# R[7]  @56 coefficient of y^1
	.t_floating 3.20598577851792521098e-1	# R[8]  @64 coefficient of y^4
	.t_floating 2.20962847584267652046e-1	# R[9]  @72 coefficient of y^6
	.t_floating 5.77078016348200772967e-1	# R[10] @80 coefficient of y^2
	.t_floating 2.88539008177792677401e0	# R[11] @88 coefficient of y^0

.text

/*
   log2 - base-2 logarithm entry point.

   In:   $f16 = x (double), $27 = address of log2 (used by ldgp)
   Out:  $f0  = result, produced by the shared code at $continue
   Sets up for the shared code:
         0($30) = bit pattern of x (16-byte frame, released at $continue)
         $2     = address of the constant table R
         $f22   = R[0] (SQRT05), $f1 = R[1] (1.0)
         $3     = address of the final scale factor; here R[1] = 1.0,
                  so the shared result is returned unscaled.

   This entry does not return itself: it branches to $continue, which
   lives inside the body of log.
*/
	.align 5
	.globl log2
	.ent log2
log2:
	lda	$30,  -16($30)		# reserve a 16-byte stack frame
	ldgp	$29,.-log2($27)		# load gp; .-log2 is the offset of this insn from the entry
	stt	$f16,   0($30)		# spill x so its bits can be reloaded as an integer
	.frame	$30,16,$26,0

#ifdef PROF
	/* Profiling build only: call _mcount with the return address in $28. */
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue	1

	lda	$2,  R			# $2 = base of constant table
	ldt	$f22, 0($2)		# R[0] = SQRT05
	ldt	$f1,  8($2)		# R[1] = 1.0
	lda	$3,   8($2)		# LOG_X: scale factor = R[1] = 1.0 (plain log2)

	br	$31, $continue		# join the shared code in log
	.end log2

/*
   log10 - base-10 logarithm entry point.

   In:   $f16 = x (double), $27 = address of log10 (used by ldgp)
   Out:  $f0  = result, produced by the shared code at $continue
   Sets up for the shared code:
         0($30) = bit pattern of x (16-byte frame, released at $continue)
         $2     = address of the constant table R
         $f22   = R[0] (SQRT05), $f1 = R[1] (1.0)
         $3     = address of the final scale factor; here R[3] = LOG10_2,
                  so the shared base-2 result is multiplied by log10(2).

   This entry does not return itself: it branches to $continue, which
   lives inside the body of log.
*/
	.align 5
	.globl log10
	.ent log10
log10:
	lda	$30,  -16($30)		# reserve a 16-byte stack frame
	ldgp	$29,.-log10($27)	# load gp; .-log10 is the offset of this insn from the entry
	stt	$f16,   0($30)		# spill x so its bits can be reloaded as an integer
	.frame	$30,16,$26,0

#ifdef PROF
	/* Profiling build only: call _mcount with the return address in $28. */
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue	1

	lda	$2,  R			# $2 = base of constant table
	ldt	$f22, 0($2)		# R[0] = SQRT05
	ldt	$f1,  8($2)		# R[1] = 1.0
	lda	$3,  24($2)		# LOG_X: scale factor = R[3] = LOG10_2

	br	$31, $continue		# join the shared code in log
	.end log10

/*
   log - natural logarithm entry point, followed by the code shared with
   log2 and log10 (label $continue).

   In:   $f16 = x (double), $27 = address of log (used by ldgp)
   Out:  $f0  = result

   State expected at $continue (set up by each of the three entries):
         0($30) = bit pattern of x, 8($30) = scratch slot
         $2     = address of table R
         $3     = address of the final scale factor (here R[2] = LOG2)
         $f22   = R[0] (SQRT05), $f1 = R[1] (1.0), $f16 = x

   Method, as implemented below:
     1. m = x with its sign and exponent replaced by those of SQRT05,
        i.e. the mantissa of x placed in [0.5, 1).
     2. iexp = (bits of x >> 52) - 1022.
     3. If m < SQRT05:  z = (2m - 1) / (2m + 1)  and  iexp -= 1
        otherwise:      z = (m - 1)  / (m + 1).
     4. y = z*z;  p = z * (polynomial in y, coefficients R[4]..R[11]).
     5. result = (p + iexp) * scale, scale being the double at 0($3).

   Registers written: $1, $2, $3, $f0, $f1, $f10, $f11, $f15,
   $f18-$f30.  The 16-byte frame is released before the return.

   NOTE(review): x is never tested.  The sign bit is not masked out of
   the exponent field and zero, denormal, infinite and NaN inputs take
   the same path as normal positive numbers (see the file header note).
*/
	.align 5
	.globl log
	.ent log
log:
	lda	$30,   -16($30)		# reserve a 16-byte stack frame
	ldgp	$29,.-log($27)		# load gp; .-log is the offset of this insn from the entry
	stt	$f16,    0($30)		# spill x so its bits can be reloaded as an integer
	.frame	$30,16,$26,0

#ifdef PROF
	/* Profiling build only: call _mcount with the return address in $28. */
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue	1

	lda	$2,  R			# $2 = base of constant table
	ldt	$f22, 0($2)		# R[0] = SQRT05
	ldt	$f1,  8($2)		# R[1] = 1.0
	lda	$3,  16($2)		# LOG_X: scale factor = R[2] = LOG2 (ln 2)
	.align 4	

/* Shared tail: log2 and log10 branch here after their own setup. */
$continue:
	cpyse	$f22, $f16, $f11	# m = sign/exponent of SQRT05 + fraction of x
	cmptlt	$f11, $f22, $f10	# f10 != 0 iff m < SQRT05
	subt	$f11, $f1,  $f29	# num = m - 1.0
	addt	$f11, $f1,  $f30	# den = m + 1.0

	fbeq	$f10, $34		# skip the doubling unless m < SQRT05
	addt	$f29, $f11, $f29	# num = 2m - 1
	addt	$f30, $f11, $f30	# den = 2m + 1
$34:
	divt	$f29, $f30, $f18	# z = num / den

/*
   The divide takes a long time.  Nothing below reads the quotient $f18
   until the first mult, so the table loads and the integer exponent
   extraction are placed here.
*/

	ldq	$1,     0($30)		# raw 64-bit pattern of x
	ldt	$f23,  32($2)		# R[4]
	ldt	$f27,  64($2)		# R[8]

	srl	$1, 52, $1		# sign bit and biased exponent field
	lda	$1,-1022($1)		# iexp = field - 1022 (matches m in [0.5,1))

	fbeq	$f10, $35		# skip unless m < SQRT05
	subl	$1,1,$1			# m was doubled above, so one power of two less
$35:
	stq	$1,8($30)		# pass iexp to the FP side through the stack

	ldt	$f24,  40($2)		# R[5]
	ldt	$f25,  48($2)		# R[6]
	ldt	$f26,  56($2)		# R[7]
	ldt	$f28,  72($2)		# R[9]
	ldt	$f29,  80($2)		# R[10] (num is no longer needed)
	ldt	$f30,  88($2)		# R[11] (den is no longer needed)

	ldt	$f15,  8($30)		# load iexp (still an integer bit pattern)
	addq	$30, 16, $30		# release the stack frame; no stack access after this
	mult	$f18, $f18, $f19	# y  = z  * z

	mult	$f19, $f19, $f20	# y2 = y  * y
	mult	$f23, $f19, $f23	# t2 = R[4] * y
	mult	$f24, $f19, $f24	# t1 = R[5] * y
	mult	$f25, $f19, $f25	# t3 = R[6] * y

	mult	$f20, $f20, $f21	# y4 = y2 * y2
	mult	$f26, $f19, $f26	# t4 = R[7] * y
	addt	$f23, $f27, $f23	# t2 += R[8]
	addt	$f24, $f28, $f24	# t1 += R[9]

	addt	$f25, $f29, $f25	# t3 += R[10]
	addt	$f26, $f30, $f26	# t4 += R[11]

	mult	$f21, $f20, $f22	# y6 = y4 * y2 (overwrites SQRT05, no longer needed)
	ldt	$f1,  0($3)		# scale factor chosen by the entry point (replaces 1.0)
	mult	$f23, $f21, $f23	# t2 *= y4
	mult	$f24, $f22, $f24	# t1 *= y6
	mult	$f25, $f20, $f25	# t3 *= y2

	cvtqt	$f15, $f0		# iexp: integer -> double
	addt	$f24, $f23, $f23	# sum  = t1 + t2
	addt	$f23, $f25, $f23	# sum += t3
	addt	$f23, $f26, $f23	# sum += t4

	mult	$f23, $f18, $f23	# p = sum * z

	addt	$f23, $f0,  $f0		# p + iexp
	mult	$f0,  $f1,  $f0		# multiply by the scale factor from 0($3)

	ret	$31,($26),1		# return to the caller; result in $f0
	.end	log