File: sqrt.S

package info (click to toggle)
libffm 0.21-2
links: PTS
area: main
in suites: slink
size: 172 kB
ctags: 94
sloc: asm: 1,176; makefile: 76; ansic: 10; sh: 2
file content (120 lines) | stat: -rw-r--r-- 4,100 bytes
/* Copyright (C) 1996 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David Mosberger <davidm@cs.arizona.edu>, 1996.
   Based on public-domain C source by Linus Torvalds.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA. */

/* This version is much faster than generic sqrt implementation, but
   it doesn't handle exceptional values or the inexact flag.  Don't use
   this if _IEEE_FP or _IEEE_FP_INEXACT is in effect. */

/* Faster 1 ulp error version by removing final last-bit fiddling, by 
   Joachim Wesner <joachim.wesner@frankfurt.netsurf.de>, July 1998 */

/* Modified and re-scheduled by Kazushige Goto <goto@statabo.rim.or.jp> */

	.set noat
	.set noreorder

#ifdef __ELF__
	.section .rodata
#else
	.rdata
#endif

	.align 5        # align to cache line

sqrtdata:
	.quad 0x3fefffffffffffff         
	.quad 0x3ff0000000000001         
	.quad 0x3fe0000000000000         
	.quad 0x3ff7ffffffc00000         

.long   0x1500, 0x2ef8,   0x4d67,  0x6b02,  0x87be,  0xa395,  0xbe7a,  0xd866
.long   0xf14a, 0x1091b, 0x11fcd, 0x13552, 0x14999, 0x15c98, 0x16e34, 0x17e5f
.long  0x18d03, 0x19a01, 0x1a545, 0x1ae8a, 0x1b5c4, 0x1bb01, 0x1bfde, 0x1c28d
.long  0x1c2de, 0x1c0db, 0x1ba73, 0x1b11c, 0x1a4b5, 0x1953d, 0x18266, 0x16be0
.long  0x1683e, 0x179d8, 0x18a4d, 0x19992, 0x1a789, 0x1b445, 0x1bf61, 0x1c989
.long  0x1d16d, 0x1d77b, 0x1dddf, 0x1e2ad, 0x1e5bf, 0x1e6e8, 0x1e654, 0x1e3cd
.long  0x1df2a, 0x1d635, 0x1cb16, 0x1be2c, 0x1ae4e, 0x19bde, 0x1868e, 0x16e2e
.long  0x1527f, 0x1334a, 0x11051,  0xe951,  0xbe01,  0x8e0d,  0x5924,  0x1edd

	.text

	.globl  sqrt
	.align 5
	.ent  sqrt , 0
sqrt: 	
	.frame $30 ,  16, $26  
	lda	$30 ,    -16($30)
	ldgp	$29 , .-sqrt($27)
	stt	$f16, 0($30) 

#ifdef PROF
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
	.prologue 1

	lda	$4 , sqrtdata			# load base address into $4 
	ldah	$2 , 0x5fe8			# e0    :
	nop
	ldq	$3 ,  0($30) 			# .. e1 :

	ldt	$f12,  0x10($4)			# e0    :
	ldt	$f18,  0x18($4)			# .. e1 :
	srl	$3 , 33, $1 			# e0    :
	mult	$f16, $f12, $f11
	subl	$2 , $1 , $2 			# e0    :
	addt	$f12, $f12, $f17		# .. fa : $f17 = 1.0
	srl	$2 , 12, $1 			# e0    :
	and	$1 , 0xfc, $1 			# .. e1 :
	addq	$1 , $4 , $1 			# e0    :
	ldl	$1 , 0x20 ($1)			# .. e1 :
	addt	$f12, $f17, $f15		# fa    : $f15 = 1.5
	subl	$2 , $1 , $2 			# .. e1 :
	sll	$2 , 32, $2 			# e0    :
	ldt	$f14, 0x00 ($4)			# .. e1 :
	stq	$2 ,  8($30) 				# e0    :
	nop
	nop
	nop
	ldt	$f13, 8($30) 				# e1    :
	addq	$30 , 16, $30 			# e0    :

	mult	$f11, $f13, $f10	# fm    : $f10 = (x * 0.5) * y
	mult	$f10, $f13, $f10	# fm    : $f10 = ((x * 0.5) * y) * y
	subt	$f15, $f10, $f1		# fa    : $f1 = (1.5 - 0.5*x*y*y)
	mult	$f13, $f1, $f13         # fm    : yp = y*(1.5 - 0.5*x*y*y)

 	mult	$f11, $f13, $f1		# fm    : $f11 = x * 0.5 * yp
	mult	$f1,  $f13, $f11	# fm    : $f11 = (x * 0.5 * yp) * yp
	subt	$f18, $f11, $f1		# fa    : $f1= (1.5-2^-30) - 0.5*x*yp*yp
	mult	$f13, $f1, $f13		# fm    : ypp = $f13 = yp*$f1
	subt	$f15, $f12, $f1		# fa    : $f1 = (1.5 - 0.5)
	mult	$f16, $f13, $f10	# fm    : z = $f10 = x * ypp
	mult	$f10, $f13, $f11	# fm    : $f11 = z*ypp
	mult	$f10, $f12, $f12	# fm    : $f12 = z*0.5
	subt	$f1, $f11, $f1		# .. fa : $f1 = 1 - z*ypp
	mult	$f12, $f1, $f12		# fm    : $f12 = z*0.5*(1 - z*ypp)
	addt	$f10, $f12, $f0		# fa    : zp=res=$f0= z + z*0.5*(1 - z*ypp)

	ret

	.end  sqrt