/*
   File: atan.S

   package info (click to toggle)
   libffm 0.28-5
   links: PTS
   area: main
   in suites: sarge
   size: 216 kB
   ctags: 185
   sloc: asm: 3,028; makefile: 94; ansic: 12; sh: 2
   file content (203 lines) | stat: -rw-r--r-- 6,012 bytes
   parent folder | download | duplicates (4)
*/
/*
   libffm	- Free, pretty fast replacement for some math (libm) routines 
			on Linux/AXP, optimized for the 21164

   Copyright (C) 1998  Joachim Wesner <joachim.wesner@frankfurt.netsurf.de>
                  and  Kazushige Goto <goto@statabo.rim.or.jp>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library (see file COPYING.LIB); if not, write 
   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, 
   MA 02139, USA.
*/
/*
   Fast atan approximation including range reduction by Joachim Wesner,
   <joachim.wesner@frankfurt.netsurf.de>, see also mc 8/1991 p. 78-93.
   Done in June/July 1998.
*/
/*
   improved and modified by Kazushige Goto <goto@statabo.rim.or.jp>
*/
/*
   Now including full illegal argument handling by Joachim Wesner,
   <joachim.wesner@frankfurt.netsurf.de>, November 1998
*/

        .set noreorder
        .set noat

#ifdef __ELF__
	.section .rodata
#else
	.rdata
#endif

        .align 5
        .type    K,@object
K:
        .t_floating  2.67949192431122695801e-1
        .t_floating  1.73205080756887741522e0
        .t_floating  5.77350269189625842081e-1
        .t_floating  1.00000000000000000000e0

        .t_floating  3.73205080756887719318e0
        .t_floating  1.57079632679489655800e0
        .t_floating  5.23598775598298815659e-1
        .t_floating  1.04719755119659763132e0

        .t_floating  4.43895157186999997356e-2
        .t_floating  7.67936869066000032946e-2
        .t_floating  1.11110978980510477498e-1
        .t_floating  1.99999999987294480031e-1

        .t_floating -6.48319351030299945160e-2
        .t_floating -9.09037114191073997160e-2
        .t_floating -1.42857141028255446580e-1
        .t_floating -3.33333333333299286494e-1

.text
        .align 5
        .globl atan
        .ent atan
atan:
        lda     $30, -16($30)
        ldgp    $29,.-atan($27)
	stt	$f16,  0($30)

#ifdef PROF
	lda	$28, _mcount
	jsr	$28, ($28), _mcount
	unop
	unop
#endif
        .prologue       1

        fabs    $f16, $f17
        lda     $1,K
	lda	$2,0x7ff
	sll	$2, 52, $2

	ldq	$0,    0($30)
        ldt     $f18,  0($1)            # K[0]
        ldt     $f19,  8($1)            # K[1]
        ldt     $f20, 16($1)            # K[2]

	and	$0,$2,$5
	beq	$5,$Denormal
	cmpeq	$2,$5,$4		###
	bne	$4,$NaN_and_Inf		###

        cmptlt  $f18, $f17, $f18
        ldt     $f21, 24($1)            # K[3]
        mult    $f19, $f17, $f14
        ldt     $f22, 32($1)            # K[4]

        ldt     $f23, 64($1)            # K[8]
        cmptle  $f17, $f21, $f13        # $f1 = (xa<=K[3])
        lda     $3,   40($1)            # c_addr = &K[5]
        mult    $f20, $f17, $f12

        cmptle  $f17, $f22, $f22
        ldt     $f24, 72($1)            # K[9]
        fclr    $f0                     # corr = 0.0
        ldt     $f25, 80($1)            # K[10]

        ldt     $f26, 88($1)            # K[11]
        ldt     $f27, 96($1)            # K[12]
        ldt     $f28,104($1)            # K[13]
        fbeq    $f18, $L6

        fneg    $f17, $f15              # y7 = -xa
        fbeq    $f13, $L7

        addt    $f19,$f17,$f15          # y7 = K[1] + xa
        lda     $3,  48($1)             # c_addr = &K[6]
        subt    $f14,$f21,$f21          # y5 = y3 - y5
        br      $31,$L8

$L7:
        fbeq    $f22,$L8
        addt    $f20,$f17,$f15          # y7 = K[2] + xa
        lda     $3,  56($1)             # c_addr = &K[7]
        subt    $f12,$f21,$f21          # y5 = y4 - y5
$L8:
        divt    $f21,$f15,$f17          # xa = y5/y7
        ldt     $f0,  0($3)             # load corr
	.align  4

$L6:
        ldt     $f29,112($1)            # K[14]
        mult    $f17, $f17, $f10        # y1 = xa * xa
        ldt     $f30,120($1)            # K[15]
        mult    $f10, $f10, $f11        # y2 = y1 * y1

        mult    $f23, $f10, $f23        # r1 = K[8]  * y1
        mult    $f24, $f10, $f24        # r2 = K[9]  * y1
        mult    $f25, $f10, $f25        # r3 = K[10] * y1
        mult    $f10, $f11, $f12        # y3 = y1 * y2

        mult    $f11, $f11, $f13        # y4 = y2 * y2
        addt    $f23, $f27, $f23        # r1 = r1 + K[12]
        addt    $f24, $f28, $f24        # r2 = r2 + K[13]
        mult    $f26, $f10, $f26        # r4 = K[11] * y1

        addt    $f25, $f29, $f25        # r3 = r3 + K[14]
        mult    $f11, $f12, $f14        # y5 = y3 * y2
        mult    $f12, $f13, $f15        # y7 = y4 * y3
        addt    $f26, $f30, $f26        # r4 = r4 + K[15]

        mult    $f25, $f12, $f25        # r3 = r3 * y3
        mult    $f24, $f14, $f24        # r2 = r2 * y5
        mult    $f23, $f15, $f23        # r1 = r1 * y7
        mult    $f26, $f10, $f26        # r4 = r4 * y1

        addt    $f23, $f24, $f23        # r1 = r1 + r2
        addt    $f25, $f26, $f25        # r3 = r3 + r4
        addt    $f23, $f25, $f23        # r1 = r1 + r3
        mult    $f23, $f17, $f23        # r1 = r1 * xa

        addt    $f23, $f17, $f23        # r1 = r1 + xa
        addt    $f23, $f0,  $f0         # r1 = r1 + corr
        addq    $30,16,$30		###
        fblt    $f16, $L12

        ret
$L12:
        fneg    $f0,  $f0
        ret
	.align 4

$Denormal:
	fmov	$f16,$f0
	lda	$30,16($30)
	ret
	.align 4

$NaN_and_Inf:
	andnot  $0,$2,$3
	sll	$3,1,$3
	bne	$3,$NaN
	ldt	$f0,40($1)
	cpys	$f16,$f0,$f0
	lda	$30,16($30)
	ret
	.align 4

$NaN:
	lda	$0,-1
	stq	$0,  0($30)
	ldt	$f0, 0($30)
	lda	$30,16($30)
	ret

        .end atan