1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
/*---------------------------------------------------------------------------+
| polynomial_Xsig.c |
| $Id: polynom_Xsig.c,v 1.2 2001/10/06 03:53:46 bdenney Exp $
| |
| Fixed point arithmetic polynomial evaluation. |
| |
| Copyright (C) 1992,1993,1994,1995,1999 |
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
| Australia. E-mail billm@melbpc.org.au |
| |
| Computes: |
| terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x)*x)*x)*x) ... )*x |
| and adds the result to the 12 byte Xsig. |
| The terms[] are each 8 bytes, but all computation is performed to 12 byte |
| precision. |
| |
| This function must be used carefully: most overflow of intermediate |
| results is controlled, but overflow of the result is not. |
| |
+---------------------------------------------------------------------------*/
#include "fpu_emu.h"
#include "poly.h"
void polynomial_Xsig(Xsig *accum, const u64 *x, const u64 terms[], const int n)
{
int i;
Xsig acc, Xprod;
u32 lprod;
u64 xlwr, xupr, prod;
char overflowed;
xlwr = (u32)(*x);
xupr = (u32)((*x) >> 32);
acc.msw = terms[n] >> 32;
acc.midw = terms[n];
acc.lsw = 0;
overflowed = 0;
for ( i = n-1; i >= 0; i-- )
{
/* Split the product into five parts to get a 16 byte result */
/* first word by first word */
prod = acc.msw * xupr;
Xprod.midw = prod;
Xprod.msw = prod >> 32;
/* first word by second word */
prod = acc.msw * xlwr;
Xprod.lsw = prod;
lprod = prod >> 32;
Xprod.midw += lprod;
if ( lprod > Xprod.midw )
Xprod.msw ++;
/* second word by first word */
prod = acc.midw * xupr;
Xprod.lsw += prod;
if ( (u32)prod > Xprod.lsw )
{
Xprod.midw ++;
if ( Xprod.midw == 0 )
Xprod.msw ++;
}
lprod = prod >> 32;
Xprod.midw += lprod;
if ( lprod > Xprod.midw )
Xprod.msw ++;
/* second word by second word */
prod = acc.midw * xlwr;
lprod = prod >> 32;
Xprod.lsw += lprod;
if ( lprod > Xprod.lsw )
{
Xprod.midw ++;
if ( Xprod.midw == 0 )
Xprod.msw ++;
}
/* third word by first word */
prod = acc.lsw * xupr;
lprod = prod >> 32;
Xprod.lsw += lprod;
if ( lprod > Xprod.lsw )
{
Xprod.midw ++;
if ( Xprod.midw == 0 )
Xprod.msw ++;
}
if ( overflowed )
{
Xprod.midw += xlwr;
if ( (u32)xlwr > Xprod.midw )
Xprod.msw ++;
Xprod.msw += xupr;
overflowed = 0; /* We don't check this addition for overflow */
}
acc.lsw = Xprod.lsw;
acc.midw = (u32)terms[i] + Xprod.midw;
acc.msw = (terms[i] >> 32) + Xprod.msw;
if ( Xprod.msw > acc.msw )
overflowed = 1;
if ( (u32)terms[i] > acc.midw )
{
acc.msw ++;
if ( acc.msw == 0 )
overflowed = 1;
}
}
/* We don't check the addition to accum for overflow */
accum->lsw += acc.lsw;
if ( acc.lsw > accum->lsw )
{
accum->midw ++;
if ( accum->midw == 0 )
accum->msw ++;
}
accum->midw += acc.midw;
if ( acc.midw > accum->midw )
{
accum->msw ++;
}
accum->msw += acc.msw;
}
|