File: polynom_Xsig.c

package info (click to toggle)
bochs 1.4pre2-1
  • links: PTS
  • area: main
  • in suites: woody
  • size: 7,656 kB
  • ctags: 10,322
  • sloc: cpp: 66,880; ansic: 19,674; sh: 2,951; makefile: 2,183; asm: 2,110; yacc: 723; lex: 171; csh: 147; perl: 35
file content (133 lines) | stat: -rw-r--r-- 3,869 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*---------------------------------------------------------------------------+
 |  polynomial_Xsig.c                                                        |
 |  $Id: polynom_Xsig.c,v 1.2 2001/10/06 03:53:46 bdenney Exp $
 |                                                                           |
 | Fixed point arithmetic polynomial evaluation.                             |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1995,1999                                    |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@melbpc.org.au              |
 |                                                                           |
 | Computes:                                                                 |
 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x)*x)*x)*x) ... )*x    |
 | and adds the result to the 12 byte Xsig.                                  |
 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 | precision.                                                                |
 |                                                                           |
 | This function must be used carefully: most overflow of intermediate       |
 | results is controlled, but overflow of the result is not.                 |
 |                                                                           |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"
#include "poly.h"


void polynomial_Xsig(Xsig *accum, const u64 *x, const u64 terms[], const int n)
{
  int  i;
  Xsig acc, Xprod;
  u32  lprod;
  u64  xlwr, xupr, prod;
  char overflowed;

  xlwr = (u32)(*x);
  xupr = (u32)((*x) >> 32);

  acc.msw = terms[n] >> 32;
  acc.midw = terms[n];
  acc.lsw = 0;
  overflowed = 0;

  for ( i = n-1; i >= 0; i-- )
    {
      /* Split the product into five parts to get a 16 byte result */

      /* first word by first word */
      prod = acc.msw * xupr;
      Xprod.midw = prod;
      Xprod.msw = prod >> 32;

      /* first word by second word */
      prod = acc.msw * xlwr;
      Xprod.lsw = prod;
      lprod = prod >> 32;
      Xprod.midw += lprod;
      if ( lprod > Xprod.midw )
	Xprod.msw ++;

      /* second word by first word */
      prod = acc.midw * xupr;
      Xprod.lsw += prod;
      if ( (u32)prod > Xprod.lsw )
	{
	  Xprod.midw ++;
	  if ( Xprod.midw == 0 )
	    Xprod.msw ++;
	}
      lprod = prod >> 32;
      Xprod.midw += lprod;
      if ( lprod > Xprod.midw )
	Xprod.msw ++;

      /* second word by second word */
      prod = acc.midw * xlwr;
      lprod = prod >> 32;
      Xprod.lsw += lprod;
      if ( lprod > Xprod.lsw )
	{
	  Xprod.midw ++;
	  if ( Xprod.midw == 0 )
	    Xprod.msw ++;
	}

      /* third word by first word */
      prod = acc.lsw * xupr;
      lprod = prod >> 32;
      Xprod.lsw += lprod;
      if ( lprod > Xprod.lsw )
	{
	  Xprod.midw ++;
	  if ( Xprod.midw == 0 )
	    Xprod.msw ++;
	}

      if ( overflowed )
	{
	  Xprod.midw += xlwr;
	  if ( (u32)xlwr > Xprod.midw )
	    Xprod.msw ++;
	  Xprod.msw += xupr;
	  overflowed = 0;    /* We don't check this addition for overflow */
	}
      
      acc.lsw = Xprod.lsw;
      acc.midw = (u32)terms[i] + Xprod.midw;
      acc.msw = (terms[i] >> 32) + Xprod.msw;
      if ( Xprod.msw > acc.msw )
	overflowed = 1;
      if ( (u32)terms[i] > acc.midw )
	{
	  acc.msw ++;
	  if ( acc.msw == 0 )
	    overflowed = 1;
	}
    }

  /* We don't check the addition to accum for overflow */
  accum->lsw += acc.lsw;
  if ( acc.lsw > accum->lsw )
    {
      accum->midw ++;
      if ( accum->midw == 0 )
	accum->msw ++;
    }
  accum->midw += acc.midw;
  if ( acc.midw > accum->midw )
    {
      accum->msw ++;
    }
  accum->msw += acc.msw;
}