1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
package fast
import (
	"math"
	"math/bits"
)
// Bit-layout constants for the 64-bit diyfp significand and for the
// IEEE-754 binary64 encoding. The masks are derived from the field widths
// so the relationships between them are explicit.
const (
	// Width in bits of a diyfp significand.
	diyFpKSignificandSize = 64

	// Stored significand bits of a binary64; excludes the hidden bit.
	kPhysicalSignificandSize = 52

	// Significand bits of a binary64 including the hidden bit.
	kSignificandSize = kPhysicalSignificandSize + 1

	// Most significant bit of a 64-bit word.
	kUint64MSB uint64 = 1 << (diyFpKSignificandSize - 1)

	// The implicit leading significand bit of a normal binary64.
	kHiddenBit = 1 << kPhysicalSignificandSize

	// Selects the stored significand bits.
	kSignificandMask = kHiddenBit - 1

	// Selects the 11 biased-exponent bits.
	kExponentMask = 0x7FF << kPhysicalSignificandSize

	// Bias that maps the stored exponent to the exponent of an integer
	// significand (the usual 0x3FF plus the stored significand width).
	kExponentBias = 0x3FF + kPhysicalSignificandSize

	// Exponent shared by all denormals.
	kDenormalExponent = 1 - kExponentBias
)
// double is a float64 whose methods (sigExp, toDiyFp, ...) take apart its
// IEEE-754 bit pattern into a significand/exponent pair.
type double float64
// diyfp is a floating-point value made of a 64-bit unsigned significand and
// a binary exponent; mul and the conversions from double treat it as
// f * 2^e. Values are not necessarily normalized.
type diyfp struct {
// f is the significand.
f uint64
// e is the binary exponent.
e int
}
// subtract performs f -= o in place.
// Both operands must carry the same exponent, and the receiver's significand
// must be at least as large as o's; both conditions are handed to _DCHECK.
// The difference is stored as-is and is not normalized.
func (f *diyfp) subtract(o diyfp) {
	_DCHECK(f.e == o.e)
	_DCHECK(f.f >= o.f)
	f.f = f.f - o.f
}
// minus returns f - o and leaves both operands untouched.
// The same preconditions as subtract apply: equal exponents and a receiver
// that is not smaller than o. The result is not normalized.
func (f diyfp) minus(o diyfp) diyfp {
	// f is received by value, so subtracting in place only affects this copy.
	f.subtract(o)
	return f
}
// mul performs f *= o in place.
//
// The two significands are multiplied to a full 128-bit product, of which
// only the most significant 64 bits are kept; the least significant 64 bits
// are used solely to round that upper half (halfway cases round up). The
// exponent becomes f.e + o.e + 64 to account for the discarded low word.
// The result is not normalized.
func (f *diyfp) mul(o diyfp) {
	hi, lo := bits.Mul64(f.f, o.f)
	// Rounding means adding 1<<63 to the low word and carrying into the high
	// word, which is the same as adding the low word's top bit to the high
	// word. This cannot overflow: the high word of a 64x64-bit product is at
	// most 1<<64 - 2.
	f.f = hi + (lo >> 63)
	f.e += o.e + 64
}
// times returns the product f * o and leaves both operands untouched.
// See mul for how the significand is rounded and the exponent adjusted.
func (f diyfp) times(o diyfp) diyfp {
	// f is received by value, so multiplying in place only affects this copy.
	f.mul(o)
	return f
}
// _normalize shifts the significand left until its most significant bit is
// set and lowers the exponent by the number of bits shifted, so the
// represented value is unchanged.
//
// The shift distance is simply the number of leading zero bits, so the cost
// is constant no matter how far the value is from being normalized. A zero
// significand has no set bit to move to the top: it stays zero (with the
// exponent lowered by 64) rather than looping forever.
func (f *diyfp) _normalize() {
	shift := bits.LeadingZeros64(f.f)
	f.f <<= uint(shift)
	f.e -= shift
}
// normalizeDiyfp returns a normalized copy of f; the argument itself is not
// modified because it is passed by value.
func normalizeDiyfp(f diyfp) diyfp {
	f._normalize()
	return f
}
// toNormalizedDiyfp returns d as a diyfp whose significand has its most
// significant (64th) bit set.
//
// d must be strictly greater than 0. The significand produced by sigExp
// never extends above the hidden bit, so shifting out all leading zeros
// covers both the denormal adjustment and the final move from 53 to 64
// significant bits in one step. For a zero significand there is no set bit
// to move to the top: the result keeps a zero significand (with the exponent
// lowered by 64) rather than looping forever.
func (d double) toNormalizedDiyfp() diyfp {
	f, e := d.sigExp()
	shift := bits.LeadingZeros64(f)
	return diyfp{f: f << uint(shift), e: e - shift}
}
// normalizedBoundaries returns the lower and upper boundaries of d, in that
// order.
// The upper boundary (m_plus) is normalized; the lower boundary (m_minus) is
// rescaled to carry the same exponent as m_plus.
// Precondition: the value encoded by d must be greater than 0.
func (d double) normalizedBoundaries() (m_minus, m_plus diyfp) {
	val := d.toDiyFp()
	upper := normalizeDiyfp(diyfp{f: (val.f << 1) + 1, e: val.e - 1})

	// Usual case: the lower boundary sits half a unit below val, mirroring
	// the upper boundary.
	lower := diyfp{f: (val.f << 1) - 1, e: val.e - 1}
	if val.f == kHiddenBit && val.e != kDenormalExponent {
		// Only the hidden bit is set, so the predecessor of val lives in the
		// next lower binade and is closer. Think of v = 1000e10 and
		// v- = 9999e9: the boundary is not at a distance of 1e9 but of 1e8.
		// The smallest normal is the one exception: the largest denormal is
		// as far below it as its successor is above, because denormals share
		// the exponent of the smallest normals.
		lower = diyfp{f: (val.f << 2) - 1, e: val.e - 2}
	}

	// Bring the lower boundary onto the upper boundary's exponent.
	lower.f <<= lower.e - upper.e
	lower.e = upper.e
	return lower, upper
}
// toDiyFp returns d as a diyfp holding exactly the significand and exponent
// reported by sigExp; the result is not normalized.
func (d double) toDiyFp() diyfp {
	var res diyfp
	res.f, res.e = d.sigExp()
	return res
}
// sigExp splits the IEEE-754 bit pattern of d into an integer significand
// and a binary exponent. The sign bit is ignored.
//
// When the exponent field is all zeros (a denormal, or zero) the stored
// significand bits are returned unchanged together with kDenormalExponent.
// Otherwise the hidden bit is added to the significand and the stored
// exponent is unbiased with kExponentBias.
func (d double) sigExp() (significand uint64, exponent int) {
	raw := math.Float64bits(float64(d))
	fraction := raw & kSignificandMask
	expField := raw & kExponentMask
	if expField == 0 {
		return fraction, kDenormalExponent
	}
	return fraction + kHiddenBit, int(expField>>kPhysicalSignificandSize) - kExponentBias
}
|