1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
|
// Float80 type from https://github.com/mewspring/mewmew-l
// modified as bit to read bytes instead of hex string
//
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <http://unlicense.org>
package mathex
import (
"fmt"
"log"
"math"
"math/big"
)
// Float80 represents an 80-bit IEEE 754 extended precision floating-point
// value, in x86 extended precision format.
//
// References:
//
// https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format
type Float80 struct {
// Sign and exponent.
//
// 1 bit: sign
// 15 bits: exponent
se uint16
// Integer part and fraction.
//
// 1 bit: integer part
// 63 bits: fraction
m uint64
}
// Bits returns the IEEE 754 binary representation of f, with the sign and
// exponent in se and the mantissa in m.
func (f Float80) Bits() (se uint16, m uint64) {
return f.se, f.m
}
// Bytes returns the x86 extended precision binary representation of f as a byte
// slice.
func (f Float80) Bytes() []byte {
return []byte(f.String())
}
// String returns the IEEE 754 binary representation of f as a string,
// containing 10 bytes in hexadecimal format.
func (f Float80) String() string {
return fmt.Sprintf("%04X%016X", f.se, f.m)
}
// Float64 returns the float64 representation of f.
func (f Float80) Float64() float64 {
se := uint64(f.se)
m := f.m
// 1 bit: sign
sign := se >> 15
// 15 bits: exponent
exp := se & 0x7FFF
// Adjust for exponent bias.
//
// === [ binary64 ] =========================================================
//
// Exponent bias 1023.
//
// +===========================+=======================+
// | Exponent (in binary) | Notes |
// +===========================+=======================+
// | 00000000000 | zero/subnormal number |
// +---------------------------+-----------------------+
// | 00000000001 - 11111111110 | normalized value |
// +---------------------------+-----------------------+
// | 11111111111 | infinity/NaN |
// +---------------------------+-----------------------+
//
// References:
// https://en.wikipedia.org/wiki/Double-precision_floating-point_format#Exponent_encoding
exp64 := int64(exp) - 16383 + 1023
switch {
case exp == 0:
// exponent is all zeroes.
exp64 = 0
case exp == 0x7FFF:
// exponent is all ones.
exp64 = 0x7FF
default:
}
// 63 bits: fraction
frac := m & 0x7FFFFFFFFFFFFFFF
// Sign, exponent and fraction of binary64.
//
// 1 bit: sign
// 11 bits: exponent
// 52 bits: fraction
//
// References:
// https://en.wikipedia.org/wiki/Double-precision_floating-point_format#IEEE_754_double-precision_binary_floating-point_format:_binary64
bits := sign<<63 | uint64(exp64)<<52 | frac>>11
return math.Float64frombits(bits)
}
// BigFloat returns the *big.Float representation of f.
func (f Float80) BigFloat() *big.Float {
x := &big.Float{}
sign := (f.se & 0x8000) != 0
e := f.se & 0x7FFF
s := fmt.Sprintf("0x.%Xp%d", f.m, e-16383+1)
if sign {
s = "-" + s
}
x.SetPrec(52)
_, _, err := x.Parse(s, 0)
if err != nil {
log.Printf("big.Float.Parse: error %v", err)
}
return x
}
// NewFloat80FromFloat64 returns the nearest 80-bit floating-point value for x.
func NewFloat80FromFloat64(x float64) Float80 {
// Sign, exponent and fraction of binary64.
//
// 1 bit: sign
// 11 bits: exponent
// 52 bits: fraction
bits := math.Float64bits(x)
// 1 bit: sign
sign := uint16(bits >> 63)
// 11 bits: exponent
exp := bits >> 52 & 0x7FF
// 52 bits: fraction
frac := bits & 0xFFFFFFFFFFFFF
if exp == 0 && frac == 0 {
// zero value.
return Float80{}
}
// Sign, exponent and fraction of binary80.
//
// 1 bit: sign
// 15 bits: exponent
// 1 bit: integer part
// 63 bits: fraction
// 15 bits: exponent.
//
// Exponent bias 1023 (binary64)
// Exponent bias 16383 (binary80)
exp80 := int64(exp) - 1023 + 16383
// 63 bits: fraction.
//
frac80 := frac << 11
switch {
case exp == 0:
exp80 = 0
case exp == 0x7FF:
exp80 = 0x7FFF
}
se := sign<<15 | uint16(exp80)
// Integer part set to specify normalized value.
m := 0x8000000000000000 | frac80
return NewFloat80FromBits(se, m)
}
// NewFloat80FromBytes returns a new 80-bit floating-point value based on b,
func NewFloat80FromBytes(b []byte) Float80 {
var f Float80
if len(b) != 10 {
panic(fmt.Errorf("invalid length of float80 representation, expected 10, got %d", len(b)))
}
f.se = uint16(int64(b[0])<<8 | int64(b[1]<<0))
f.m = uint64(0 |
int64(b[2])<<56 |
int64(b[3])<<48 |
int64(b[4])<<40 |
int64(b[5])<<32 |
int64(b[6])<<24 |
int64(b[7])<<16 |
int64(b[8])<<8 |
int64(b[9])<<0,
)
return f
}
// NewFloat80FromBits returns a new 80-bit floating-point value based on the
// sign, exponent and mantissa bits.
func NewFloat80FromBits(se uint16, m uint64) Float80 {
return Float80{
se: se,
m: m,
}
}
|