1
1
mirror of https://github.com/wader/fq.git synced 2024-12-23 05:13:30 +03:00
fq/internal/mathex/float80.go
2023-03-09 15:16:52 +01:00

216 lines
6.0 KiB
Go

// Float80 type from https://github.com/mewspring/mewmew-l
// modified as bit to read bytes instead of hex string
//
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <http://unlicense.org>
package mathex
import (
"fmt"
"log"
"math"
"math/big"
)
// Float80 represents an 80-bit IEEE 754 extended precision floating-point
// value, in x86 extended precision format.
//
// References:
//
// https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format
type Float80 struct {
// Sign and exponent.
//
// 1 bit: sign
// 15 bits: exponent
se uint16
// Integer part and fraction.
//
// 1 bit: integer part
// 63 bits: fraction
m uint64
}
// Bits returns the IEEE 754 binary representation of f, with the sign and
// exponent in se and the mantissa in m.
func (f Float80) Bits() (se uint16, m uint64) {
return f.se, f.m
}
// Bytes returns the x86 extended precision binary representation of f as a byte
// slice.
func (f Float80) Bytes() []byte {
return []byte(f.String())
}
// String returns the IEEE 754 binary representation of f as a string,
// containing 10 bytes in hexadecimal format.
func (f Float80) String() string {
return fmt.Sprintf("%04X%016X", f.se, f.m)
}
// Float64 returns the float64 representation of f.
func (f Float80) Float64() float64 {
se := uint64(f.se)
m := f.m
// 1 bit: sign
sign := se >> 15
// 15 bits: exponent
exp := se & 0x7FFF
// Adjust for exponent bias.
//
// === [ binary64 ] =========================================================
//
// Exponent bias 1023.
//
// +===========================+=======================+
// | Exponent (in binary) | Notes |
// +===========================+=======================+
// | 00000000000 | zero/subnormal number |
// +---------------------------+-----------------------+
// | 00000000001 - 11111111110 | normalized value |
// +---------------------------+-----------------------+
// | 11111111111 | infinity/NaN |
// +---------------------------+-----------------------+
//
// References:
// https://en.wikipedia.org/wiki/Double-precision_floating-point_format#Exponent_encoding
exp64 := int64(exp) - 16383 + 1023
switch {
case exp == 0:
// exponent is all zeroes.
exp64 = 0
case exp == 0x7FFF:
// exponent is all ones.
exp64 = 0x7FF
default:
}
// 63 bits: fraction
frac := m & 0x7FFFFFFFFFFFFFFF
// Sign, exponent and fraction of binary64.
//
// 1 bit: sign
// 11 bits: exponent
// 52 bits: fraction
//
// References:
// https://en.wikipedia.org/wiki/Double-precision_floating-point_format#IEEE_754_double-precision_binary_floating-point_format:_binary64
bits := sign<<63 | uint64(exp64)<<52 | frac>>11
return math.Float64frombits(bits)
}
// BigFloat returns the *big.Float representation of f.
func (f Float80) BigFloat() *big.Float {
x := &big.Float{}
sign := (f.se & 0x8000) != 0
e := f.se & 0x7FFF
s := fmt.Sprintf("0x.%Xp%d", f.m, e-16383+1)
if sign {
s = "-" + s
}
x.SetPrec(52)
_, _, err := x.Parse(s, 0)
if err != nil {
log.Printf("big.Float.Parse: error %v", err)
}
return x
}
// NewFloat80FromFloat64 returns the nearest 80-bit floating-point value for x.
func NewFloat80FromFloat64(x float64) Float80 {
// Sign, exponent and fraction of binary64.
//
// 1 bit: sign
// 11 bits: exponent
// 52 bits: fraction
bits := math.Float64bits(x)
// 1 bit: sign
sign := uint16(bits >> 63)
// 11 bits: exponent
exp := bits >> 52 & 0x7FF
// 52 bits: fraction
frac := bits & 0xFFFFFFFFFFFFF
if exp == 0 && frac == 0 {
// zero value.
return Float80{}
}
// Sign, exponent and fraction of binary80.
//
// 1 bit: sign
// 15 bits: exponent
// 1 bit: integer part
// 63 bits: fraction
// 15 bits: exponent.
//
// Exponent bias 1023 (binary64)
// Exponent bias 16383 (binary80)
exp80 := int64(exp) - 1023 + 16383
// 63 bits: fraction.
//
frac80 := frac << 11
switch {
case exp == 0:
exp80 = 0
case exp == 0x7FF:
exp80 = 0x7FFF
}
se := sign<<15 | uint16(exp80)
// Integer part set to specify normalized value.
m := 0x8000000000000000 | frac80
return NewFloat80FromBits(se, m)
}
// NewFloat80FromBytes returns a new 80-bit floating-point value based on b,
func NewFloat80FromBytes(b []byte) Float80 {
var f Float80
if len(b) != 10 {
panic(fmt.Errorf("invalid length of float80 representation, expected 10, got %d", len(b)))
}
f.se = uint16(int64(b[0])<<8 | int64(b[1]<<0))
f.m = uint64(0 |
int64(b[2])<<56 |
int64(b[3])<<48 |
int64(b[4])<<40 |
int64(b[5])<<32 |
int64(b[6])<<24 |
int64(b[7])<<16 |
int64(b[8])<<8 |
int64(b[9])<<0,
)
return f
}
// NewFloat80FromBits returns a new 80-bit floating-point value based on the
// sign, exponent and mantissa bits.
func NewFloat80FromBits(se uint16, m uint64) Float80 {
return Float80{
se: se,
m: m,
}
}