// glrFloat64frombits returns the floating point number corresponding to
// the IEEE 754 binary representation b.
//
// It is an arithmetic replacement for
//
//	func Float64frombits(b uint64) float64 { return *(*float64)(unsafe.Pointer(&b)) }
//
// that splits b into sign, exponent and fraction fields and rebuilds the
// value with math.Ldexp instead of reinterpreting memory.
func glrFloat64frombits(b uint64) float64 {
	const (
		fracBits = 52
		expBits  = 11
		expBias  = 1023
		expAll1s = 1<<expBits - 1
	)

	sign := float64(+1)
	if b>>63 == 1 {
		sign = -1
	}
	exp := (b >> fracBits) & expAll1s
	frac := b & (1<<fracBits - 1)

	switch exp {
	case expAll1s:
		// All-ones exponent: infinity when the fraction is empty, NaN otherwise.
		if frac != 0 {
			zero := 0.0
			return zero / zero
		}
		return sign / 0
	case 0:
		// Zero or subnormal: there is no implicit leading bit and the
		// exponent is the same as for the smallest normal number.
		exp = 1
	default:
		// Normal number: restore the implicit leading 1.
		frac |= 1 << fracBits
	}
	return math.Ldexp(float64(frac), int(exp)-expBias-fracBits) * sign
}
// glrFloat32frombits returns the floating point number corresponding to
// the IEEE 754 binary representation b.
//
// It is an arithmetic replacement for
//
//	func Float32frombits(b uint32) float32 { return *(*float32)(unsafe.Pointer(&b)) }
//
// that splits b into sign, exponent and fraction fields and rebuilds the
// value with math.Ldexp instead of reinterpreting memory.
func glrFloat32frombits(b uint32) float32 {
	const (
		fracBits = 23
		expBits  = 8
		expBias  = 127
		expAll1s = 1<<expBits - 1
	)

	sign := float32(+1)
	if b>>31 == 1 {
		sign = -1
	}
	exp := (b >> fracBits) & expAll1s
	frac := b & (1<<fracBits - 1)

	switch exp {
	case expAll1s:
		// All-ones exponent: infinity when the fraction is empty, NaN otherwise.
		if frac != 0 {
			zero := 0.0
			return float32(zero / zero)
		}
		return sign / 0 // Inf
	case 0:
		// Zero or subnormal: there is no implicit leading bit and the
		// exponent is the same as for the smallest normal number.
		exp = 1
	default:
		// Normal number: restore the implicit leading 1.
		frac |= 1 << fracBits
	}
	return float32(math.Ldexp(float64(frac), int(exp)-expBias-fracBits)) * sign
}
// convertFloat converts the string to a float64value. func (s *ss) convertFloat(str string, n int) float64 { if p := indexRune(str, 'p'); p >= 0 { // Atof doesn't handle power-of-2 exponents, // but they're easy to evaluate. f, err := strconv.ParseFloat(str[:p], n) if err != nil { // Put full string into error. if e, ok := err.(*strconv.NumError); ok { e.Num = str } s.error(err) } m, err := strconv.Atoi(str[p+1:]) if err != nil { // Put full string into error. if e, ok := err.(*strconv.NumError); ok { e.Num = str } s.error(err) } return math.Ldexp(f, m) } f, err := strconv.ParseFloat(str, n) if err != nil { s.error(err) } return f }
// float32Unpack decodes a packed 32-bit value into a float32.
//
// The layout read from x is: bit 31 is the sign, bits 21-30 hold an
// exponent biased by 788, and the low 21 bits hold an unsigned integer
// mantissa. The result is ±mantissa × 2**(exponent-788).
func float32Unpack(x uint32) float32 {
	const (
		signMask = 0x80000000
		expMask  = 0x7fe00000
		mantMask = 0x1fffff
		expShift = 21
		expBias  = 788
	)

	m := float64(x & mantMask)
	if x&signMask != 0 {
		m = -m
	}
	e := int((x&expMask)>>expShift) - expBias
	return float32(math.Ldexp(m, e))
}
func TestFloat32Distribution(t *testing.T) { //switch runtime.GOARCH { //case "cs", "java": // return //} // Generate a distribution of (sign, mantissa, exp) values // broader than the float32 range, and check Rat.Float32() // always picks the closest float32 approximation. var add = []int64{ 0, 1, 3, 5, 7, 9, 11, } var winc, einc = uint64(1), 1 // soak test (~1.5s on x86-64) if testing.Short() { winc, einc = 5, 15 // quick test (~60ms on x86-64) } for _, sign := range "+-" { for _, a := range add { for wid := uint64(0); wid < 30; wid += winc { b := 1<<wid + a if sign == '-' { b = -b } for exp := -150; exp < 150; exp += einc { num, den := NewInt(b), NewInt(1) if exp > 0 { num.Lsh(num, uint(exp)) } else { den.Lsh(den, uint(-exp)) } r := new(Rat).SetFrac(num, den) f, _ := r.Float32() if !checkIsBestApprox32(t, f, r) { // Append context information. t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)", b, exp, f, f, math.Ldexp(float64(b), exp), r) } checkNonLossyRoundtrip32(t, f) } } } } }
func (f ExactFloat) ToDoubleHelper() float64 { sign := float64(f.sign) if !f.is_normal() { if f.is_zero() { return math.Copysign(0, sign) } if f.is_inf() { return math.Inf(f.sign) } return math.Copysign(math.NaN(), sign) } mantissa := f.bn.Uint64() return sign * math.Ldexp(float64(mantissa), f.bn_exp) }
// ClipToPaddedFace returns the (u,v) coordinates for the portion of the edge AB that // intersects the given face, but rather than clipping to the square [-1,1]x[-1,1] // in (u,v) space, this method clips to [-R,R]x[-R,R] where R=(1+padding). // Padding must be non-negative. func ClipToPaddedFace(a, b Point, f int, padding float64) (aUV, bUV r2.Point, intersects bool) { // Fast path: both endpoints are on the given face. if face(a.Vector) == f && face(b.Vector) == f { au, av := validFaceXYZToUV(f, a.Vector) bu, bv := validFaceXYZToUV(f, b.Vector) return r2.Point{au, av}, r2.Point{bu, bv}, true } // Convert everything into the (u,v,w) coordinates of the given face. Note // that the cross product *must* be computed in the original (x,y,z) // coordinate system because PointCross (unlike the mathematical cross // product) can produce different results in different coordinate systems // when one argument is a linear multiple of the other, due to the use of // symbolic perturbations. normUVW := pointUVW(faceXYZtoUVW(f, a.PointCross(b))) aUVW := pointUVW(faceXYZtoUVW(f, a)) bUVW := pointUVW(faceXYZtoUVW(f, b)) // Padding is handled by scaling the u- and v-components of the normal. // Letting R=1+padding, this means that when we compute the dot product of // the normal with a cube face vertex (such as (-1,-1,1)), we will actually // compute the dot product with the scaled vertex (-R,-R,1). This allows // methods such as intersectsFace, exitAxis, etc, to handle padding // with no further modifications. scaleUV := 1 + padding scaledN := pointUVW{r3.Vector{X: scaleUV * normUVW.X, Y: scaleUV * normUVW.Y, Z: normUVW.Z}} if !scaledN.intersectsFace() { return aUV, bUV, false } // TODO(roberts): This is a workaround for extremely small vectors where some // loss of precision can occur in Normalize causing underflow. When PointCross // is updated to work around this, this can be removed. 
if math.Max(math.Abs(normUVW.X), math.Max(math.Abs(normUVW.Y), math.Abs(normUVW.Z))) < math.Ldexp(1, -511) { normUVW = pointUVW{normUVW.Mul(math.Ldexp(1, 563))} } normUVW = pointUVW{normUVW.Normalize()} aTan := pointUVW{normUVW.Cross(aUVW.Vector)} bTan := pointUVW{bUVW.Cross(normUVW.Vector)} // As described in clipDestination, if the sum of the scores from clipping the two // endpoints is 3 or more, then the segment does not intersect this face. aUV, aScore := clipDestination(bUVW, aUVW, pointUVW{scaledN.Mul(-1)}, bTan, aTan, scaleUV) bUV, bScore := clipDestination(aUVW, bUVW, scaledN, aTan, bTan, scaleUV) return aUV, bUV, aScore+bScore < 3 }
func TestFloat64Distribution(t *testing.T) { // Generate a distribution of (sign, mantissa, exp) values // broader than the float64 range, and check Rat.Float64() // always picks the closest float64 approximation. var add = []int64{ 0, 1, 3, 5, 7, 9, 11, } var winc, einc = uint64(1), int(1) // soak test (~75s on x86-64) if testing.Short() { winc, einc = 10, 500 // quick test (~12ms on x86-64) } for _, sign := range "+-" { for _, a := range add { for wid := uint64(0); wid < 60; wid += winc { b := int64(1<<wid + a) if sign == '-' { b = -b } for exp := -1100; exp < 1100; exp += einc { num, den := NewInt(b), NewInt(1) if exp > 0 { num.Lsh(num, uint(exp)) } else { den.Lsh(den, uint(-exp)) } r := new(Rat).SetFrac(num, den) f, _ := r.Float64() if !checkIsBestApprox(t, f, r) { // Append context information. t.Errorf("(input was mantissa %#x, exp %d; f=%g (%b); f~%g; r=%v)", b, exp, f, f, math.Ldexp(float64(b), exp), r) } checkNonLossyRoundtrip(t, f) } } } } }
func NewExactFloat(v float64) ExactFloat { f := ExactFloat{bn: big.NewInt(0)} sb := math.Signbit(v) if sb { f.sign = -1 } else { f.sign = 1 } if math.IsNaN(v) { f.set_nan() } else if math.IsInf(v, int(f.sign)) { f.set_inf(f.sign) } else { frac, exp := math.Frexp(math.Abs(v)) m := uint64(math.Ldexp(frac, doubleMantissaBits)) f.bn = f.bn.SetUint64(m) f.bn_exp = exp - doubleMantissaBits f.Canonicalize() } return f }
func NewFreeze(d time.Duration, in Sound) *Freeze { f := Dtof(d, in.SampleRate()) n := f if n == 0 || n&(n-1) != 0 { _, e := math.Frexp(float64(n)) n = int(math.Ldexp(1, e)) } frz := &Freeze{mono: newmono(nil), prv: make(Discrete, n)} frz.sig = frz.prv[:f] inps := GetInputs(in) dp := new(Dispatcher) // t := time.Now() for i := 0; i < n; i += in.BufferLen() { dp.Dispatch(1, inps...) ringcopy(frz.sig[i:i+in.BufferLen()], in.Samples(), 0) } // log.Println("freeze took", time.Now().Sub(t)) return frz }
// ext۰math۰Ldexp forwards to math.Ldexp. args[0] must hold the float64
// fraction and args[1] the int exponent; fr is not used.
func ext۰math۰Ldexp(fr *frame, args []value) value {
	frac := args[0].(float64)
	exp := args[1].(int)
	return math.Ldexp(frac, exp)
}
// quotToFloat returns the non-negative IEEE 754 double-precision
// value nearest to the quotient a/b, using round-to-even in halfway
// cases. It does not mutate its arguments.
// Preconditions: b is non-zero; a and b have no common factors.
//
// The second result, exact, reports whether f equals a/b exactly: it is
// false whenever the division left a remainder, bits were dropped during
// rounding, or the result overflowed to infinity.
// It panics with "division by zero" if b has bit length zero.
func quotToFloat(a, b nat) (f float64, exact bool) {
	// TODO(adonovan): specialize common degenerate cases: 1.0, integers.
	alen := a.bitLen()
	if alen == 0 {
		return 0, true
	}
	blen := b.bitLen()
	if blen == 0 {
		panic("division by zero")
	}

	// 1. Left-shift A or B such that quotient A/B is in [1<<53, 1<<55).
	// (54 bits if A<B when they are left-aligned, 55 bits if A>=B.)
	// This is 2 or 3 more than the float64 mantissa field width of 52:
	// - the optional extra bit is shifted away in step 3 below.
	// - the high-order 1 is omitted in float64 "normal" representation;
	// - the low-order 1 will be used during rounding then discarded.
	exp := alen - blen
	// Work on copies so the caller's a and b are left untouched.
	var a2, b2 nat
	a2 = a2.set(a)
	b2 = b2.set(b)
	if shift := 54 - exp; shift > 0 {
		a2 = a2.shl(a2, uint(shift))
	} else if shift < 0 {
		b2 = b2.shl(b2, uint(-shift))
	}

	// 2. Compute quotient and remainder (q, r). NB: due to the
	// extra shift, the low-order bit of q is logically the
	// high-order bit of r.
	var q nat
	q, r := q.div(a2, a2, b2) // (recycle a2)
	mantissa := low64(q)
	haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half

	// 3. If quotient didn't fit in 54 bits, re-do division by b2<<1
	// (in effect---we accomplish this incrementally).
	if mantissa>>54 == 1 {
		// The bit about to be shifted out counts as part of the remainder.
		if mantissa&1 == 1 {
			haveRem = true
		}
		mantissa >>= 1
		exp++
	}
	if mantissa>>53 != 1 {
		panic("expected exactly 54 bits of result")
	}

	// 4. Rounding.
	if -1022-52 <= exp && exp <= -1022 {
		// Denormal case; lose 'shift' bits of precision.
		shift := uint64(-1022 - (exp - 1)) // [1..53)
		lostbits := mantissa & (1<<shift - 1)
		haveRem = haveRem || lostbits != 0
		mantissa >>= shift
		exp = -1023 + 2
	}
	// Round q using round-half-to-even.
	exact = !haveRem
	if mantissa&1 != 0 {
		// The rounding bit is set, so the result cannot be exact.
		exact = false
		// Round up when above the halfway point (haveRem) or, on an exact
		// tie, when the retained low bit is odd.
		if haveRem || mantissa&2 != 0 {
			if mantissa++; mantissa >= 1<<54 {
				// Complete rollover 11...1 => 100...0, so shift is safe
				mantissa >>= 1
				exp++
			}
		}
	}
	mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 2^53.

	f = math.Ldexp(float64(mantissa), exp-53)
	// Overflow to infinity is never an exact representation of a/b.
	if math.IsInf(f, 0) {
		exact = false
	}
	return
}
// epsilonsqrt returns 2^-26, the square root of 2^-52.
func epsilonsqrt() float64 {
	const exponent = -26
	return math.Ldexp(1, exponent)
}
} } return false } // Difficult boundary cases, derived from tables given in // Vern Paxson, A Program for Testing IEEE Decimal-Binary Conversion // ftp://ftp.ee.lbl.gov/testbase-report.ps.Z // var ftoaTests = []struct { N int F float64 A string }{ // Table 3: Stress Inputs for Converting 53-bit Binary to Decimal, < 1/2 ULP {0, math.Ldexp(8511030020275656, -342), "9.e-88"}, {1, math.Ldexp(5201988407066741, -824), "4.6e-233"}, {2, math.Ldexp(6406892948269899, +237), "1.41e+87"}, {3, math.Ldexp(8431154198732492, +72), "3.981e+37"}, {4, math.Ldexp(6475049196144587, +99), "4.1040e+45"}, {5, math.Ldexp(8274307542972842, +726), "2.92084e+234"}, {6, math.Ldexp(5381065484265332, -456), "2.891946e-122"}, {7, math.Ldexp(6761728585499734, -1057), "4.3787718e-303"}, {8, math.Ldexp(7976538478610756, +376), "1.22770163e+129"}, {9, math.Ldexp(5982403858958067, +377), "1.841552452e+129"}, {10, math.Ldexp(5536995190630837, +93), "5.4835744350e+43"}, {11, math.Ldexp(7225450889282194, +710), "3.89190181146e+229"}, {12, math.Ldexp(7225450889282194, +709), "1.945950905732e+229"}, {13, math.Ldexp(8703372741147379, +117), "1.4460958381605e+51"}, {14, math.Ldexp(8944262675275217, -1001), "4.17367747458531e-286"}, {15, math.Ldexp(7459803696087692, -707), "1.107950772878888e-197"},
// randomFloat64 returns a uniformly distributed value in the range [0,1). // Note that the values returned are all multiples of 2**-53, which means that // not all possible values in this range are returned. func randomFloat64() float64 { const randomFloatBits = 53 return math.Ldexp(float64(randomBits(randomFloatBits)), -randomFloatBits) }
// Return the length on the unit sphere for cells at the given level. func (m Metric) Value(level int) float64 { return math.Ldexp(m.Deriv, -m.dim*level) }
func mathLdexp(L *LState) int { L.Push(LNumber(math.Ldexp(float64(L.CheckNumber(1)), L.CheckInt(2)))) return 1 }
// Ldexp is the inverse of Frexp.
// It returns frac × 2**exp.
//
// The computation is carried out in float64 by math.Ldexp and the result
// is converted back to float32.
//
// Special cases are:
//
//	Ldexp(±0, exp) = ±0
//	Ldexp(±Inf, exp) = ±Inf
//	Ldexp(NaN, exp) = NaN
func Ldexp(frac float32, exp int) float32 {
	wide := math.Ldexp(float64(frac), exp)
	return float32(wide)
}
// Ldexp returns frac × 2**exp. It is a direct wrapper around math.Ldexp.
func Ldexp(frac float64, exp int) float64 {
	scaled := math.Ldexp(frac, exp)
	return scaled
}
func (dec *Decoder) innerDecodeC(rv reflect.Value, c byte) error { cborType := c & typeMask cborInfo := c & infoBits aux, err := dec.handleInfoBits(cborInfo) if err != nil { log.Printf("error in handleInfoBits: %v", err) return err } //log.Printf("cborType %x cborInfo %d aux %x", cborType, cborInfo, aux) if cborType == cborUint { return setUint(rv, aux) } else if cborType == cborNegint { if aux > 0x7fffffffffffffff { //return errors.New(fmt.Sprintf("cannot represent -%d", aux)) bigU := &big.Int{} bigU.SetUint64(aux) minusOne := big.NewInt(-1) bn := &big.Int{} bn.Sub(minusOne, bigU) //log.Printf("built big negint: %v", bn) return setBignum(rv, bn) } return setInt(rv, -1-int64(aux)) } else if cborType == cborBytes { //log.Printf("cborType %x bytes cborInfo %d aux %x", cborType, cborInfo, aux) if cborInfo == varFollows { parts := make([][]byte, 0, 1) allsize := 0 subc := []byte{0} for true { _, err = io.ReadFull(dec.rin, subc) if err != nil { log.Printf("error reading next byte for bar bytes") return err } if subc[0] == 0xff { // done var out []byte = nil if len(parts) == 0 { out = make([]byte, 0) } else { pos := 0 out = make([]byte, allsize) for _, p := range parts { pos += copy(out[pos:], p) } } return setBytes(rv, out) } else { var subb []byte = nil if (subc[0] & typeMask) != cborBytes { return fmt.Errorf("sub of var bytes is type %x, wanted %x", subc[0], cborBytes) } err = dec.innerDecodeC(reflect.ValueOf(&subb), subc[0]) if err != nil { log.Printf("error decoding sub bytes") return err } allsize += len(subb) parts = append(parts, subb) } } } else { val := make([]byte, aux) _, err = io.ReadFull(dec.rin, val) if err != nil { return err } // Don't really care about count, ReadFull will make it all or none and we can just fall out with whatever error return setBytes(rv, val) /*if (rv.Kind() == reflect.Slice) && (rv.Type().Elem().Kind() == reflect.Uint8) { rv.SetBytes(val) } else { return fmt.Errorf("cannot write []byte to k=%s %s", rv.Kind().String(), 
rv.Type().String()) }*/ } } else if cborType == cborText { return dec.decodeText(rv, cborInfo, aux) } else if cborType == cborArray { return dec.decodeArray(rv, cborInfo, aux) } else if cborType == cborMap { return dec.decodeMap(rv, cborInfo, aux) } else if cborType == cborTag { /*var innerOb interface{}*/ ic := []byte{0} _, err = io.ReadFull(dec.rin, ic) if err != nil { return err } if aux == tagBignum { bn, err := dec.decodeBignum(ic[0]) if err != nil { return err } return setBignum(rv, bn) } else if aux == tagNegBignum { bn, err := dec.decodeBignum(ic[0]) if err != nil { return err } minusOne := big.NewInt(-1) bnOut := &big.Int{} bnOut.Sub(minusOne, bn) return setBignum(rv, bnOut) } else if aux == tagDecimal { log.Printf("TODO: directly read bytes into decimal") } else if aux == tagBigfloat { log.Printf("TODO: directly read bytes into bigfloat") } else { // log.Printf("TODO: handle cbor tag: %x", aux) return dec.innerDecodeC(rv, ic[0]) } return nil } else if cborType == cbor7 { if cborInfo == int16Follows { exp := (aux >> 10) & 0x01f mant := aux & 0x03ff var val float64 if exp == 0 { val = math.Ldexp(float64(mant), -24) } else if exp != 31 { val = math.Ldexp(float64(mant+1024), int(exp-25)) } else if mant == 0 { val = math.Inf(1) } else { val = math.NaN() } if (aux & 0x08000) != 0 { val = -val } return setFloat64(rv, val) } else if cborInfo == int32Follows { f := math.Float32frombits(uint32(aux)) return setFloat32(rv, f) } else if cborInfo == int64Follows { d := math.Float64frombits(aux) return setFloat64(rv, d) } else if cborInfo == cborFalse { reflect.Indirect(rv).Set(reflect.ValueOf(false)) } else if cborInfo == cborTrue { reflect.Indirect(rv).Set(reflect.ValueOf(true)) } else if cborInfo == cborNull { return setNil(rv) } } return err }
// mpgetfltN converts the multi-precision float a to a float64, keeping
// prec mantissa bits and using bias to derive the minimum exponent below
// which the value is shifted right before rounding.
//
// It reports "mpgetflt ovf" through Yyerror when a's overflow flag is set
// and no errors have been recorded yet, and "mpgetflt norm" when a does
// not have Mpnorm significant words (after which a is normalized with
// mpnorm). a may be modified.
func mpgetfltN(a *Mpflt, prec int, bias int) float64 {
	if a.Val.Ovf != 0 && nsavederrors+nerrors == 0 {
		Yyerror("mpgetflt ovf")
	}

	s := sigfig(a)
	if s == 0 {
		return 0
	}

	if s != Mpnorm {
		Yyerror("mpgetflt norm")
		mpnorm(a)
	}

	// Shift left until the Mpsign bit of the top word is set, lowering the
	// exponent by one for each shift.
	for a.Val.A[Mpnorm-1]&Mpsign == 0 {
		Mpshiftfix(&a.Val, 1)
		mpsetexp(a, int(a.Exp)-1) // can set 'a' to zero
		s = sigfig(a)
		if s == 0 {
			return 0
		}
	}

	// pick up the mantissa, a rounding bit, and a tie-breaking bit in a uvlong
	s = prec + 2
	v := uint64(0)
	var i int
	// Consume whole words from the top while at least Mpscale bits are
	// still wanted.
	for i = Mpnorm - 1; s >= Mpscale; i-- {
		v = v<<Mpscale | uint64(a.Val.A[i])
		s -= Mpscale
	}

	if s > 0 {
		// Take the top s bits of the next word; any non-zero bits left
		// behind are folded into the low (sticky) bit of v.
		v = v<<uint(s) | uint64(a.Val.A[i])>>uint(Mpscale-s)
		if a.Val.A[i]&((1<<uint(Mpscale-s))-1) != 0 {
			v |= 1
		}
		i--
	}

	// Any non-zero lower word also sets the sticky bit.
	for ; i >= 0; i-- {
		if a.Val.A[i] != 0 {
			v |= 1
		}
	}

	// gradual underflow
	e := Mpnorm*Mpscale + int(a.Exp) - prec
	minexp := bias + 1 - prec + 1
	if e < minexp {
		s := minexp - e
		if s > prec+1 {
			s = prec + 1
		}
		// Record that non-zero bits are about to be shifted out.
		if v&((1<<uint(s))-1) != 0 {
			v |= 1 << uint(s)
		}
		v >>= uint(s)
		e = minexp
	}

	// round to even
	v |= (v & 4) >> 2
	v += v & 1
	v >>= 2

	f := float64(v)
	f = math.Ldexp(f, e)

	if a.Val.Neg != 0 {
		f = -f
	}

	return f
}
// quotToFloat32 returns the non-negative float32 value
// nearest to the quotient a/b, using round-to-even in
// halfway cases. It does not mutate its arguments.
// Preconditions: b is non-zero; a and b have no common factors.
//
// The second result, exact, reports whether f equals a/b exactly: it is
// false whenever the division left a remainder, bits were dropped during
// rounding, or the result overflowed to infinity.
// It panics with "division by zero" if b has bit length zero.
func quotToFloat32(a, b nat) (f float32, exact bool) {
	const (
		// float size in bits
		Fsize = 32

		// mantissa
		Msize  = 23
		Msize1 = Msize + 1 // incl. implicit 1
		Msize2 = Msize1 + 1

		// exponent
		Esize = Fsize - Msize1
		Ebias = 1<<(Esize-1) - 1
		Emin  = 1 - Ebias
		Emax  = Ebias
	)

	// TODO(adonovan): specialize common degenerate cases: 1.0, integers.
	alen := a.bitLen()
	if alen == 0 {
		return 0, true
	}
	blen := b.bitLen()
	if blen == 0 {
		panic("division by zero")
	}

	// 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1))
	// (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B).
	// This is 2 or 3 more than the float32 mantissa field width of Msize:
	// - the optional extra bit is shifted away in step 3 below.
	// - the high-order 1 is omitted in "normal" representation;
	// - the low-order 1 will be used during rounding then discarded.
	exp := alen - blen
	// Work on copies so the caller's a and b are left untouched.
	var a2, b2 nat
	a2 = a2.set(a)
	b2 = b2.set(b)
	if shift := Msize2 - exp; shift > 0 {
		a2 = a2.shl(a2, uint(shift))
	} else if shift < 0 {
		b2 = b2.shl(b2, uint(-shift))
	}

	// 2. Compute quotient and remainder (q, r). NB: due to the
	// extra shift, the low-order bit of q is logically the
	// high-order bit of r.
	var q nat
	q, r := q.div(a2, a2, b2) // (recycle a2)
	mantissa := low32(q)
	haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half

	// 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1
	// (in effect---we accomplish this incrementally).
	if mantissa>>Msize2 == 1 {
		// The bit about to be shifted out counts as part of the remainder.
		if mantissa&1 == 1 {
			haveRem = true
		}
		mantissa >>= 1
		exp++
	}
	if mantissa>>Msize1 != 1 {
		panic(fmt.Sprintf("expected exactly %d bits of result", Msize2))
	}

	// 4. Rounding.
	if Emin-Msize <= exp && exp <= Emin {
		// Denormal case; lose 'shift' bits of precision.
		shift := uint(Emin - (exp - 1)) // [1..Esize1)
		lostbits := mantissa & (1<<shift - 1)
		haveRem = haveRem || lostbits != 0
		mantissa >>= shift
		exp = 2 - Ebias // == exp + shift
	}
	// Round q using round-half-to-even.
	exact = !haveRem
	if mantissa&1 != 0 {
		// The rounding bit is set, so the result cannot be exact.
		exact = false
		// Round up when above the halfway point (haveRem) or, on an exact
		// tie, when the retained low bit is odd.
		if haveRem || mantissa&2 != 0 {
			if mantissa++; mantissa >= 1<<Msize2 {
				// Complete rollover 11...1 => 100...0, so shift is safe
				mantissa >>= 1
				exp++
			}
		}
	}
	mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1<<Msize1.

	f = float32(math.Ldexp(float64(mantissa), exp-Msize1))
	// Overflow to infinity is never an exact representation of a/b.
	if math.IsInf(float64(f), 0) {
		exact = false
	}
	return
}
{"ceil", mathUnaryOp(math.Ceil)}, {"cosh", mathUnaryOp(math.Cosh)}, {"cos", mathUnaryOp(math.Cos)}, {"deg", mathUnaryOp(func(x float64) float64 { return x / radiansPerDegree })}, {"exp", mathUnaryOp(math.Exp)}, {"floor", mathUnaryOp(math.Floor)}, {"fmod", mathBinaryOp(math.Mod)}, {"frexp", func(l *State) int { f, e := math.Frexp(CheckNumber(l, 1)) l.PushNumber(f) l.PushInteger(e) return 2 }}, {"ldexp", func(l *State) int { x, e := CheckNumber(l, 1), CheckInteger(l, 2) l.PushNumber(math.Ldexp(x, e)) return 1 }}, {"log", func(l *State) int { x := CheckNumber(l, 1) if l.IsNoneOrNil(2) { l.PushNumber(math.Log(x)) } else if base := CheckNumber(l, 2); base == 10.0 { l.PushNumber(math.Log10(x)) } else { l.PushNumber(math.Log(x) / math.Log(base)) } return 1 }}, {"max", reduce(math.Max)}, {"min", reduce(math.Min)},