Ejemplo n.º 1
0
// glrFloat64frombits decodes b, an IEEE 754 binary64 bit pattern, into the
// float64 it represents using only arithmetic (no unsafe pointer cast).
//
// Every NaN bit pattern produces the same NaN value; the payload carried in
// the fraction bits is not preserved.
func glrFloat64frombits(b uint64) float64 {
	const (
		fracBits = 52
		expMask  = 1<<11 - 1
		fracMask = 1<<fracBits - 1
		bias     = 1023
	)

	sign := float64(1)
	if b>>63 != 0 {
		sign = -1
	}
	exp := (b >> fracBits) & expMask
	frac := b & fracMask

	switch exp {
	case expMask:
		// All-ones exponent: NaN when any fraction bit is set, else ±Inf.
		if frac != 0 {
			zero := 0.0
			return zero / zero
		}
		return sign / 0
	case 0:
		// Zero or subnormal: no implicit leading bit, exponent pinned to 1.
		exp = 1
	default:
		// Normal number: restore the implicit leading one.
		frac |= 1 << fracBits
	}

	return math.Ldexp(float64(frac), int(exp)-bias-fracBits) * sign
}
Ejemplo n.º 2
0
// glrFloat32frombits decodes b, an IEEE 754 binary32 bit pattern, into the
// float32 it represents using only arithmetic (no unsafe pointer cast).
//
// Every NaN bit pattern produces the same NaN value; the payload carried in
// the fraction bits is not preserved.
func glrFloat32frombits(b uint32) float32 {
	const (
		fracBits = 23
		expMask  = 1<<8 - 1
		fracMask = 1<<fracBits - 1
		bias     = 127
	)

	sign := float32(1)
	if b>>31 != 0 {
		sign = -1
	}
	exp := (b >> fracBits) & expMask
	frac := b & fracMask

	switch exp {
	case expMask:
		// All-ones exponent: NaN when any fraction bit is set, else ±Inf.
		if frac != 0 {
			zero := 0.0
			return float32(zero / zero)
		}
		return sign / 0 // Inf
	case 0:
		// Zero or subnormal: no implicit leading bit, exponent pinned to 1.
		exp = 1
	default:
		// Normal number: restore the implicit leading one.
		frac |= 1 << fracBits
	}

	return float32(math.Ldexp(float64(frac), int(exp)-bias-fracBits)) * sign
}
Ejemplo n.º 3
0
Archivo: scan.go Proyecto: vsayer/go
// convertFloat parses str as a floating-point number with bit size n.
// If str contains a 'p', the text before it is parsed as the mantissa and
// the text after it as a decimal power-of-two exponent; the result is
// mantissa × 2**exponent. Parse failures are handed to s.error.
func (s *ss) convertFloat(str string, n int) float64 {
	pos := indexRune(str, 'p')
	if pos < 0 {
		// No binary exponent: strconv can handle the whole string.
		val, err := strconv.ParseFloat(str, n)
		if err != nil {
			s.error(err)
		}
		return val
	}

	// strconv.ParseFloat is not used for the power-of-two exponent here;
	// the two halves are parsed separately and combined with math.Ldexp.
	mant, err := strconv.ParseFloat(str[:pos], n)
	if err != nil {
		// Report the complete input rather than just the mantissa text.
		if numErr, ok := err.(*strconv.NumError); ok {
			numErr.Num = str
		}
		s.error(err)
	}
	exp2, err := strconv.Atoi(str[pos+1:])
	if err != nil {
		// Report the complete input rather than just the exponent text.
		if numErr, ok := err.(*strconv.NumError); ok {
			numErr.Num = str
		}
		s.error(err)
	}
	return math.Ldexp(mant, exp2)
}
Ejemplo n.º 4
0
// float32Unpack decodes a packed 32-bit value into a float32. The fields read
// from x are: bit 31 as the sign, bits 21-30 as the exponent (biased by 788)
// and bits 0-20 as an integer mantissa. The result is
// ±mantissa × 2**(exponent-788).
func float32Unpack(x uint32) float32 {
	const (
		signBit      = 0x80000000
		exponentMask = 0x7fe00000
		mantissaMask = 0x1fffff
		exponentBias = 788
	)

	exp := int((x&exponentMask)>>21) - exponentBias
	mant := float64(x & mantissaMask)
	if x&signBit != 0 {
		mant = -mant
	}
	return float32(math.Ldexp(mant, exp))
}
Ejemplo n.º 5
0
// TestFloat32Distribution sweeps a grid of (sign, mantissa, exponent) values
// that is broader than the float32 range and checks, via
// checkIsBestApprox32, that Rat.Float32 always picks the closest float32
// approximation. Each result is also passed to checkNonLossyRoundtrip32.
func TestFloat32Distribution(t *testing.T) {
	// Offsets added to each power of two to vary the mantissa.
	offsets := []int64{0, 1, 3, 5, 7, 9, 11}

	// Step sizes for the bit-width sweep and the exponent sweep.
	widthStep, expStep := uint64(1), 1 // soak test (~1.5s on x86-64)
	if testing.Short() {
		widthStep, expStep = 5, 15 // quick test (~60ms on x86-64)
	}

	for _, sign := range "+-" {
		negative := sign == '-'
		for _, off := range offsets {
			for width := uint64(0); width < 30; width += widthStep {
				mant := 1<<width + off
				if negative {
					mant = -mant
				}
				for exp := -150; exp < 150; exp += expStep {
					// Build mant × 2**exp as an exact rational.
					num, den := NewInt(mant), NewInt(1)
					if exp > 0 {
						num.Lsh(num, uint(exp))
					} else {
						den.Lsh(den, uint(-exp))
					}
					r := new(Rat).SetFrac(num, den)
					got, _ := r.Float32()

					if !checkIsBestApprox32(t, got, r) {
						// Add the inputs that produced the failure.
						t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)",
							mant, exp, got, got, math.Ldexp(float64(mant), exp), r)
					}

					checkNonLossyRoundtrip32(t, got)
				}
			}
		}
	}
}
Ejemplo n.º 6
0
// ToDoubleHelper converts f to a float64. When f is not a normal value the
// result is a signed zero, an infinity chosen by f.sign, or a NaN carrying
// f's sign. Otherwise the result is sign × m × 2**f.bn_exp, where m is
// f.bn read as a uint64.
func (f ExactFloat) ToDoubleHelper() float64 {
	sign := float64(f.sign)
	switch {
	case f.is_normal():
		return sign * math.Ldexp(float64(f.bn.Uint64()), f.bn_exp)
	case f.is_zero():
		return math.Copysign(0, sign)
	case f.is_inf():
		return math.Inf(f.sign)
	default:
		// Neither normal, zero nor infinite: treated as NaN.
		return math.Copysign(math.NaN(), sign)
	}
}
Ejemplo n.º 7
0
// ClipToPaddedFace returns the (u,v) coordinates for the portion of the edge AB that
// intersects the given face, but rather than clipping to the square [-1,1]x[-1,1]
// in (u,v) space, this method clips to [-R,R]x[-R,R] where R=(1+padding).
// Padding must be non-negative.
//
// The third result, intersects, reports whether the edge meets the padded
// face. When the early intersectsFace check fails, aUV and bUV are returned
// as their zero values.
func ClipToPaddedFace(a, b Point, f int, padding float64) (aUV, bUV r2.Point, intersects bool) {
	// Fast path: both endpoints are on the given face.
	if face(a.Vector) == f && face(b.Vector) == f {
		au, av := validFaceXYZToUV(f, a.Vector)
		bu, bv := validFaceXYZToUV(f, b.Vector)
		return r2.Point{au, av}, r2.Point{bu, bv}, true
	}

	// Convert everything into the (u,v,w) coordinates of the given face. Note
	// that the cross product *must* be computed in the original (x,y,z)
	// coordinate system because PointCross (unlike the mathematical cross
	// product) can produce different results in different coordinate systems
	// when one argument is a linear multiple of the other, due to the use of
	// symbolic perturbations.
	normUVW := pointUVW(faceXYZtoUVW(f, a.PointCross(b)))
	aUVW := pointUVW(faceXYZtoUVW(f, a))
	bUVW := pointUVW(faceXYZtoUVW(f, b))

	// Padding is handled by scaling the u- and v-components of the normal.
	// Letting R=1+padding, this means that when we compute the dot product of
	// the normal with a cube face vertex (such as (-1,-1,1)), we will actually
	// compute the dot product with the scaled vertex (-R,-R,1). This allows
	// methods such as intersectsFace, exitAxis, etc, to handle padding
	// with no further modifications.
	scaleUV := 1 + padding
	scaledN := pointUVW{r3.Vector{X: scaleUV * normUVW.X, Y: scaleUV * normUVW.Y, Z: normUVW.Z}}
	if !scaledN.intersectsFace() {
		return aUV, bUV, false
	}

	// TODO(roberts): This is a workaround for extremely small vectors where some
	// loss of precision can occur in Normalize causing underflow. When PointCross
	// is updated to work around this, this can be removed.
	// The normal is rescaled by 2**563 when its largest component is below 2**-511.
	if math.Max(math.Abs(normUVW.X), math.Max(math.Abs(normUVW.Y), math.Abs(normUVW.Z))) < math.Ldexp(1, -511) {
		normUVW = pointUVW{normUVW.Mul(math.Ldexp(1, 563))}
	}

	normUVW = pointUVW{normUVW.Normalize()}

	aTan := pointUVW{normUVW.Cross(aUVW.Vector)}
	bTan := pointUVW{bUVW.Cross(normUVW.Vector)}

	// As described in clipDestination, if the sum of the scores from clipping the two
	// endpoints is 3 or more, then the segment does not intersect this face.
	// Note that ":=" below assigns to the named results aUV and bUV (they are
	// already declared in this scope) and only introduces the score variables.
	aUV, aScore := clipDestination(bUVW, aUVW, pointUVW{scaledN.Mul(-1)}, bTan, aTan, scaleUV)
	bUV, bScore := clipDestination(aUVW, bUVW, scaledN, aTan, bTan, scaleUV)

	return aUV, bUV, aScore+bScore < 3
}
Ejemplo n.º 8
0
// TestFloat64Distribution sweeps a grid of (sign, mantissa, exponent) values
// that is broader than the float64 range and checks, via checkIsBestApprox,
// that Rat.Float64 always picks the closest float64 approximation. Each
// result is also passed to checkNonLossyRoundtrip.
func TestFloat64Distribution(t *testing.T) {
	// Offsets added to each power of two to vary the mantissa.
	offsets := []int64{0, 1, 3, 5, 7, 9, 11}

	// Step sizes for the bit-width sweep and the exponent sweep.
	widthStep, expStep := uint64(1), 1 // soak test (~75s on x86-64)
	if testing.Short() {
		widthStep, expStep = 10, 500 // quick test (~12ms on x86-64)
	}

	for _, sign := range "+-" {
		negative := sign == '-'
		for _, off := range offsets {
			for width := uint64(0); width < 60; width += widthStep {
				mant := int64(1<<width + off)
				if negative {
					mant = -mant
				}
				for exp := -1100; exp < 1100; exp += expStep {
					// Build mant × 2**exp as an exact rational.
					num, den := NewInt(mant), NewInt(1)
					if exp > 0 {
						num.Lsh(num, uint(exp))
					} else {
						den.Lsh(den, uint(-exp))
					}
					r := new(Rat).SetFrac(num, den)
					got, _ := r.Float64()

					if !checkIsBestApprox(t, got, r) {
						// Add the inputs that produced the failure.
						t.Errorf("(input was mantissa %#x, exp %d; f=%g (%b); f~%g; r=%v)",
							mant, exp, got, got, math.Ldexp(float64(mant), exp), r)
					}

					checkNonLossyRoundtrip(t, got)
				}
			}
		}
	}
}
Ejemplo n.º 9
0
// NewExactFloat builds an ExactFloat from v. The sign field is taken from
// v's sign bit. NaN and infinity are recorded through set_nan and set_inf;
// for any other value |v| is split with math.Frexp, the fraction scaled by
// 2**doubleMantissaBits is stored in bn as an integer, bn_exp is set to the
// matching exponent, and the result is passed through Canonicalize.
func NewExactFloat(v float64) ExactFloat {
	f := ExactFloat{bn: big.NewInt(0)}

	f.sign = 1
	if math.Signbit(v) {
		f.sign = -1
	}

	switch {
	case math.IsNaN(v):
		f.set_nan()
	case math.IsInf(v, int(f.sign)):
		f.set_inf(f.sign)
	default:
		frac, exp := math.Frexp(math.Abs(v))
		f.bn = f.bn.SetUint64(uint64(math.Ldexp(frac, doubleMantissaBits)))
		f.bn_exp = exp - doubleMantissaBits
		f.Canonicalize()
	}
	return f
}
Ejemplo n.º 10
0
// NewFreeze creates a Freeze whose signal holds Dtof(d, in.SampleRate())
// samples captured from in. The backing buffer (prv) is allocated with a
// power-of-two length, and sig is the leading slice of it with the requested
// length. The buffer is filled by repeatedly dispatching in's inputs and
// copying in.Samples() one in.BufferLen() chunk at a time.
func NewFreeze(d time.Duration, in Sound) *Freeze {
	frames := Dtof(d, in.SampleRate())

	// When frames is zero or not already a power of two, size the buffer as
	// 2**e, where e is the binary exponent math.Frexp reports for frames.
	size := frames
	if isPow2 := size != 0 && size&(size-1) == 0; !isPow2 {
		_, e := math.Frexp(float64(size))
		size = int(math.Ldexp(1, e))
	}

	frz := &Freeze{mono: newmono(nil), prv: make(Discrete, size)}
	frz.sig = frz.prv[:frames]

	inps := GetInputs(in)
	dp := new(Dispatcher)

	// Slicing frz.sig past its length is allowed up to the capacity of prv.
	for off := 0; off < size; off += in.BufferLen() {
		dp.Dispatch(1, inps...)
		ringcopy(frz.sig[off:off+in.BufferLen()], in.Samples(), 0)
	}
	return frz
}
Ejemplo n.º 11
0
// ext۰math۰Ldexp adapts math.Ldexp to the (frame, []value) calling
// convention. args[0] must hold a float64 and args[1] an int; the type
// assertions panic otherwise. fr is not used.
func ext۰math۰Ldexp(fr *frame, args []value) value {
	frac := args[0].(float64)
	exp := args[1].(int)
	return math.Ldexp(frac, exp)
}
Ejemplo n.º 12
0
Archivo: rat.go Proyecto: jmesmon/gcc
// quotToFloat returns the non-negative IEEE 754 double-precision
// value nearest to the quotient a/b, using round-to-even in halfway
// cases.  It does not mutate its arguments.
// Preconditions: b is non-zero; a and b have no common factors.
//
// The second result, exact, is true only when no remainder and no rounding
// bit had to be discarded and the result did not overflow to infinity.
// It panics if b has bit length zero.
func quotToFloat(a, b nat) (f float64, exact bool) {
	// TODO(adonovan): specialize common degenerate cases: 1.0, integers.
	alen := a.bitLen()
	if alen == 0 {
		// a is zero, so the quotient is exactly zero.
		return 0, true
	}
	blen := b.bitLen()
	if blen == 0 {
		panic("division by zero")
	}

	// 1. Left-shift A or B such that quotient A/B is in [1<<53, 1<<55).
	// (54 bits if A<B when they are left-aligned, 55 bits if A>=B.)
	// This is 2 or 3 more than the float64 mantissa field width of 52:
	// - the optional extra bit is shifted away in step 3 below.
	// - the high-order 1 is omitted in float64 "normal" representation;
	// - the low-order 1 will be used during rounding then discarded.
	exp := alen - blen
	// Work on copies so that the arguments are left untouched.
	var a2, b2 nat
	a2 = a2.set(a)
	b2 = b2.set(b)
	if shift := 54 - exp; shift > 0 {
		a2 = a2.shl(a2, uint(shift))
	} else if shift < 0 {
		b2 = b2.shl(b2, uint(-shift))
	}

	// 2. Compute quotient and remainder (q, r).  NB: due to the
	// extra shift, the low-order bit of q is logically the
	// high-order bit of r.
	var q nat
	q, r := q.div(a2, a2, b2) // (recycle a2)
	mantissa := low64(q)
	haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half

	// 3. If quotient didn't fit in 54 bits, re-do division by b2<<1
	// (in effect---we accomplish this incrementally).
	if mantissa>>54 == 1 {
		// The bit about to be shifted out counts as part of the remainder.
		if mantissa&1 == 1 {
			haveRem = true
		}
		mantissa >>= 1
		exp++
	}
	if mantissa>>53 != 1 {
		panic("expected exactly 54 bits of result")
	}

	// 4. Rounding.
	if -1022-52 <= exp && exp <= -1022 {
		// Denormal case; lose 'shift' bits of precision.
		shift := uint64(-1022 - (exp - 1)) // [1..53)
		lostbits := mantissa & (1<<shift - 1)
		haveRem = haveRem || lostbits != 0
		mantissa >>= shift
		exp = -1023 + 2 // == exp + shift
	}
	// Round q using round-half-to-even.
	exact = !haveRem
	if mantissa&1 != 0 {
		// The rounding bit is set, so the result cannot be exact.
		exact = false
		// Round up when above half (haveRem) or exactly half with an odd
		// mantissa (bit 1 set); otherwise the bit is simply dropped below.
		if haveRem || mantissa&2 != 0 {
			if mantissa++; mantissa >= 1<<54 {
				// Complete rollover 11...1 => 100...0, so shift is safe
				mantissa >>= 1
				exp++
			}
		}
	}
	mantissa >>= 1 // discard rounding bit.  Mantissa now scaled by 2^53.

	f = math.Ldexp(float64(mantissa), exp-53)
	if math.IsInf(f, 0) {
		// Overflow to infinity is never an exact result.
		exact = false
	}
	return
}
Ejemplo n.º 13
0
// epsilonsqrt returns 2**-26.
func epsilonsqrt() float64 {
	const exponent = -26
	return math.Ldexp(1, exponent)
}
Ejemplo n.º 14
0
		}
	}
	return false
}

// Difficult boundary cases, derived from tables given in
//	Vern Paxson, A Program for Testing IEEE Decimal-Binary Conversion
//	ftp://ftp.ee.lbl.gov/testbase-report.ps.Z
//
var ftoaTests = []struct {
	N int
	F float64
	A string
}{
	// Table 3: Stress Inputs for Converting 53-bit Binary to Decimal, < 1/2 ULP
	{0, math.Ldexp(8511030020275656, -342), "9.e-88"},
	{1, math.Ldexp(5201988407066741, -824), "4.6e-233"},
	{2, math.Ldexp(6406892948269899, +237), "1.41e+87"},
	{3, math.Ldexp(8431154198732492, +72), "3.981e+37"},
	{4, math.Ldexp(6475049196144587, +99), "4.1040e+45"},
	{5, math.Ldexp(8274307542972842, +726), "2.92084e+234"},
	{6, math.Ldexp(5381065484265332, -456), "2.891946e-122"},
	{7, math.Ldexp(6761728585499734, -1057), "4.3787718e-303"},
	{8, math.Ldexp(7976538478610756, +376), "1.22770163e+129"},
	{9, math.Ldexp(5982403858958067, +377), "1.841552452e+129"},
	{10, math.Ldexp(5536995190630837, +93), "5.4835744350e+43"},
	{11, math.Ldexp(7225450889282194, +710), "3.89190181146e+229"},
	{12, math.Ldexp(7225450889282194, +709), "1.945950905732e+229"},
	{13, math.Ldexp(8703372741147379, +117), "1.4460958381605e+51"},
	{14, math.Ldexp(8944262675275217, -1001), "4.17367747458531e-286"},
	{15, math.Ldexp(7459803696087692, -707), "1.107950772878888e-197"},
Ejemplo n.º 15
0
// randomFloat64 returns randomBits(53) scaled by 2**-53, i.e. a multiple of
// 2**-53. Assuming randomBits(n) yields a uniformly distributed n-bit
// non-negative integer (defined elsewhere; not verified here), the result is
// uniformly distributed over those multiples in [0,1). Not every float64 in
// that range can be produced.
func randomFloat64() float64 {
	const mantissaBits = 53
	raw := randomBits(mantissaBits)
	return math.Ldexp(float64(raw), -mantissaBits)
}
Ejemplo n.º 16
0
// Value returns the metric's value on the unit sphere for cells at the given
// level, computed as m.Deriv × 2**(-m.dim × level).
func (m Metric) Value(level int) float64 {
	shift := -m.dim * level
	return math.Ldexp(m.Deriv, shift)
}
Ejemplo n.º 17
0
// mathLdexp reads a number from argument 1 and an integer from argument 2,
// pushes math.Ldexp of the two onto L as an LNumber, and returns 1 (the
// count of pushed results).
func mathLdexp(L *LState) int {
	frac := float64(L.CheckNumber(1))
	exp := L.CheckInt(2)
	L.Push(LNumber(math.Ldexp(frac, exp)))
	return 1
}
Ejemplo n.º 18
0
// Ldexp is the inverse of Frexp: it returns frac × 2**exp.
// The computation is carried out in float64 by math.Ldexp and the result is
// then converted to float32.
//
// Special cases are:
//	Ldexp(±0, exp) = ±0
//	Ldexp(±Inf, exp) = ±Inf
//	Ldexp(NaN, exp) = NaN
func Ldexp(frac float32, exp int) float32 {
	wide := math.Ldexp(float64(frac), exp)
	return float32(wide)
}
Ejemplo n.º 19
0
// Ldexp returns frac × 2**exp by delegating to math.Ldexp.
func Ldexp(frac float64, exp int) float64 {
	scaled := math.Ldexp(frac, exp)
	return scaled
}
Ejemplo n.º 20
0
// innerDecodeC decodes the single CBOR item whose initial byte is c, reading
// any further bytes it needs from dec.rin, and stores the result into rv.
//
// c & typeMask selects the item type and c & infoBits the additional
// information, which handleInfoBits expands into aux. Errors from reading or
// from the set* helpers are returned to the caller. A cbor7 item with an
// unrecognised info value, and the decimal/bigfloat tags, store nothing and
// return nil.
func (dec *Decoder) innerDecodeC(rv reflect.Value, c byte) error {
	major := c & typeMask
	info := c & infoBits

	aux, err := dec.handleInfoBits(info)
	if err != nil {
		log.Printf("error in handleInfoBits: %v", err)
		return err
	}

	switch major {
	case cborUint:
		return setUint(rv, aux)

	case cborNegint:
		if aux <= 0x7fffffffffffffff {
			return setInt(rv, -1-int64(aux))
		}
		// -1-aux does not fit in an int64; compute it as a big.Int.
		magnitude := new(big.Int).SetUint64(aux)
		result := new(big.Int).Sub(big.NewInt(-1), magnitude)
		return setBignum(rv, result)

	case cborBytes:
		if info != varFollows {
			// Definite length: aux is the byte count. ReadFull reads all of
			// it or reports an error, so the count it returns is not needed.
			buf := make([]byte, aux)
			if _, err = io.ReadFull(dec.rin, buf); err != nil {
				return err
			}
			return setBytes(rv, buf)
		}

		// Indefinite length: a run of byte-string chunks ended by 0xff.
		chunks := make([][]byte, 0, 1)
		total := 0
		head := []byte{0}
		for {
			if _, err = io.ReadFull(dec.rin, head); err != nil {
				log.Printf("error reading next byte for bar bytes")
				return err
			}
			if head[0] == 0xff {
				// End marker: concatenate the chunks collected so far.
				joined := make([]byte, total)
				pos := 0
				for _, chunk := range chunks {
					pos += copy(joined[pos:], chunk)
				}
				return setBytes(rv, joined)
			}
			if head[0]&typeMask != cborBytes {
				return fmt.Errorf("sub of var bytes is type %x, wanted %x", head[0], cborBytes)
			}
			var chunk []byte
			if err = dec.innerDecodeC(reflect.ValueOf(&chunk), head[0]); err != nil {
				log.Printf("error decoding sub bytes")
				return err
			}
			total += len(chunk)
			chunks = append(chunks, chunk)
		}

	case cborText:
		return dec.decodeText(rv, info, aux)

	case cborArray:
		return dec.decodeArray(rv, info, aux)

	case cborMap:
		return dec.decodeMap(rv, info, aux)

	case cborTag:
		// aux is the tag number; the tagged item starts with the next byte.
		next := []byte{0}
		if _, err = io.ReadFull(dec.rin, next); err != nil {
			return err
		}
		switch aux {
		case tagBignum:
			bn, err := dec.decodeBignum(next[0])
			if err != nil {
				return err
			}
			return setBignum(rv, bn)
		case tagNegBignum:
			bn, err := dec.decodeBignum(next[0])
			if err != nil {
				return err
			}
			// The encoded value n stands for -1-n.
			return setBignum(rv, new(big.Int).Sub(big.NewInt(-1), bn))
		case tagDecimal:
			log.Printf("TODO: directly read bytes into decimal")
		case tagBigfloat:
			log.Printf("TODO: directly read bytes into bigfloat")
		default:
			// Any other tag is ignored and the tagged item decoded as-is.
			return dec.innerDecodeC(rv, next[0])
		}
		return nil

	case cbor7:
		switch info {
		case int16Follows:
			// 16-bit float: sign in bit 15, 5 exponent bits, 10 mantissa bits.
			exp := (aux >> 10) & 0x01f
			mant := aux & 0x03ff
			var val float64
			switch {
			case exp == 0:
				val = math.Ldexp(float64(mant), -24)
			case exp != 31:
				// exp is unsigned, so exp-25 wraps for exp < 25; the
				// conversion to int turns it back into the negative value.
				val = math.Ldexp(float64(mant+1024), int(exp-25))
			case mant == 0:
				val = math.Inf(1)
			default:
				val = math.NaN()
			}
			if aux&0x08000 != 0 {
				val = -val
			}
			return setFloat64(rv, val)
		case int32Follows:
			return setFloat32(rv, math.Float32frombits(uint32(aux)))
		case int64Follows:
			return setFloat64(rv, math.Float64frombits(aux))
		case cborFalse:
			reflect.Indirect(rv).Set(reflect.ValueOf(false))
		case cborTrue:
			reflect.Indirect(rv).Set(reflect.ValueOf(true))
		case cborNull:
			return setNil(rv)
		}
	}

	return err
}
Ejemplo n.º 21
0
// mpgetfltN converts the multi-precision value a to a float64, rounding the
// mantissa to prec bits (round to even) and applying gradual underflow below
// a minimum exponent derived from bias.
//
// NOTE(review): prec and bias look like the mantissa width and exponent bias
// of the target floating-point format — confirm against the callers.
//
// It returns 0 when sigfig(a) reports no significant digits. Overflow and
// normalization problems are reported through Yyerror rather than returned.
// The function may modify a (via mpnorm, Mpshiftfix and mpsetexp).
func mpgetfltN(a *Mpflt, prec int, bias int) float64 {
	// Report a pending overflow only when no other errors have been recorded.
	if a.Val.Ovf != 0 && nsavederrors+nerrors == 0 {
		Yyerror("mpgetflt ovf")
	}

	s := sigfig(a)
	if s == 0 {
		return 0
	}

	if s != Mpnorm {
		Yyerror("mpgetflt norm")
		mpnorm(a)
	}

	// Shift left until the Mpsign bit of the top word is set, lowering the
	// exponent by one for each shift.
	for a.Val.A[Mpnorm-1]&Mpsign == 0 {
		Mpshiftfix(&a.Val, 1)
		mpsetexp(a, int(a.Exp)-1) // can set 'a' to zero
		s = sigfig(a)
		if s == 0 {
			return 0
		}
	}

	// pick up the mantissa, a rounding bit, and a tie-breaking bit in a uvlong
	s = prec + 2

	v := uint64(0)
	var i int
	// Take whole words while at least Mpscale bits are still wanted.
	for i = Mpnorm - 1; s >= Mpscale; i-- {
		v = v<<Mpscale | uint64(a.Val.A[i])
		s -= Mpscale
	}

	// Take the top s bits of the next word; if any of its remaining low bits
	// are set, record that in the lowest bit of v.
	if s > 0 {
		v = v<<uint(s) | uint64(a.Val.A[i])>>uint(Mpscale-s)
		if a.Val.A[i]&((1<<uint(Mpscale-s))-1) != 0 {
			v |= 1
		}
		i--
	}

	// Any non-zero word below the ones consumed also sets the lowest bit.
	for ; i >= 0; i-- {
		if a.Val.A[i] != 0 {
			v |= 1
		}
	}

	// gradual underflow
	e := Mpnorm*Mpscale + int(a.Exp) - prec

	minexp := bias + 1 - prec + 1
	if e < minexp {
		// Shift out the bits that fall below the minimum exponent (at most
		// prec+1 of them), folding any non-zero lost bits into the bit that
		// becomes the lowest one after the shift.
		s := minexp - e
		if s > prec+1 {
			s = prec + 1
		}
		if v&((1<<uint(s))-1) != 0 {
			v |= 1 << uint(s)
		}
		v >>= uint(s)
		e = minexp
	}

	// round to even
	// Bit 2 is the lowest bit kept after the final shift; copying it into
	// bit 0 makes a tie round up only when the kept mantissa is odd.
	v |= (v & 4) >> 2

	v += v & 1
	v >>= 2

	f := float64(v)
	f = math.Ldexp(f, e)

	if a.Val.Neg != 0 {
		f = -f
	}

	return f
}
Ejemplo n.º 22
0
// quotToFloat32 returns the non-negative float32 value
// nearest to the quotient a/b, using round-to-even in
// halfway cases.  It does not mutate its arguments.
// Preconditions: b is non-zero; a and b have no common factors.
//
// The second result, exact, is true only when no remainder and no rounding
// bit had to be discarded and the result did not overflow to infinity.
// It panics if b has bit length zero.
func quotToFloat32(a, b nat) (f float32, exact bool) {
	const (
		// float size in bits
		Fsize = 32

		// mantissa
		Msize  = 23
		Msize1 = Msize + 1 // incl. implicit 1
		Msize2 = Msize1 + 1

		// exponent
		Esize = Fsize - Msize1
		Ebias = 1<<(Esize-1) - 1
		Emin  = 1 - Ebias
		Emax  = Ebias
	)

	// TODO(adonovan): specialize common degenerate cases: 1.0, integers.
	alen := a.bitLen()
	if alen == 0 {
		// a is zero, so the quotient is exactly zero.
		return 0, true
	}
	blen := b.bitLen()
	if blen == 0 {
		panic("division by zero")
	}

	// 1. Left-shift A or B such that quotient A/B is in [1<<Msize1, 1<<(Msize2+1))
	// (Msize2 bits if A < B when they are left-aligned, Msize2+1 bits if A >= B).
	// This is 2 or 3 more than the float32 mantissa field width of Msize:
	// - the optional extra bit is shifted away in step 3 below.
	// - the high-order 1 is omitted in "normal" representation;
	// - the low-order 1 will be used during rounding then discarded.
	exp := alen - blen
	// Work on copies so that the arguments are left untouched.
	var a2, b2 nat
	a2 = a2.set(a)
	b2 = b2.set(b)
	if shift := Msize2 - exp; shift > 0 {
		a2 = a2.shl(a2, uint(shift))
	} else if shift < 0 {
		b2 = b2.shl(b2, uint(-shift))
	}

	// 2. Compute quotient and remainder (q, r).  NB: due to the
	// extra shift, the low-order bit of q is logically the
	// high-order bit of r.
	var q nat
	q, r := q.div(a2, a2, b2) // (recycle a2)
	mantissa := low32(q)
	haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half

	// 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1
	// (in effect---we accomplish this incrementally).
	if mantissa>>Msize2 == 1 {
		// The bit about to be shifted out counts as part of the remainder.
		if mantissa&1 == 1 {
			haveRem = true
		}
		mantissa >>= 1
		exp++
	}
	if mantissa>>Msize1 != 1 {
		panic(fmt.Sprintf("expected exactly %d bits of result", Msize2))
	}

	// 4. Rounding.
	if Emin-Msize <= exp && exp <= Emin {
		// Denormal case; lose 'shift' bits of precision.
		shift := uint(Emin - (exp - 1)) // [1..Esize1)
		lostbits := mantissa & (1<<shift - 1)
		haveRem = haveRem || lostbits != 0
		mantissa >>= shift
		exp = 2 - Ebias // == exp + shift
	}
	// Round q using round-half-to-even.
	exact = !haveRem
	if mantissa&1 != 0 {
		// The rounding bit is set, so the result cannot be exact.
		exact = false
		// Round up when above half (haveRem) or exactly half with an odd
		// mantissa (bit 1 set); otherwise the bit is simply dropped below.
		if haveRem || mantissa&2 != 0 {
			if mantissa++; mantissa >= 1<<Msize2 {
				// Complete rollover 11...1 => 100...0, so shift is safe
				mantissa >>= 1
				exp++
			}
		}
	}
	mantissa >>= 1 // discard rounding bit.  Mantissa now scaled by 1<<Msize1.

	f = float32(math.Ldexp(float64(mantissa), exp-Msize1))
	if math.IsInf(float64(f), 0) {
		// Overflow to infinity is never an exact result.
		exact = false
	}
	return
}
Ejemplo n.º 23
0
	{"ceil", mathUnaryOp(math.Ceil)},
	{"cosh", mathUnaryOp(math.Cosh)},
	{"cos", mathUnaryOp(math.Cos)},
	{"deg", mathUnaryOp(func(x float64) float64 { return x / radiansPerDegree })},
	{"exp", mathUnaryOp(math.Exp)},
	{"floor", mathUnaryOp(math.Floor)},
	{"fmod", mathBinaryOp(math.Mod)},
	{"frexp", func(l *State) int {
		f, e := math.Frexp(CheckNumber(l, 1))
		l.PushNumber(f)
		l.PushInteger(e)
		return 2
	}},
	{"ldexp", func(l *State) int {
		x, e := CheckNumber(l, 1), CheckInteger(l, 2)
		l.PushNumber(math.Ldexp(x, e))
		return 1
	}},
	{"log", func(l *State) int {
		x := CheckNumber(l, 1)
		if l.IsNoneOrNil(2) {
			l.PushNumber(math.Log(x))
		} else if base := CheckNumber(l, 2); base == 10.0 {
			l.PushNumber(math.Log10(x))
		} else {
			l.PushNumber(math.Log(x) / math.Log(base))
		}
		return 1
	}},
	{"max", reduce(math.Max)},
	{"min", reduce(math.Min)},