Example #1
// testIdamax checks that the native and cgo implementations of Idamax agree:
// both must panic in the same way, must leave the input vector unmodified,
// and, unless x contains NaN, must return the same index.
func testIdamax(str string, n int, x []float64, incX int) {
	var natAns int
	nat := func() { natAns = native.Implementation{}.Idamax(n, x, incX) }
	errNative := blasfuzz.CatchPanic(nat)

	cx := blasfuzz.CloneF64S(x)
	var cAns int
	c := func() { cAns = cgo.Implementation{}.Idamax(n, cx, incX) }
	errC := blasfuzz.CatchPanic(c)

	blasfuzz.SamePanic(str, errC, errNative)
	blasfuzz.SameF64S(str, cx, x)
	// Known issue: If the slice contains NaN the answer may vary
	if !floats.HasNaN(x) {
		blasfuzz.SameInt(str, cAns, natAns)
	}
}
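A minimal driver sketch for the comparison above. Only testIdamax itself comes from the snippet; the fuzzIdamaxOnce name, the random input shape, and the placeholder package clause are assumptions, not the original harness.

package blasfuzztest // placeholder: must match the package that defines testIdamax

import (
	"fmt"
	"math/rand"
)

// fuzzIdamaxOnce builds one random input and feeds it to testIdamax above.
// The slice is long enough for (n-1)*incX+1 strided elements; any panics are
// caught and compared inside testIdamax itself.
func fuzzIdamaxOnce(rng *rand.Rand) {
	n := rng.Intn(100)
	incX := rng.Intn(3) + 1
	x := make([]float64, n*incX+1)
	for i := range x {
		x[i] = rng.NormFloat64()
	}
	str := fmt.Sprintf("Case n=%d incX=%d x=%v", n, incX, x)
	testIdamax(str, n, x, incX)
}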
Example #2
// CDF returns the empirical cumulative distribution function of the data in x
// evaluated at q, that is, the fraction of the samples less than or equal to q.
// The exact behavior is determined by the CumulantKind. CDF is theoretically
// the inverse of the Quantile function, though it may not be the actual inverse
// for all values q and CumulantKinds.
//
// The x data must be sorted in increasing order. If weights is nil then all
// of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
//
// CumulantKind behaviors:
//  - Empirical: Returns the lowest fraction for which q is greater than or equal
//  to that fraction of samples
func CDF(q float64, c CumulantKind, x, weights []float64) float64 {
	if weights != nil && len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	if floats.HasNaN(x) {
		return math.NaN()
	}
	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}

	if q < x[0] {
		return 0
	}
	if q >= x[len(x)-1] {
		return 1
	}

	var sumWeights float64
	if weights == nil {
		sumWeights = float64(len(x))
	} else {
		sumWeights = floats.Sum(weights)
	}

	// Compute the CDF value according to the CumulantKind.
	switch c {
	case Empirical:
		// Sum the weight of all samples less than or equal to q and
		// return it as a fraction of the total weight.
		var w float64
		for i, v := range x {
			if v > q {
				return w / sumWeights
			}
			if weights == nil {
				w++
			} else {
				w += weights[i]
			}
		}
		panic("impossible")
	default:
		panic("stat: bad cumulant kind")
	}
}
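For reference, a small usage sketch of the Empirical behavior documented above. The gonum.org/v1/gonum/stat import path is an assumption about where this package is published; the expected values in the comments follow directly from the code above.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/stat" // assumed import path for the package above
)

func main() {
	// x must be sorted in increasing order; nil weights means every weight is 1.
	x := []float64{1, 2, 3, 4, 5}

	// Three of the five samples are <= 3, so the Empirical CDF at 3 is 0.6.
	fmt.Println(stat.CDF(3, stat.Empirical, x, nil))

	// Queries below the smallest sample return 0; at or above the largest, 1.
	fmt.Println(stat.CDF(0.5, stat.Empirical, x, nil))
	fmt.Println(stat.CDF(5, stat.Empirical, x, nil))
}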
Example #3
// IdamaxTest runs the Idamax method of blasser against the shared
// DoubleOneVectorCases table. Panic cases must panic, and non-panic cases
// must return the expected index; mismatches on NaN-containing inputs are
// only logged because the reference index is not well defined there.
func IdamaxTest(t *testing.T, blasser Idamaxer) {
	idamax := blasser.Idamax
	for _, c := range DoubleOneVectorCases {
		if c.Panic {
			f := func() { idamax(c.N, c.X, c.Incx) }
			testpanics(f, c.Name, t)
			continue
		}
		v := idamax(c.N, c.X, c.Incx)
		if v != c.Idamax {
			s := fmt.Sprintf("idamax: mismatch %v: expected %v, found %v", c.Name, c.Idamax, v)
			if floats.HasNaN(c.X) {
				log.Println(s)
			} else {
				t.Error(s)
			}
		}
	}
}
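A sketch of how a BLAS implementation might be wired into this helper. The import paths, package names, and the shape of the Idamaxer interface (an Idamax(n int, x []float64, incX int) int method, inferred from the call above) are assumptions.

package native_test // hypothetical test package for the implementation under test

import (
	"testing"

	"github.com/gonum/blas/native"   // assumed: provides native.Implementation
	"github.com/gonum/blas/testblas" // assumed: provides IdamaxTest and the case table
)

// TestIdamax runs the shared table-driven checks against native.Implementation,
// which is assumed to satisfy Idamaxer.
func TestIdamax(t *testing.T) {
	testblas.IdamaxTest(t, native.Implementation{})
}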
Example #4
// Quantile returns the sample value q of x such that q is greater than or
// equal to the fraction p of the samples. The exact behavior is determined by
// the CumulantKind, and p must be a number between 0 and 1. Quantile is
// theoretically the inverse of the CDF function, though it may not be the
// actual inverse for all values p and CumulantKinds.
//
// The x data must be sorted in increasing order. If weights is nil then all
// of the weights are 1. If weights is not nil, then len(x) must equal len(weights).
//
// CumulantKind behaviors:
//  - Empirical: Returns the lowest value q for which q is greater than or equal
//  to the fraction p of samples
func Quantile(p float64, c CumulantKind, x, weights []float64) float64 {
	if !(p >= 0 && p <= 1) {
		panic("stat: percentile out of bounds")
	}

	if weights != nil && len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	if floats.HasNaN(x) {
		return math.NaN() // This is needed because the algorithm breaks otherwise
	}
	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}

	var sumWeights float64
	if weights == nil {
		sumWeights = float64(len(x))
	} else {
		sumWeights = floats.Sum(weights)
	}
	switch c {
	case Empirical:
		var cumsum float64
		fidx := p * sumWeights
		for i := range x {
			if weights == nil {
				cumsum++
			} else {
				cumsum += weights[i]
			}
			if cumsum >= fidx {
				return x[i]
			}
		}
		panic("impossible")
	default:
		panic("stat: bad cumulant kind")
	}
}
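A companion usage sketch for Quantile, again assuming the gonum.org/v1/gonum/stat import path; the commented results are what the Empirical case above produces for this data.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/stat" // assumed import path for the package above
)

func main() {
	// x must be sorted in increasing order; nil weights means every weight is 1.
	x := []float64{1, 2, 3, 4, 5}

	// The smallest sample with at least half of the total weight at or below it is 3.
	fmt.Println(stat.Quantile(0.5, stat.Empirical, x, nil))

	// Quantile and CDF invert each other here: CDF(Quantile(0.6)) == 0.6.
	q := stat.Quantile(0.6, stat.Empirical, x, nil)
	fmt.Println(q, stat.CDF(q, stat.Empirical, x, nil))
}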
Example #5
// KolmogorovSmirnov computes the largest distance between two empirical CDFs.
// Each dataset x and y consists of sample locations and counts, xWeights and
// yWeights, respectively.
//
// x and y may have different lengths, though len(x) must equal len(xWeights)
// and len(y) must equal len(yWeights). If xWeights or yWeights is nil, the
// corresponding weights are all 1. Both x and y must be sorted.
//
// Special cases are:
//  = 0 if len(x) == len(y) == 0
//  = 1 if exactly one of len(x) and len(y) is 0
func KolmogorovSmirnov(x, xWeights, y, yWeights []float64) float64 {
	if xWeights != nil && len(x) != len(xWeights) {
		panic("stat: slice length mismatch")
	}
	if yWeights != nil && len(y) != len(yWeights) {
		panic("stat: slice length mismatch")
	}
	if len(x) == 0 || len(y) == 0 {
		if len(x) == 0 && len(y) == 0 {
			return 0
		}
		return 1
	}

	if floats.HasNaN(x) {
		return math.NaN()
	}
	if floats.HasNaN(y) {
		return math.NaN()
	}

	if !sort.Float64sAreSorted(x) {
		panic("x data are not sorted")
	}
	if !sort.Float64sAreSorted(y) {
		panic("y data are not sorted")
	}

	xWeightsNil := xWeights == nil
	yWeightsNil := yWeights == nil

	var (
		maxDist    float64
		xSum, ySum float64
		xCdf, yCdf float64
		xIdx, yIdx int
	)

	if xWeightsNil {
		xSum = float64(len(x))
	} else {
		xSum = floats.Sum(xWeights)
	}

	if yWeightsNil {
		ySum = float64(len(y))
	} else {
		ySum = floats.Sum(yWeights)
	}

	xVal := x[0]
	yVal := y[0]

	// Algorithm description:
	// The goal is to find the maximum difference in the empirical CDFs for the
	// two datasets. The CDFs are piecewise-constant, and thus the distance
	// between the CDFs will only change at the values themselves.
	//
	// To find the maximum distance, step through the data in ascending order
	// of value between the two datasets. At each step, compute the empirical CDF
	// and compare the local distance with the maximum distance.
	// Due to some corner cases, equal data entries must be tallied simultaneously.
	for {
		switch {
		case xVal < yVal:
			xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
		case yVal < xVal:
			yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
		case xVal == yVal:
			newX := x[xIdx]
			newY := y[yIdx]
			if newX < newY {
				xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
			} else if newY < newX {
				yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
			} else {
				// Update both; the values will still be equal on the next
				// iteration and the comparison will proceed correctly.
				xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil)
				yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil)
			}
		default:
			panic("unreachable")
		}

		dist := math.Abs(xCdf - yCdf)
		if dist > maxDist {
			maxDist = dist
		}

		// Both xCdf and yCdf will equal 1 at the end, so if we have reached the
		// end of either sample list, the distance is as large as it can be.
		if xIdx == len(x) || yIdx == len(y) {
			return maxDist
		}
	}
}
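A short usage sketch of the distance computed above, assuming the gonum.org/v1/gonum/stat import path. The three cases cover the extremes and a partial overlap; both inputs must be sorted and nil weights mean unit weights.

package main

import (
	"fmt"

	"gonum.org/v1/gonum/stat" // assumed import path for the package above
)

func main() {
	x := []float64{1, 2, 3}

	// Identical samples: the two empirical CDFs coincide, so the distance is 0.
	fmt.Println(stat.KolmogorovSmirnov(x, nil, x, nil))

	// Fully separated samples: one CDF reaches 1 while the other is still 0,
	// so the distance is 1.
	y := []float64{4, 5, 6}
	fmt.Println(stat.KolmogorovSmirnov(x, nil, y, nil))

	// Partially overlapping samples give a value strictly between 0 and 1.
	z := []float64{2, 3, 4}
	fmt.Println(stat.KolmogorovSmirnov(x, nil, z, nil))
}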