func testIdamax(str string, n int, x []float64, incX int) { var natAns int nat := func() { natAns = native.Implementation{}.Idamax(n, x, incX) } errNative := blasfuzz.CatchPanic(nat) cx := blasfuzz.CloneF64S(x) var cAns int c := func() { cAns = cgo.Implementation{}.Idamax(n, cx, incX) } errC := blasfuzz.CatchPanic(c) blasfuzz.SamePanic(str, errC, errNative) blasfuzz.SameF64S(str, cx, x) // Known issue: If the slice contains NaN the answer may vary if !floats.HasNaN(x) { blasfuzz.SameInt(str, cAns, natAns) } }
// CDF returns the empirical cumulative distribution function value of x, that is // the fraction of the samples less than or equal to q. The // exact behavior is determined by the CumulantKind. CDF is theoretically // the inverse of the Quantile function, though it may not be the actual inverse // for all values q and CumulantKinds. // // The x data must be sorted in increasing order. If weights is nil then all // of the weights are 1. If weights is not nil, then len(x) must equal len(weights). // // CumulantKind behaviors: // - Empirical: Returns the lowest fraction for which q is greater than or equal // to that fraction of samples func CDF(q float64, c CumulantKind, x, weights []float64) float64 { if weights != nil && len(x) != len(weights) { panic("stat: slice length mismatch") } if floats.HasNaN(x) { return math.NaN() } if !sort.Float64sAreSorted(x) { panic("x data are not sorted") } if q < x[0] { return 0 } if q >= x[len(x)-1] { return 1 } var sumWeights float64 if weights == nil { sumWeights = float64(len(x)) } else { sumWeights = floats.Sum(weights) } // Calculate the index switch c { case Empirical: // Find the smallest value that is greater than that percent of the samples var w float64 for i, v := range x { if v > q { return w / sumWeights } if weights == nil { w++ } else { w += weights[i] } } panic("impossible") default: panic("stat: bad cumulant kind") } }
func IdamaxTest(t *testing.T, blasser Idamaxer) { idamax := blasser.Idamax for _, c := range DoubleOneVectorCases { if c.Panic { f := func() { idamax(c.N, c.X, c.Incx) } testpanics(f, c.Name, t) continue } v := idamax(c.N, c.X, c.Incx) if v != c.Idamax { s := fmt.Sprintf("idamax: mismatch %v: expected %v, found %v", c.Name, c.Idamax, v) if floats.HasNaN(c.X) { log.Println(s) } else { t.Errorf(s) } } } }
// Quantile returns the sample of x such that x is greater than or // equal to the fraction p of samples. The exact behavior is determined by the // CumulantKind, and p should be a number between 0 and 1. Quantile is theoretically // the inverse of the CDF function, though it may not be the actual inverse // for all values p and CumulantKinds. // // The x data must be sorted in increasing order. If weights is nil then all // of the weights are 1. If weights is not nil, then len(x) must equal len(weights). // // CumulantKind behaviors: // - Empirical: Returns the lowest value q for which q is greater than or equal // to the fraction p of samples func Quantile(p float64, c CumulantKind, x, weights []float64) float64 { if !(p >= 0 && p <= 1) { panic("stat: percentile out of bounds") } if weights != nil && len(x) != len(weights) { panic("stat: slice length mismatch") } if floats.HasNaN(x) { return math.NaN() // This is needed because the algorithm breaks otherwise } if !sort.Float64sAreSorted(x) { panic("x data are not sorted") } var sumWeights float64 if weights == nil { sumWeights = float64(len(x)) } else { sumWeights = floats.Sum(weights) } switch c { case Empirical: var cumsum float64 fidx := p * sumWeights for i := range x { if weights == nil { cumsum++ } else { cumsum += weights[i] } if cumsum >= fidx { return x[i] } } panic("impossible") default: panic("stat: bad cumulant kind") } }
// KolmogorovSmirnov computes the largest distance between two empirical CDFs. // Each dataset x and y consists of sample locations and counts, xWeights and // yWeights, respectively. // // x and y may have different lengths, though len(x) must equal len(xWeights), and // len(y) must equal len(yWeights). Both x and y must be sorted. // // Special cases are: // = 0 if len(x) == len(y) == 0 // = 1 if len(x) == 0, len(y) != 0 or len(x) != 0 and len(y) == 0 func KolmogorovSmirnov(x, xWeights, y, yWeights []float64) float64 { if xWeights != nil && len(x) != len(xWeights) { panic("stat: slice length mismatch") } if yWeights != nil && len(y) != len(yWeights) { panic("stat: slice length mismatch") } if len(x) == 0 || len(y) == 0 { if len(x) == 0 && len(y) == 0 { return 0 } return 1 } if floats.HasNaN(x) { return math.NaN() } if floats.HasNaN(y) { return math.NaN() } if !sort.Float64sAreSorted(x) { panic("x data are not sorted") } if !sort.Float64sAreSorted(y) { panic("y data are not sorted") } xWeightsNil := xWeights == nil yWeightsNil := yWeights == nil var ( maxDist float64 xSum, ySum float64 xCdf, yCdf float64 xIdx, yIdx int ) if xWeightsNil { xSum = float64(len(x)) } else { xSum = floats.Sum(xWeights) } if yWeightsNil { ySum = float64(len(y)) } else { ySum = floats.Sum(yWeights) } xVal := x[0] yVal := y[0] // Algorithm description: // The goal is to find the maximum difference in the empirical CDFs for the // two datasets. The CDFs are piecewise-constant, and thus the distance // between the CDFs will only change at the values themselves. // // To find the maximum distance, step through the data in ascending order // of value between the two datasets. At each step, compute the empirical CDF // and compare the local distance with the maximum distance. // Due to some corner cases, equal data entries must be tallied simultaneously. for { switch { case xVal < yVal: xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil) case yVal < xVal: yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil) case xVal == yVal: newX := x[xIdx] newY := y[yIdx] if newX < newY { xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil) } else if newY < newX { yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil) } else { // Update them both, they'll be equal next time and the right // thing will happen xVal, xCdf, xIdx = updateKS(xIdx, xCdf, xSum, x, xWeights, xWeightsNil) yVal, yCdf, yIdx = updateKS(yIdx, yCdf, ySum, y, yWeights, yWeightsNil) } default: panic("unreachable") } dist := math.Abs(xCdf - yCdf) if dist > maxDist { maxDist = dist } // Both xCdf and yCdf will equal 1 at the end, so if we have reached the // end of either sample list, the distance is as large as it can be. if xIdx == len(x) || yIdx == len(y) { return maxDist } } }