// bisect returns an x in [low, high] such that |f(x)| <= tolerance // using the bisection method. // // f(low) and f(high) must have opposite signs. // // If f does not have a root in this interval (e.g., it is // discontiguous), this returns the X of the apparent discontinuity // and false. func bisect(f func(float64) float64, low, high, tolerance float64) (float64, bool) { flow, fhigh := f(low), f(high) if -tolerance <= flow && flow <= tolerance { return low, true } if -tolerance <= fhigh && fhigh <= tolerance { return high, true } if mathx.Sign(flow) == mathx.Sign(fhigh) { panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%g f(%g)=%g", low, flow, high, fhigh)) } for { mid := (high + low) / 2 fmid := f(mid) if -tolerance <= fmid && fmid <= tolerance { return mid, true } if mid == high || mid == low { return mid, false } if mathx.Sign(fmid) == mathx.Sign(flow) { low = mid flow = fmid } else { high = mid fhigh = fmid } } }
// MannWhitneyUTest performs a Mann-Whitney U-test [1,2] of the null // hypothesis that two samples come from the same population against // the alternative hypothesis that one sample tends to have larger or // smaller values than the other. // // This is similar to a t-test, but unlike the t-test, the // Mann-Whitney U-test is non-parametric (it does not assume a normal // distribution). It has very slightly lower efficiency than the // t-test on normal distributions. // // Computing the exact U distribution is expensive for large sample // sizes, so this uses a normal approximation for sample sizes larger // than MannWhitneyExactLimit if there are no ties or // MannWhitneyTiesExactLimit if there are ties. This normal // approximation uses both the tie correction and the continuity // correction. // // This can fail with ErrSampleSize if either sample is empty or // ErrSamplesEqual if all sample values are equal. // // This is also known as a Mann-Whitney-Wilcoxon test and is // equivalent to the Wilcoxon rank-sum test, though the Wilcoxon // rank-sum test differs in nomenclature. // // [1] Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of // Whether one of Two Random Variables is Stochastically Larger than // the Other". Annals of Mathematical Statistics 18 (1): 50–60. // // [2] Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer". // Journal of the American Statistical Association 61 (315): 772-787. func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) { n1, n2 := len(x1), len(x2) if n1 == 0 || n2 == 0 { return nil, ErrSampleSize } // Compute the U statistic and tie vector T. x1 = append([]float64(nil), x1...) x2 = append([]float64(nil), x2...) sort.Float64s(x1) sort.Float64s(x2) merged, labels := labeledMerge(x1, x2) R1 := 0.0 T, hasTies := []int{}, false for i := 0; i < len(merged); { rank1, nx1, v1 := i+1, 0, merged[i] // Consume samples that tie this sample (including itself). for ; i < len(merged) && merged[i] == v1; i++ { if labels[i] == 1 { nx1++ } } // Assign all tied samples the average rank of the // samples, where merged[0] has rank 1. if nx1 != 0 { rank := float64(i+rank1) / 2 R1 += rank * float64(nx1) } T = append(T, i-rank1+1) if i > rank1 { hasTies = true } } U1 := R1 - float64(n1*(n1+1))/2 // Compute the smaller of U1 and U2 U2 := float64(n1*n2) - U1 Usmall := math.Min(U1, U2) var p float64 if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit || hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit { // Use exact U distribution. U1 will be an integer. if len(T) == 1 { // All values are equal. Test is meaningless. return nil, ErrSamplesEqual } dist := UDist{N1: n1, N2: n2, T: T} switch alt { case LocationDiffers: if U1 == U2 { // The distribution is symmetric about // Usmall. Since the distribution is // discrete, the CDF is discontinuous // and if simply double CDF(Usmall), // we'll double count the // (non-infinitesimal) probability // mass at Usmall. What we want is // just the integral of the whole CDF, // which is 1. p = 1 } else { p = dist.CDF(Usmall) * 2 } case LocationLess: p = dist.CDF(U1) case LocationGreater: p = 1 - dist.CDF(U1-1) } } else { // Use normal approximation (with tie and continuity // correction). t := tieCorrection(T) N := float64(n1 + n2) μ_U := float64(n1*n2) / 2 σ_U := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12) if σ_U == 0 { return nil, ErrSamplesEqual } numer := U1 - μ_U // Perform continuity correction. switch alt { case LocationDiffers: numer -= mathx.Sign(numer) * 0.5 case LocationLess: numer += 0.5 case LocationGreater: numer -= 0.5 } z := numer / σ_U switch alt { case LocationDiffers: p = 2 * math.Min(StdNormal.CDF(z), 1-StdNormal.CDF(z)) case LocationLess: p = StdNormal.CDF(z) case LocationGreater: p = 1 - StdNormal.CDF(z) } } return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1, AltHypothesis: alt, P: p}, nil }