Exemplo n.º 1
0
// bisect searches [low, high] for a point x with |f(x)| <= tolerance by
// repeatedly halving the interval.
//
// If either endpoint already satisfies the tolerance, that endpoint is
// returned immediately (low is checked first). Otherwise f(low) and
// f(high) must have opposite signs; if mathx.Sign reports the same
// sign for both, bisect panics.
//
// If the interval can no longer be split (the midpoint equals one of
// the endpoints) and f still is not within tolerance, for example
// because f is discontinuous and jumps across zero, bisect returns
// that midpoint (the location of the apparent discontinuity) and
// false.
func bisect(f func(float64) float64, low, high, tolerance float64) (float64, bool) {
	// closeEnough reports whether y lies within tolerance of zero.
	closeEnough := func(y float64) bool {
		return -tolerance <= y && y <= tolerance
	}

	// Evaluate the endpoints once, low first.
	flow := f(low)
	fhigh := f(high)
	switch {
	case closeEnough(flow):
		return low, true
	case closeEnough(fhigh):
		return high, true
	case mathx.Sign(flow) == mathx.Sign(fhigh):
		panic(fmt.Sprintf("root of f is not bracketed by [low, high]; f(%g)=%g f(%g)=%g", low, flow, high, fhigh))
	}

	for {
		mid := (high + low) / 2
		fmid := f(mid)
		switch {
		case closeEnough(fmid):
			return mid, true
		case mid == high || mid == low:
			// The interval cannot be narrowed any further.
			return mid, false
		case mathx.Sign(fmid) == mathx.Sign(flow):
			// The midpoint is on the same side as low, so the sign
			// change is in the upper half.
			low, flow = mid, fmid
		default:
			// The sign change is in the lower half. Only f(low) is
			// consulted from here on, so f(high) is not tracked.
			high = mid
		}
	}
}
Exemplo n.º 2
0
// MannWhitneyUTest performs a Mann-Whitney U-test [1,2] of the null
// hypothesis that two samples come from the same population against
// the alternative hypothesis that one sample tends to have larger or
// smaller values than the other.
//
// This is similar to a t-test, but unlike the t-test, the
// Mann-Whitney U-test is non-parametric (it does not assume a normal
// distribution). It has very slightly lower efficiency than the
// t-test on normal distributions.
//
// Computing the exact U distribution is expensive for large sample
// sizes, so this uses a normal approximation for sample sizes larger
// than MannWhitneyExactLimit if there are no ties or
// MannWhitneyTiesExactLimit if there are ties. This normal
// approximation uses both the tie correction and the continuity
// correction.
//
// x1 and x2 are not modified; they are copied before sorting.
//
// This can fail with ErrSampleSize if either sample is empty or
// ErrSamplesEqual if all sample values are equal.
//
// If either sample contains a NaN, ranks are undefined; the returned
// result then has both U and P set to NaN and the error is nil.
//
// alt is expected to be one of LocationDiffers, LocationLess, or
// LocationGreater. For any other value no p-value is computed and P
// is left at 0.
//
// This is also known as a Mann-Whitney-Wilcoxon test and is
// equivalent to the Wilcoxon rank-sum test, though the Wilcoxon
// rank-sum test differs in nomenclature.
//
// [1] Mann, Henry B.; Whitney, Donald R. (1947). "On a Test of
// Whether one of Two Random Variables is Stochastically Larger than
// the Other". Annals of Mathematical Statistics 18 (1): 50–60.
//
// [2] Klotz, J. H. (1966). "The Wilcoxon, Ties, and the Computer".
// Journal of the American Statistical Association 61 (315): 772-787.
func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) {
	n1, n2 := len(x1), len(x2)
	if n1 == 0 || n2 == 0 {
		return nil, ErrSampleSize
	}

	// NaN never compares equal to itself, so the tie-grouping loop
	// below could not advance past one and would spin forever while
	// growing T. Ranks are undefined for NaN anyway, so propagate it
	// to the result instead.
	for _, xs := range [][]float64{x1, x2} {
		for _, x := range xs {
			if math.IsNaN(x) {
				return &MannWhitneyUTestResult{N1: n1, N2: n2, U: math.NaN(),
					AltHypothesis: alt, P: math.NaN()}, nil
			}
		}
	}

	// Compute the U statistic and tie vector T. Sort copies so the
	// caller's slices are left untouched.
	x1 = append([]float64(nil), x1...)
	x2 = append([]float64(nil), x2...)
	sort.Float64s(x1)
	sort.Float64s(x2)
	merged, labels := labeledMerge(x1, x2)

	// R1 accumulates the rank sum of the samples labeled 1. T records
	// the size of each group of equal values, in sorted order.
	R1 := 0.0
	T, hasTies := []int{}, false
	for i := 0; i < len(merged); {
		rank1, nx1, v1 := i+1, 0, merged[i]
		// Consume samples that tie this sample (including itself).
		for ; i < len(merged) && merged[i] == v1; i++ {
			if labels[i] == 1 {
				nx1++
			}
		}
		// Assign all tied samples the average rank of the
		// samples, where merged[0] has rank 1. The group spans
		// ranks rank1 through i, so the average is (rank1+i)/2.
		if nx1 != 0 {
			rank := float64(i+rank1) / 2
			R1 += rank * float64(nx1)
		}
		T = append(T, i-rank1+1)
		if i > rank1 {
			hasTies = true
		}
	}
	U1 := R1 - float64(n1*(n1+1))/2

	// Compute the smaller of U1 and U2.
	U2 := float64(n1*n2) - U1
	Usmall := math.Min(U1, U2)

	var p float64
	if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit ||
		hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit {
		// Use the exact U distribution. U1 is an integer when there
		// are no ties; tied groups can yield half-integer ranks.
		if len(T) == 1 {
			// All values are equal. Test is meaningless.
			return nil, ErrSamplesEqual
		}

		dist := UDist{N1: n1, N2: n2, T: T}
		switch alt {
		case LocationDiffers:
			if U1 == U2 {
				// The distribution is symmetric about
				// Usmall. Since the distribution is
				// discrete, the CDF is discontinuous
				// and if we simply double CDF(Usmall),
				// we'll double count the
				// (non-infinitesimal) probability
				// mass at Usmall. What we want is
				// just the integral of the whole CDF,
				// which is 1.
				p = 1
			} else {
				p = dist.CDF(Usmall) * 2
			}

		case LocationLess:
			p = dist.CDF(U1)

		case LocationGreater:
			p = 1 - dist.CDF(U1-1)
		}
	} else {
		// Use normal approximation (with tie and continuity
		// correction).
		t := tieCorrection(T)
		N := float64(n1 + n2)
		muU := float64(n1*n2) / 2
		sigmaU := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12)
		if sigmaU == 0 {
			return nil, ErrSamplesEqual
		}
		numer := U1 - muU
		// Perform continuity correction: shift the numerator by
		// half a unit in the direction that widens the tail being
		// tested.
		switch alt {
		case LocationDiffers:
			numer -= mathx.Sign(numer) * 0.5
		case LocationLess:
			numer += 0.5
		case LocationGreater:
			numer -= 0.5
		}
		z := numer / sigmaU
		cdf := StdNormal.CDF(z)
		switch alt {
		case LocationDiffers:
			p = 2 * math.Min(cdf, 1-cdf)
		case LocationLess:
			p = cdf
		case LocationGreater:
			p = 1 - cdf
		}
	}

	return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1,
		AltHypothesis: alt, P: p}, nil
}