예제 #1
0
// CDFTest scores how far the active window's mean absolute difference sits
// from the reference window's, using the reference window's empirical
// distribution function.
//
// It returns NA when extractWindows reports an error. Otherwise the result is
// 2*|0.5 - p|, where p is the reference window's empirical distribution
// function evaluated at (active mean - reference mean).
func CDFTest(vector govector.Vector, conf AnomalyzerConf) float64 {
	// Work on the absolute values of vector.Diff().
	absDiffs := vector.Diff().Apply(math.Abs)

	// NOTE(review): referenceSize-1 presumably accounts for Diff() yielding one
	// fewer element than its input; confirm against extractWindows.
	refWindow, activeWindow, err := extractWindows(absDiffs, conf.referenceSize-1, conf.ActiveSize, conf.ActiveSize)
	if err != nil {
		return NA
	}

	// Empirical distribution function built from the reference window.
	ecdf := refWindow.Ecdf()

	// Evaluate it at the gap between the active and reference means.
	meanGap := activeWindow.Mean() - refWindow.Mean()
	p := ecdf(meanGap)

	// Rescale so that p == 0.5 maps to 0 and the score grows as p moves toward
	// either tail.
	return 2 * math.Abs(0.5-p)
}
예제 #2
0
// BootstrapKsTest compares the KsStat value of vector against the KsStat
// values of conf.PermCount shuffled copies of vector.
//
// It returns NA when KsStat on the unshuffled vector is NA. Otherwise it
// returns the fraction of shuffles whose statistic is strictly smaller than
// the unshuffled one.
func BootstrapKsTest(vector govector.Vector, conf AnomalyzerConf) float64 {
	observed := KsStat(vector, conf)
	if observed == NA {
		return NA
	}

	// Count the shuffles whose statistic falls below the observed one.
	smaller := 0
	for n := 0; n < conf.PermCount; n++ {
		if KsStat(vector.Shuffle(), conf) < observed {
			smaller++
		}
	}

	return float64(smaller) / float64(conf.PermCount)
}
예제 #3
0
// rankTest is a permutation test on the rank sum of the active window.
//
// The ranks of vector are split with extractWindows and the active window's
// ranks are summed. The same sum is then computed for conf.PermCount shuffled
// copies of vector, and each one is passed to comparison together with the
// observed sum as comparison(shuffledSum, observedSum). The caller picks the
// direction of the test through comparison.
//
// It returns NA when extractWindows reports an error on the unshuffled ranks.
// Otherwise it returns the fraction of shuffles for which comparison returned
// true.
func rankTest(vector govector.Vector, conf AnomalyzerConf, comparison compare) float64 {
	// Rank sum of the active window for the data as given.
	observedRanks := vector.Rank()
	_, observedActive, err := extractWindows(observedRanks, conf.referenceSize, conf.ActiveSize, conf.ActiveSize)
	if err != nil {
		return NA
	}
	observedSum := observedActive.Sum()

	hits := 0
	for n := 0; n < conf.PermCount; n++ {
		shuffledRanks := vector.Shuffle().Rank()

		// The error is discarded here.
		// NOTE(review): presumably safe because the identical call on the
		// unshuffled ranks succeeded above; confirm that extractWindows can
		// only fail on sizes.
		_, shuffledActive, _ := extractWindows(shuffledRanks, conf.referenceSize, conf.ActiveSize, conf.ActiveSize)

		// Tally the shuffles that satisfy the caller's comparison against the
		// observed sum.
		if comparison(shuffledActive.Sum(), observedSum) {
			hits++
		}
	}

	// Share of the shuffles that satisfied the comparison.
	return float64(hits) / float64(conf.PermCount)
}
예제 #4
0
// DiffTest is a permutation test on the ranked absolute values of
// vector.RelDiff().
//
// The active window's rank sum is computed for vector as given and then for
// conf.PermCount shuffled copies of vector.
//
// It returns NA when extractWindows reports an error on the unshuffled ranks.
// Otherwise it returns the fraction of shuffles whose active-window rank sum
// is strictly smaller than the observed one.
func DiffTest(vector govector.Vector, conf AnomalyzerConf) float64 {
	// Rank the absolute values of vector.RelDiff().
	absRelDiffs := vector.RelDiff().Apply(math.Abs)
	observedRanks := absRelDiffs.Rank()

	// NOTE(review): referenceSize-1 presumably accounts for RelDiff() yielding
	// one fewer element than its input; confirm against govector.
	_, observedActive, err := extractWindows(observedRanks, conf.referenceSize-1, conf.ActiveSize, conf.ActiveSize)
	if err != nil {
		return NA
	}
	observedSum := observedActive.Sum()

	smaller := 0
	for n := 0; n < conf.PermCount; n++ {
		shuffledAbs := vector.Shuffle().RelDiff().Apply(math.Abs)
		shuffledRanks := shuffledAbs.Rank()

		// The error is discarded here.
		// NOTE(review): presumably safe because the identical call on the
		// unshuffled ranks succeeded above; confirm that extractWindows can
		// only fail on sizes.
		_, shuffledActive, _ := extractWindows(shuffledRanks, conf.referenceSize-1, conf.ActiveSize, conf.ActiveSize)

		// A shuffled sum below the observed sum suggests that the observed sum
		// is unusually high.
		if shuffledActive.Sum() < observedSum {
			smaller++
		}
	}

	// Share of the shuffles whose sum fell below the observed sum.
	return float64(smaller) / float64(conf.PermCount)
}