// Generates the cumulative distribution function using the difference in the means // for the data. func CDFTest(vector govector.Vector, conf AnomalyzerConf) float64 { diffs := vector.Diff().Apply(math.Abs) reference, active, err := extractWindows(diffs, conf.referenceSize-1, conf.ActiveSize, conf.ActiveSize) if err != nil { return NA } // Find the empircal distribution function using the reference window. refEcdf := reference.Ecdf() // Difference between the active and reference means. activeDiff := active.Mean() - reference.Mean() // Apply the empirical distribution function to that difference. percentile := refEcdf(activeDiff) // Scale so max probability is in tails and prob at 0.5 is 0. return (2 * math.Abs(0.5-percentile)) }
func BootstrapKsTest(vector govector.Vector, conf AnomalyzerConf) float64 { dist := KsStat(vector, conf) if dist == NA { return NA } i := 0 significant := 0 for i < conf.PermCount { permVector := vector.Shuffle() permDist := KsStat(permVector, conf) if permDist < dist { significant++ } i++ } return float64(significant) / float64(conf.PermCount) }
// Generates permutations of reference and active window values to determine // whether or not data is anomalous. The number of permutations desired defaults // to 500 but can be increased for more precision. A comparison function above // can be specified to create Rank and ReverseRank tests. func rankTest(vector govector.Vector, conf AnomalyzerConf, comparison compare) float64 { // Rank the elements of a vector ranks := vector.Rank() _, active, err := extractWindows(ranks, conf.referenceSize, conf.ActiveSize, conf.ActiveSize) if err != nil { return NA } // Consider the sum of the ranks across the active data. This is the sum that // we will compare our permutations to. activeSum := active.Sum() i := 0 significant := 0 // Permute the active and reference data and compute the sums across the tail // (from the length of the reference data to the full length). for i < conf.PermCount { permRanks := vector.Shuffle().Rank() _, permActive, _ := extractWindows(permRanks, conf.referenceSize, conf.ActiveSize, conf.ActiveSize) // If we find a sum that is less than the initial sum across the active data, // this implies our initial sum might be uncharacteristically high. We increment // our count. permSum := permActive.Sum() if comparison(permSum, activeSum) { significant++ } i++ } // We return the percentage of the number of iterations where we found our initial // sum to be high. return float64(significant) / float64(conf.PermCount) }
// Generates permutations of reference and active window values to determine // whether or not data is anomalous. The number of permutations desired has // been set to 500 but can be increased for more precision. func DiffTest(vector govector.Vector, conf AnomalyzerConf) float64 { // Find the differences between neighboring elements and rank those differences. ranks := vector.RelDiff().Apply(math.Abs).Rank() // The indexing runs to length-1 because after applying .Diff(), We have // decreased the length of out vector by 1. _, active, err := extractWindows(ranks, conf.referenceSize-1, conf.ActiveSize, conf.ActiveSize) if err != nil { return NA } // Consider the sum of the ranks across the active data. This is the sum that // we will compare our permutations to. activeSum := active.Sum() i := 0 significant := 0 // Permute the active and reference data and compute the sums across the tail // (from the length of the reference data to the full length). for i < conf.PermCount { permRanks := vector.Shuffle().RelDiff().Apply(math.Abs).Rank() _, permActive, _ := extractWindows(permRanks, conf.referenceSize-1, conf.ActiveSize, conf.ActiveSize) // If we find a sum that is less than the initial sum across the active data, // this implies our initial sum might be uncharacteristically high. We increment // our count. if permActive.Sum() < activeSum { significant++ } i++ } // We return the percentage of the number of iterations where we found our initial // sum to be high. return float64(significant) / float64(conf.PermCount) }