/
gaussian.go
310 lines (231 loc) · 8.41 KB
/
gaussian.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
package goanomaly
import (
"math"
"math/big"
"sync"
)
// Package-level numeric values shared by the probability computations.
// Every value is built from a float64, so each carries 53 bits of precision.
var (
	// Small integers wrapped as big.Float operands.
	zero = big.NewFloat(0)
	one  = big.NewFloat(1)
	two  = big.NewFloat(2)

	// Mathematical constants at float64 precision.
	pi = big.NewFloat(math.Pi)
	e  = big.NewFloat(math.E)

	// delimiter marks the boundary between a "small" and a "large" data set.
	// It is not used at the moment.
	delimiter = big.NewFloat(100)

	// Pre-computed terms of the density formula: 2*pi as a big.Float, the
	// same value as a float64, and sqrt(2*pi).
	doublePi         = new(big.Float).Mul(two, pi)
	doublePiValue, _ = doublePi.Float64()
	doublePiSqrt     = big.NewFloat(math.Sqrt(doublePiValue))
)
// AnomalyDetection holds a one-dimensional data set together with the
// statistics derived from it that the probability computations read.
type AnomalyDetection struct {
	dataSet      []big.Float // raw samples; set to nil by ClearDataSet
	totalSamples big.Float   // sample count, used as the divisor for mean, variance and deviation
	totalSum     big.Float   // sum of all samples, stored by estimateMean
	mean         big.Float   // arithmetic mean: totalSum / totalSamples
	variance     big.Float   // mean of the squared deviations from the mean
	deviation    big.Float   // mean absolute deviation from the mean (not the square root of variance)
}
// AnomalyDetectionVector is a slice of AnomalyDetection pointers, intended to
// hold one detector per dimension of a multivariate data set.
type AnomalyDetectionVector []*AnomalyDetection
// NewAnomalyDetectionVector creates an anomaly detection object for a
// multi-dimensional (multivariate) data set: one AnomalyDetection is built
// per data set passed in, and the result keeps the order of the arguments.
// It returns nil when no data set is given.
//
// Building a detector walks its whole data set, so each one is built in its
// own goroutine. Every goroutine writes only to its own, pre-allocated slot
// of the result, which is why no mutex is needed; the function returns once
// all of them have finished.
func NewAnomalyDetectionVector(vector ...[]big.Float) AnomalyDetectionVector {
	if len(vector) == 0 {
		return nil
	}

	adVector := make(AnomalyDetectionVector, len(vector))

	var wg sync.WaitGroup
	for i, data := range vector {
		wg.Add(1)
		// The index and the data set are passed as arguments so that each
		// goroutine works on its own copies of the loop variables.
		go func(slot int, set []big.Float) {
			defer wg.Done()
			// This is the expensive call: it computes the statistics of the set.
			adVector[slot] = NewAnomalyDetection(set...)
		}(i, data)
	}

	// Wait for the goroutines to finish before handing out the vector.
	wg.Wait()
	return adVector
}
// EventIsAnomalous verifies whether eventX is anomalous with respect to all
// the detectors in the vector. The probability of eventX is calculated for
// every detector and the results are multiplied together.
//
// It returns true when the combined probability is lower than threshold,
// together with the combined probability converted to float64. For an empty
// vector the combined probability is 1.
func (adVector AnomalyDetectionVector) EventIsAnomalous(eventX big.Float, threshold *big.Float) (bool, float64) {
	// One slot per detector: each goroutine writes only to its own index, so
	// the slice needs no mutex.
	singleProbabilities := make([]*big.Float, len(adVector))

	var wg sync.WaitGroup
	for i, ad := range adVector {
		wg.Add(1)
		go func(slot int, anomaly *AnomalyDetection) {
			defer wg.Done()
			// calculateProbability takes eventX by value, so every call
			// works on its own copy.
			singleProbabilities[slot] = anomaly.calculateProbability(eventX)
		}(i, ad)
	}

	// All probabilities must be available before they are multiplied.
	wg.Wait()

	// Multiply all the probabilities together.
	totalProbability := big.NewFloat(1)
	for _, probability := range singleProbabilities {
		totalProbability.Mul(totalProbability, probability)
	}

	// The accuracy of the float64 conversion is deliberately ignored: the
	// float64 is informational, the comparison below uses the big.Float.
	r, _ := totalProbability.Float64()

	// If the total probability is lower than the threshold the event is anomalous.
	return totalProbability.Cmp(threshold) < 0, r
}
// NewAnomalyDetection creates an anomaly detection object for a
// one-dimensional data set. The sample count, the mean and the variance
// (together with the deviation) are computed before the object is returned.
// The data slice is stored as is, not copied.
func NewAnomalyDetection(data ...big.Float) *AnomalyDetection {
	ad := &AnomalyDetection{dataSet: data}
	ad.totalSamples.SetFloat64(float64(len(data)))

	// The variance is computed from the mean, so the mean goes first.
	ad.estimateMean()
	ad.estimateVariance()

	return ad
}
// ExpandDataSet appends data to the stored data set and recomputes the
// sample count, the mean, the variance and the deviation over the enlarged
// set.
//
// It does nothing when the data set has been released (it is nil, see
// ClearDataSet) or when data is empty.
func (ad *AnomalyDetection) ExpandDataSet(data ...big.Float) {
	if ad.dataSet == nil {
		// TODO: this should return an error, because the data set was cleared
		// and the statistics cannot be recomputed without the old samples.
		return
	}
	if len(data) == 0 {
		// Nothing to add: the current statistics are still valid.
		return
	}

	// Capping the capacity forces append to allocate a new backing array, so
	// a slice the caller originally handed to NewAnomalyDetection is never
	// written to.
	current := len(ad.dataSet)
	ad.dataSet = append(ad.dataSet[:current:current], data...)

	// The statistics are divided by the size of the whole set, not only by
	// the number of samples that were just added.
	ad.totalSamples.SetFloat64(float64(len(ad.dataSet)))

	// estimateMean also refreshes totalSum.
	ad.estimateMean()
	ad.estimateVariance()
}
// ClearDataSet drops the reference to the stored samples by resetting the
// data set to nil, which releases the resources. The statistics that were
// already computed are left untouched.
func (ad *AnomalyDetection) ClearDataSet() {
	var released []big.Float // a nil slice
	ad.dataSet = released
}
// calculateProbability evaluates the probability density formula for eventX:
//
//	exp(-(x - mean)^2 / (2 * variance)) / (sqrt(2*pi) * deviation)
//
// It returns zero when the variance is zero. Without that guard the divisions
// below end in 0/0, which makes big.Float.Quo panic with ErrNaN.
//
// TODO: create the Sqrt and Exp methods for bignum; until then the
// exponential is computed with float64 precision.
func (ad *AnomalyDetection) calculateProbability(eventX big.Float) *big.Float {
	// Denominator of the exponent: 2 * variance.
	divisor := new(big.Float).Mul(two, &ad.variance)
	if divisor.Sign() == 0 {
		return big.NewFloat(0)
	}

	// Exponent: (x - mean)^2 / (2 * variance). There is no need to take the
	// absolute value of the difference because it is squared.
	term := new(big.Float).Sub(&eventX, &ad.mean)
	term.Mul(term, term)
	term.Quo(term, divisor)

	// math.Exp works on float64; the accuracy of the conversion is
	// deliberately ignored.
	exponent, _ := term.Float64()
	numerator := math.Exp(-exponent)

	// Normalisation term: sqrt(2*pi) * deviation. The same big.Float is
	// reused so that the result keeps the original precision.
	divisor.Mul(doublePiSqrt, &ad.deviation)

	return term.Quo(term.SetFloat64(numerator), divisor)
}
// EventIsAnomalous verifies whether a specific event X is anomalous or not.
// It returns true when the probability calculated for eventX is lower than
// threshold, together with that probability converted to float64.
func (ad *AnomalyDetection) EventIsAnomalous(eventX big.Float, threshold *big.Float) (bool, float64) {
	p := ad.calculateProbability(eventX)

	// The accuracy of the conversion is not needed: the float64 is only
	// reported back, the decision is taken on the big.Float.
	asFloat, _ := p.Float64()

	belowThreshold := p.Cmp(threshold) < 0
	return belowThreshold, asFloat
}
// EventXIsAnomalous verifies whether a specific event X is anomalous or not.
// The probability is calculated with the probability density formula:
//
//	exp(-(x - mean)^2 / (2 * variance)) / (sqrt(2*pi) * deviation)
//
// It returns true when that probability is lower than threshold, together
// with the probability itself. When the variance is zero the formula cannot
// be evaluated and the method returns false with a probability of zero; the
// returned value is always a new big.Float that the caller may modify.
//
// TODO: create the Sqrt and Exp methods for bignum; until then the
// exponential is computed with float64 precision.
func (ad *AnomalyDetection) EventXIsAnomalous(eventX, threshold *big.Float) (bool, *big.Float) {
	// Squared distance from the mean. There is no need to take the absolute
	// value of the difference because it is squared.
	term := new(big.Float).Sub(eventX, &ad.mean)
	term.Mul(term, term)

	// Denominator of the exponent: 2 * variance.
	divisor := new(big.Float).Mul(two, &ad.variance)
	if divisor.Sign() == 0 {
		// Hand out a fresh zero rather than the package-level one, which is
		// shared and must never be modified by a caller.
		return false, big.NewFloat(0)
	}
	term.Quo(term, divisor)

	// math.Exp works on float64; the accuracy of the conversion is
	// deliberately ignored.
	exponent, _ := term.Float64()
	numerator := math.Exp(-exponent)

	// Normalisation term: sqrt(2*pi) * deviation. The same big.Float is
	// reused so that the result keeps the original precision.
	divisor.Mul(doublePiSqrt, &ad.deviation)

	term.Quo(term.SetFloat64(numerator), divisor)
	return term.Cmp(threshold) < 0, term
}
// estimateMean estimates the mean based on the data set: it sums all the
// samples, stores a copy of the sum in totalSum and stores the sum divided by
// totalSamples in mean. It returns a pointer to the mean field.
//
// NOTE: the division is a big.Float.Quo, which panics with ErrNaN when both
// the sum and totalSamples are zero.
func (ad *AnomalyDetection) estimateMean() *big.Float {
	// Start from zero and add the samples one by one.
	sum := new(big.Float)
	for i := range ad.dataSet {
		sum.Add(sum, &ad.dataSet[i])
	}

	// Keep a copy of the sum on the anomaly detection object.
	ad.totalSum.Copy(sum)

	// mean = sum / number of samples
	ad.mean.Quo(sum, &ad.totalSamples)
	return &ad.mean
}
// estimateVariance estimates the variance based on the data set and returns
// a pointer to the variance field. Two values are stored on the object:
//
//   - variance: the sum of the squared deviations from the mean, divided by
//     totalSamples;
//   - deviation: the sum of the absolute deviations from the mean, divided
//     by totalSamples.
//
// NOTE: both divisions are big.Float.Quo calls, which panic with ErrNaN when
// the dividend and totalSamples are both zero.
func (ad *AnomalyDetection) estimateVariance() *big.Float {
	// A mean of zero is taken to mean that it was never calculated, so it is
	// calculated now: it is needed to compute the deviations.
	if ad.mean.Sign() == 0 {
		ad.estimateMean()
	}

	sumOfSquares := new(big.Float)
	sumOfAbs := new(big.Float)
	// diff is reused for every sample so that it keeps the same precision
	// throughout the loop.
	diff := new(big.Float)

	for i := range ad.dataSet {
		// |sample - mean|
		diff.Sub(&ad.dataSet[i], &ad.mean)
		diff.Abs(diff)
		sumOfAbs.Add(sumOfAbs, diff)

		// (sample - mean)^2
		diff.Mul(diff, diff)
		sumOfSquares.Add(sumOfSquares, diff)
	}

	// Average both sums over the number of samples and store them.
	sumOfSquares.Quo(sumOfSquares, &ad.totalSamples)
	ad.variance = *sumOfSquares

	sumOfAbs.Quo(sumOfAbs, &ad.totalSamples)
	ad.deviation = *sumOfAbs

	return &ad.variance
}