func (k *knn) Estimate(data gotypes.Value) float64 { result := 0.0 for i := 0; i < k.data.Rows(); i++ { v, _ := gotypes.WrapArray(k.data.Row(i), gotypes.ValueType) result += gaussian(v, data, k.h) } return result / float64(k.data.Rows()) }
// KMeans clustering // TODO: Change to not only be numeric func KMeans(data []classify.Data, K int, metric Metric, means [][]float64) [][]float64 { features := len(data[0].Value()) if means == nil { /* Todo: randomly initialize k means */ means = make([][]float64, K) for i := range means { means[i] = make([]float64, features) val := data[rand.Intn(len(data))] for j, v := range val.Value() { means[i][j] = v.Real() } } } counts := make([]int, K) newMeans := make([][]float64, K) for i := range newMeans { newMeans[i] = make([]float64, features) } maxElemPerThread := 10000 // 1K iterations := gomath.MaxInt(1, (len(data)+maxElemPerThread-1)/maxElemPerThread) elemPerThread := (len(data) + iterations - 1) / iterations change := true for change { change = false var lock sync.Mutex parallels.Foreach(func(i int) bool { // List of stuff nMs := make([][]float64, K) cs := make([]int, K) for i := range nMs { nMs[i] = make([]float64, features) } start := i * elemPerThread end := gomath.MinInt(start+elemPerThread, len(data)) for start < end { d := data[start] closest := 0 wrap, err := gotypes.WrapArray(means[0], gotypes.Array) if err != nil { panic(err) } dist := metric(d.Value(), wrap.Array()) for k := 1; k < K; k++ { wrap, err = gotypes.WrapArray(means[k], gotypes.Array) if err != nil { panic(err) } nDist := metric(d.Value(), wrap.Array()) if nDist < dist { dist = nDist closest = k } } // Save cs[closest]++ for i := 0; i < features; i++ { t := d.Value()[i].Real() nMs[closest][i] += t } start++ } lock.Lock() defer lock.Unlock() for i := 0; i < K; i++ { for j := 0; j < features; j++ { newMeans[i][j] += nMs[i][j] } counts[i] += cs[i] } return false }, iterations) // Update means for i := range newMeans { for j := range newMeans[i] { newMeans[i][j] /= float64(counts[i]) } if !same(newMeans[i], means[i]) { change = true } for j := range newMeans[i] { means[i][j] = newMeans[i][j] newMeans[i][j] = 0 } counts[i] = 0 } } return means }