Example #1
// Estimate evaluates the Gaussian kernel (with bandwidth k.h) between the
// query value and every stored row, and returns the average contribution.
func (k *knn) Estimate(data gotypes.Value) float64 {
	result := 0.0
	for i := 0; i < k.data.Rows(); i++ {
		// Wrap the raw row so it can be passed to the kernel alongside data.
		v, _ := gotypes.WrapArray(k.data.Row(i), gotypes.ValueType)
		result += gaussian(v, data, k.h)
	}
	return result / float64(k.data.Rows())
}
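The gaussian helper called above is not shown in this example. For orientation only, here is a minimal sketch of what a Gaussian (RBF) kernel with bandwidth h commonly looks like, written against plain []float64 slices instead of the gotypes.Value wrapper; the name gaussianKernel and the plain-slice signature are assumptions, not the library's API, and the normalizing constant is omitted.

package main

import (
	"fmt"
	"math"
)

// gaussianKernel is a hypothetical stand-in for the gaussian helper used in
// Estimate above: exp(-||x-y||^2 / (2*h^2)) on plain float64 slices.
func gaussianKernel(x, y []float64, h float64) float64 {
	sum := 0.0
	for i := range x {
		d := x[i] - y[i]
		sum += d * d
	}
	return math.Exp(-sum / (2 * h * h))
}

func main() {
	x := []float64{1.0, 2.0}
	y := []float64{1.5, 1.5}
	fmt.Println(gaussianKernel(x, y, 1.0)) // closer points yield values nearer 1
}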
Example #2
// KMeans runs k-means clustering on data, using metric to measure the
// distance between a point and a mean. If means is nil, K initial means are
// seeded from randomly chosen data points; otherwise the provided means are
// used as the starting point. The converged means are returned.
// TODO: Change to not only be numeric
func KMeans(data []classify.Data, K int, metric Metric, means [][]float64) [][]float64 {

	features := len(data[0].Value())

	if means == nil {
		// Seed each of the K means from a randomly chosen data point.
		means = make([][]float64, K)
		for i := range means {
			means[i] = make([]float64, features)
			val := data[rand.Intn(len(data))]
			for j, v := range val.Value() {
				means[i][j] = v.Real()
			}
		}
	}

	counts := make([]int, K)
	newMeans := make([][]float64, K)
	for i := range newMeans {
		newMeans[i] = make([]float64, features)
	}

	// Split the data into chunks so each worker handles at most
	// maxElemPerThread elements; iterations is the number of workers.
	maxElemPerThread := 10000 // 10K elements per worker
	iterations := gomath.MaxInt(1, (len(data)+maxElemPerThread-1)/maxElemPerThread)
	elemPerThread := (len(data) + iterations - 1) / iterations

	// Repeat the assign/accumulate/update cycle until no mean moves.
	change := true
	for change {
		change = false
		var lock sync.Mutex
		parallels.Foreach(func(i int) bool {
			// Per-worker accumulators: partial sums and point counts per cluster.
			nMs := make([][]float64, K)
			cs := make([]int, K)

			for c := range nMs {
				nMs[c] = make([]float64, features)
			}

			start := i * elemPerThread
			end := gomath.MinInt(start+elemPerThread, len(data))

			for start < end {
				d := data[start]
				closest := 0
				wrap, err := gotypes.WrapArray(means[0], gotypes.Array)
				if err != nil {
					panic(err)
				}
				dist := metric(d.Value(), wrap.Array())
				for k := 1; k < K; k++ {
					wrap, err = gotypes.WrapArray(means[k], gotypes.Array)
					if err != nil {
						panic(err)
					}
					nDist := metric(d.Value(), wrap.Array())
					if nDist < dist {
						dist = nDist
						closest = k
					}
				}

				// Accumulate this point into its closest cluster.
				cs[closest]++
				for f := 0; f < features; f++ {
					t := d.Value()[f].Real()
					nMs[closest][f] += t
				}

				start++
			}

			// Merge this worker's partial results into the shared accumulators.
			lock.Lock()
			defer lock.Unlock()

			for c := 0; c < K; c++ {
				for j := 0; j < features; j++ {
					newMeans[c][j] += nMs[c][j]
				}
				counts[c] += cs[c]
			}
			return false
		}, iterations)

		// Update means: turn accumulated sums into averages, check whether
		// any mean moved, and reset the accumulators for the next pass.
		for i := range newMeans {
			if counts[i] > 0 {
				for j := range newMeans[i] {
					newMeans[i][j] /= float64(counts[i])
				}
			} else {
				// An empty cluster keeps its previous mean instead of dividing by zero.
				copy(newMeans[i], means[i])
			}
			if !same(newMeans[i], means[i]) {
				change = true
			}
			for j := range newMeans[i] {
				means[i][j] = newMeans[i][j]
				newMeans[i][j] = 0
			}
			counts[i] = 0
		}
	}

	return means
}
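To see the same assign / accumulate / average cycle without the gotypes, parallels, gomath, and classify helpers (including the unshown same and Metric definitions), here is a minimal, single-goroutine sketch on plain [][]float64 slices. The names kMeansPlain and euclidean, the convergence tolerance, and the maxIter cap are illustrative assumptions and not part of the library above.

package main

import (
	"fmt"
	"math"
	"math/rand"
)

// euclidean is an illustrative Metric: squared Euclidean distance is
// sufficient for picking the closest mean.
func euclidean(a, b []float64) float64 {
	sum := 0.0
	for i := range a {
		d := a[i] - b[i]
		sum += d * d
	}
	return sum
}

// kMeansPlain mirrors the assign / accumulate / average loop of the example
// above, but on plain [][]float64 points and a single goroutine.
func kMeansPlain(points [][]float64, K, maxIter int) [][]float64 {
	features := len(points[0])

	// Seed means from randomly chosen points, as the example does when means == nil.
	means := make([][]float64, K)
	for i := range means {
		means[i] = append([]float64(nil), points[rand.Intn(len(points))]...)
	}

	sums := make([][]float64, K)
	counts := make([]int, K)
	for i := range sums {
		sums[i] = make([]float64, features)
	}

	for iter := 0; iter < maxIter; iter++ {
		for i := range sums {
			for j := range sums[i] {
				sums[i][j] = 0
			}
			counts[i] = 0
		}

		// Assignment step: accumulate each point into its closest mean.
		for _, p := range points {
			closest := 0
			best := euclidean(p, means[0])
			for k := 1; k < K; k++ {
				if d := euclidean(p, means[k]); d < best {
					best, closest = d, k
				}
			}
			counts[closest]++
			for j, v := range p {
				sums[closest][j] += v
			}
		}

		// Update step: new mean = accumulated sum / count.
		changed := false
		for i := range means {
			if counts[i] == 0 {
				continue // an empty cluster keeps its previous mean
			}
			for j := range means[i] {
				m := sums[i][j] / float64(counts[i])
				if math.Abs(m-means[i][j]) > 1e-9 {
					changed = true
				}
				means[i][j] = m
			}
		}
		if !changed {
			break
		}
	}
	return means
}

func main() {
	points := [][]float64{{0, 0}, {0.1, 0.2}, {5, 5}, {5.1, 4.9}}
	fmt.Println(kMeansPlain(points, 2, 100))
}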