Beispiel #1
0
// main parses the gopark options, creates the "testflight" context and runs
// the WordCount, ComputePi and LogisticRegression jobs against it, in that
// order. The context is stopped when main returns.
func main() {
	gopark.ParseOptions()

	parkCtx := gopark.NewContext("testflight")
	defer parkCtx.Stop()

	// Each job receives the same shared context.
	WordCount(parkCtx)
	ComputePi(parkCtx)
	LogisticRegression(parkCtx)
}
Beispiel #2
0
// main runs a k-means style clustering job on gopark. It seeds numCenters
// random centers of dimension dim, loads points from "kmean_data.txt", and
// then reassigns points and recomputes centers for at most 10 rounds,
// stopping early once a round moves no center by more than minDist.
func main() {
	const (
		dim        = 4    // number of coordinates in each seeded center
		numCenters = 3    // how many centers are fitted
		minDist    = 0.01 // a center moving farther than this counts as updated
	)

	gopark.ParseOptions()
	parkCtx := gopark.NewContext("kmeans")
	defer parkCtx.Stop()

	// User-defined types have to be registered so gob can encode them.
	gob.Register(new(CenterCounter))

	// Seed every center with coordinates drawn from rand.Float64.
	centers := make([]gopark.Vector, numCenters)
	for i := 0; i < numCenters; i++ {
		seed := make(gopark.Vector, dim)
		for j := range seed {
			seed[j] = rand.Float64()
		}
		centers[i] = seed
	}
	fmt.Println(centers)

	// parseLine turns one whitespace-separated text line into a vector.
	// Parse errors are discarded: a malformed field simply keeps whatever
	// value strconv.ParseFloat returned alongside the error.
	parseLine := func(line interface{}) interface{} {
		fields := strings.Fields(line.(string))
		vec := make(gopark.Vector, len(fields))
		for idx, field := range fields {
			vec[idx], _ = strconv.ParseFloat(field, 64)
		}
		return vec
	}

	// assignToCenter keys a point by the result of CloseCenter for the current
	// centers and pairs it with a counter of one.
	// NOTE(review): CloseCenter presumably returns the index of the nearest
	// center; the key is asserted to be an int further down. Confirm.
	assignToCenter := func(x interface{}) interface{} {
		point := x.(gopark.Vector)
		return &gopark.KeyValue{
			Key:   CloseCenter(point, centers),
			Value: &CenterCounter{X: point, Count: 1},
		}
	}

	// mergeCounters adds up the vectors and counts of two counters that share
	// the same key.
	mergeCounters := func(x, y interface{}) interface{} {
		left, right := x.(*CenterCounter), y.(*CenterCounter)
		return &CenterCounter{
			X:     left.X.Plus(right.X),
			Count: left.Count + right.Count,
		}
	}

	// toMean replaces an accumulated counter by its vector divided by its
	// count, keeping the key unchanged.
	toMean := func(x interface{}) interface{} {
		kv := x.(*gopark.KeyValue)
		total := kv.Value.(*CenterCounter)
		mean := total.X.Divide(float64(total.Count))
		return &gopark.KeyValue{Key: kv.Key, Value: mean}
	}

	points := parkCtx.TextFile("kmean_data.txt").Map(parseLine).Cache()

	// Keep iterating while the previous round changed at least one center,
	// up to the fixed cap of 10 rounds.
	changed := true
	for iter := 0; changed && iter < 10; iter++ {
		fmt.Println("Iter:", iter)

		assigned := points.Map(assignToCenter)
		summed := assigned.ReduceByKey(mergeCounters)
		newCenters := summed.Map(toMean).CollectAsMap()

		changed = false
		for key, value := range newCenters {
			moved := value.(gopark.Vector)
			cid := key.(int)
			// Only adopt the recomputed center when it is farther than
			// minDist from the old one, as measured by EulaDistance.
			if shift := moved.EulaDistance(centers[cid]); shift > minDist {
				centers[cid] = moved
				changed = true
			}
		}
	}

	fmt.Println("Final Centers:", centers)
}