Пример #1
0
// mfFunc runs the gradient-descent passes of the matrix factorization.
//
// Input 0 supplies chunks of observations ([]Obs). Input 1 supplies, at
// index 0, the *CF aggregated during a previous pass through the entire data
// set. idx is used as the number of passes made over all chunks.
//
// The *CF is initialised with InitMF from the factor count, learning rate,
// regularization and mean-normalization options, updated in place by GDUpdate
// for every observation, and returned. Any error other than
// occult.ErrEndOfArray reported by an input is returned to the caller.
func mfFunc(idx uint64, ctx *occult.Context) (occult.Value, error) {
	opt := ctx.Options.(*Options)

	// Input 0 has chunks of observations.
	chunks := ctx.Inputs()[0]

	// Input 1 has aggregated data from a previous pass through the data set.
	agg, aggErr := ctx.Inputs()[1](0)
	if aggErr != nil {
		return nil, aggErr
	}
	cf := agg.(*CF)
	cf.InitMF(opt.numFactors, opt.learnRate, opt.regularization, opt.meanNorm)

	// Each pass walks every chunk and applies one update per observation.
	for iter := uint64(0); iter < idx; iter++ {
		glog.V(1).Infof("GD iter: %d", iter)
		for c := uint64(0); ; c++ {
			in0, err := chunks(c)
			if err != nil && err != occult.ErrEndOfArray {
				// A real failure: surface it rather than panicking on the
				// type assertion of a nil value below.
				return nil, err
			}
			// A value may arrive together with ErrEndOfArray (cfFunc in this
			// file handles its input the same way), so consume it before
			// deciding whether to stop.
			if in0 != nil {
				for _, v := range in0.([]Obs) {
					cf.GDUpdate(v.User, v.Item, v.Rating)
				}
			}
			if in0 == nil || err == occult.ErrEndOfArray {
				break
			}
		}
	}
	return cf, nil
}
Пример #2
0
// aggCFFunc folds every *CF received from input 0 into a single CF.
//
// Only index 0 produces a value; any other index reports
// occult.ErrEndOfArray. The accumulator is created with the alpha option and
// each incoming value is merged into it with Reduce.
func aggCFFunc(idx uint64, ctx *occult.Context) (occult.Value, error) {
	opt := ctx.Options.(*Options)
	if idx != 0 {
		return nil, occult.ErrEndOfArray
	}

	total := NewCF(opt.alpha)
	// Drain the channel returned by MapAll until it is closed.
	for part := range ctx.Inputs()[0].MapAll(0, ctx) {
		total.Reduce(part.(*CF))
	}

	if glog.V(5) {
		glog.Infof("aggCFFunc returning idx:%d, NumRatingsx:%#v", idx, total.NumRatings)
	}
	return total, nil
}
Пример #3
0
// cfFunc builds a CF of statistics from the chunk of observations found at
// index idx of input 0.
//
// An input error other than occult.ErrEndOfArray is returned as is. A nil
// input value is reported as occult.ErrEndOfArray. A rating outside 1..5
// aborts with an error. Otherwise the CF is returned together with the
// input's error, which may be occult.ErrEndOfArray.
func cfFunc(idx uint64, ctx *occult.Context) (occult.Value, error) {
	opt := ctx.Options.(*Options)

	chunk, err := ctx.Inputs()[0](idx)
	switch {
	case err != nil && err != occult.ErrEndOfArray:
		// Something is wrong with the input.
		return nil, err
	case chunk == nil:
		return nil, occult.ErrEndOfArray
	}

	observations := chunk.([]Obs)
	stats := NewCF(opt.alpha)
	for i := range observations {
		o := &observations[i]
		if o.Rating < 1 || o.Rating > 5 {
			return nil, fmt.Errorf("rating out of range: %d", o.Rating)
		}
		stats.Update(o.User, o.Item, o.Rating)
	}

	if glog.V(5) {
		glog.Infof("cfFunc returning idx:%d, NumRatingsx:%#v", idx, stats.NumRatings)
	}
	// err is either nil or ErrEndOfArray at this point.
	return stats, err
}