// Matrix factorization. func mfFunc(idx uint64, ctx *occult.Context) (occult.Value, error) { opt := ctx.Options.(*Options) // input 0 has chunks of data chunks := ctx.Inputs()[0] // chunks of observations // input 1 has aggregated data from a previous pass through the entire data set in1, e1 := ctx.Inputs()[1](0) // aggregated data if e1 != nil { return nil, e1 } cf := in1.(*CF) cf.InitMF(opt.numFactors, opt.learnRate, opt.regularization, opt.meanNorm) // Now we can iterate over chunks and for each chunk. var c, iter uint64 for ; iter < idx; iter++ { glog.V(1).Infof("GD iter: %d", iter) for c = 0; ; c++ { in0, err := chunks(c) if err == occult.ErrEndOfArray { break } s := in0.([]Obs) for _, v := range s { cf.GDUpdate(v.User, v.Item, v.Rating) } } } return cf, nil }
// Aggregate CF. func aggCFFunc(idx uint64, ctx *occult.Context) (occult.Value, error) { opt := ctx.Options.(*Options) if idx > 0 { return nil, occult.ErrEndOfArray } cf := NewCF(opt.alpha) ch := ctx.Inputs()[0].MapAll(0, ctx) for { v, ok := <-ch if !ok { if glog.V(5) { glog.Infof("aggCFFunc returning idx:%d, NumRatingsx:%#v", idx, cf.NumRatings) } return cf, nil } q := v.(*CF) cf.Reduce(q) } }
// Computes various global statistics on the data set. func cfFunc(idx uint64, ctx *occult.Context) (occult.Value, error) { opt := ctx.Options.(*Options) in, err := ctx.Inputs()[0](idx) if err != nil && err != occult.ErrEndOfArray { return nil, err // something is wrong } if in == nil { return nil, occult.ErrEndOfArray } s := in.([]Obs) cf := NewCF(opt.alpha) for _, v := range s { r := v.Rating if r < 1 || r > 5 { return nil, fmt.Errorf("rating out of range: %d", r) } cf.Update(v.User, v.Item, v.Rating) } if glog.V(5) { glog.Infof("cfFunc returning idx:%d, NumRatingsx:%#v", idx, cf.NumRatings) } return cf, err // err may be ErrEndOfArray }