func writeStore(f *zip.File, nodeID int) (dbName string) { rc, e := f.Open() if e != nil { glog.Fatal(e) } defer rc.Close() scanner := bufio.NewScanner(rc) // Create a custom split function by wrapping the existing ScanLines function. split := func(data []byte, atEOF bool) (advance int, line []byte, err error) { advance, line, err = bufio.ScanLines(data, atEOF) if err == nil && line != nil { // can validate here and return error. } return } // Set the split function for the scanning operation. scanner.Split(split) // create store name := path.Base(f.Name) + "-" + strconv.Itoa(nodeID) dbName = path.Join(OutDir, name) // Return if db exists. if _, err := os.Stat(dbName); err == nil { glog.Infof("db %s already exist, skipping...", dbName) return dbName } glog.Infof("creating store %s", dbName) db, err := store.NewStore(dbName) fatalIf(err) defer db.Close() var key uint64 for scanner.Scan() { newObs := Obs{} fields := strings.Fields(scanner.Text()) newObs.User, e = strconv.Atoi(fields[0]) fatalIf(e) newObs.Item, e = strconv.Atoi(fields[1]) fatalIf(e) newObs.Rating, e = strconv.Atoi(fields[2]) fatalIf(e) var io interface{} = newObs fatalIf(db.Put(key, &io)) key++ } if err = scanner.Err(); err != nil { glog.Fatalf("Invalid input: %s", err) } glog.Infof("wrote %d records", key) return dbName }
// the app func TrainCF(dbName string, config *occult.Config, chunkSize int) *CF { var db *store.Store var err error db, err = store.NewStore(dbName) fatalIf(err) defer db.Close() var numGDIterations uint64 = 40 opt := &Options{ db: db, chunkSize: chunkSize, regularization: 0.1, learnRate: 0.01, numFactors: 4, meanNorm: false, alpha: 1, } app := occult.NewApp(config) dataChunk := app.AddSource(movieFunc, opt, nil) cfProc := app.Add(cfFunc, opt, dataChunk) aggCFProc := app.Add(aggCFFunc, opt, cfProc) mfProc := app.Add(mfFunc, opt, dataChunk, aggCFProc) // If server, stays here forever, otherwise keep going. app.Run() glog.Infof("num logical CPUs: %d", runtime.NumCPU()) start := time.Now() y, ey := mfProc(numGDIterations) // the index is the # iterations if ey != nil { glog.Fatal(ey) } end := time.Now() d := end.Sub(start) glog.Infof("train duration: %v", d) app.Shutdown() return y.(*CF) }
func EvalCF(dbTest string, config *occult.Config, cf *CF) { db, err := store.NewStore(dbTest) fatalIf(err) defer db.Close() opt := &EvalOptions{ db: db, cf: cf, globalMean: cf.GlobalMean(), sqErr: &SqErr{}, } app := occult.NewApp(config) evalProc := app.AddSource(evalFunc, opt, nil) var i uint64 for { v, e := evalProc(i) if e != nil && e != occult.ErrEndOfArray { glog.Fatal(e) } if v != nil { glog.V(5).Infof("chunk[%4d]: %v", i, v) } if e == occult.ErrEndOfArray { glog.V(3).Infof("end of array found at index %d", i) break } i++ } n := float64(opt.sqErr.n) glog.Infof("N:%.0f, alpha:%.2f", n, cf.alpha) glog.Infof("%20s: %.4f", "Global Mean", math.Sqrt(opt.sqErr.globalMean/n)) glog.Infof("%20s: %.4f", "Adj. User Mean", math.Sqrt(opt.sqErr.weightedUserMean/n)) glog.Infof("%20s: %.4f", "Item Mean", math.Sqrt(opt.sqErr.weightedItemMean/n)) glog.Infof("%20s: %.4f", "Simple MF", math.Sqrt(opt.sqErr.mf/n)) }