Пример #1
0
// TrainCF builds the training pipeline over the store named dbName, runs it,
// and returns the trained model.
//
// The store is opened with store.NewStore and closed when the function
// returns. A single Options value (holding the store, chunkSize and the fixed
// hyper-parameters below) is shared by every processor registered on the
// occult app created from config. The final processor is invoked with index
// 40; per the original author's note, that index is the number of iterations.
//
// Failures to open the store go through fatalIf; a training error is reported
// with glog.Fatal. The result is type-asserted to *CF without a check, so an
// unexpected result type panics.
func TrainCF(dbName string, config *occult.Config, chunkSize int) *CF {
	db, err := store.NewStore(dbName)
	fatalIf(err)
	defer db.Close()

	// Settings handed to every processor in the graph.
	opt := &Options{
		db:             db,
		chunkSize:      chunkSize,
		numFactors:     4,
		learnRate:      0.01,
		regularization: 0.1,
		alpha:          1,
		meanNorm:       false,
	}

	// Processor graph: the source feeds cfFunc, whose output feeds aggCFFunc;
	// mfFunc takes both the source and the aggCFFunc processor as inputs.
	app := occult.NewApp(config)
	source := app.AddSource(movieFunc, opt, nil)
	perChunk := app.Add(cfFunc, opt, source)
	aggregated := app.Add(aggCFFunc, opt, perChunk)
	train := app.Add(mfFunc, opt, source, aggregated)

	// If server, stays here forever, otherwise keep going.
	app.Run()

	glog.Infof("num logical CPUs: %d", runtime.NumCPU())

	// The index passed to the processor is the number of iterations.
	const iterations uint64 = 40

	began := time.Now()
	model, trainErr := train(iterations)
	if trainErr != nil {
		glog.Fatal(trainErr)
	}
	glog.Infof("train duration: %v", time.Since(began))

	app.Shutdown()
	return model.(*CF)
}
Пример #2
0
// EvalCF runs the evaluation processor over the store named dbTest using the
// model cf and logs the resulting error figures.
//
// The store is opened with store.NewStore and closed when the function
// returns. An EvalOptions value carrying the store, the model, the model's
// global mean and a fresh SqErr accumulator is handed to evalFunc, which is
// registered as the only source of an occult app created from config. Chunks
// are requested with increasing indices, starting at 0, until the processor
// returns occult.ErrEndOfArray; any other error is reported with glog.Fatal.
//
// After the loop, the sample count and cf.alpha are logged, followed by
// sqrt(sum/n) for each accumulated squared-error sum. If no samples were
// counted, a warning is logged and the per-predictor lines are skipped,
// because 0/0 would otherwise be reported as NaN.
//
// NOTE(review): unlike TrainCF, this function never calls app.Run or
// app.Shutdown — confirm that is intended for a source-only app.
func EvalCF(dbTest string, config *occult.Config, cf *CF) {
	db, err := store.NewStore(dbTest)
	fatalIf(err)
	defer db.Close()

	opt := &EvalOptions{
		db:         db,
		cf:         cf,
		globalMean: cf.GlobalMean(),
		sqErr:      &SqErr{},
	}

	app := occult.NewApp(config)
	evalProc := app.AddSource(evalFunc, opt, nil)

	// Pull chunks until the source signals the end of the array.
	for i := uint64(0); ; i++ {
		v, e := evalProc(i)
		atEnd := e == occult.ErrEndOfArray
		if e != nil && !atEnd {
			glog.Fatal(e)
		}
		// A value may accompany the end-of-array signal, so log it before
		// deciding whether to stop.
		if v != nil {
			glog.V(5).Infof("chunk[%4d]: %v", i, v)
		}
		if atEnd {
			glog.V(3).Infof("end of array found at index %d", i)
			break
		}
	}

	n := float64(opt.sqErr.n)
	glog.Infof("N:%.0f, alpha:%.2f", n, cf.alpha)
	if n == 0 {
		// Dividing the (zero) sums by zero samples would print NaN for every
		// predictor; say what actually happened instead.
		glog.Warning("no evaluation samples were accumulated; skipping error report")
		return
	}

	// rmse turns an accumulated squared-error sum into a root-mean-square error.
	rmse := func(sumSq float64) float64 { return math.Sqrt(sumSq / n) }

	glog.Infof("%20s: %.4f", "Global Mean", rmse(opt.sqErr.globalMean))
	glog.Infof("%20s: %.4f", "Adj. User Mean", rmse(opt.sqErr.weightedUserMean))
	glog.Infof("%20s: %.4f", "Item Mean", rmse(opt.sqErr.weightedItemMean))
	glog.Infof("%20s: %.4f", "Simple MF", rmse(opt.sqErr.mf))
}