Beispiel #1
0
func writeStore(f *zip.File, nodeID int) (dbName string) {

	rc, e := f.Open()
	if e != nil {
		glog.Fatal(e)
	}
	defer rc.Close()
	scanner := bufio.NewScanner(rc)
	// Create a custom split function by wrapping the existing ScanLines function.
	split := func(data []byte, atEOF bool) (advance int, line []byte, err error) {
		advance, line, err = bufio.ScanLines(data, atEOF)
		if err == nil && line != nil {
			// can validate here and return error.
		}
		return
	}
	// Set the split function for the scanning operation.
	scanner.Split(split)

	// create store
	name := path.Base(f.Name) + "-" + strconv.Itoa(nodeID)
	dbName = path.Join(OutDir, name)

	// Return if db exists.
	if _, err := os.Stat(dbName); err == nil {
		glog.Infof("db %s already exist, skipping...", dbName)
		return dbName
	}

	glog.Infof("creating store %s", dbName)
	db, err := store.NewStore(dbName)
	fatalIf(err)
	defer db.Close()

	var key uint64
	for scanner.Scan() {
		newObs := Obs{}
		fields := strings.Fields(scanner.Text())
		newObs.User, e = strconv.Atoi(fields[0])
		fatalIf(e)
		newObs.Item, e = strconv.Atoi(fields[1])
		fatalIf(e)
		newObs.Rating, e = strconv.Atoi(fields[2])
		fatalIf(e)
		var io interface{} = newObs
		fatalIf(db.Put(key, &io))
		key++
	}
	if err = scanner.Err(); err != nil {
		glog.Fatalf("Invalid input: %s", err)
	}
	glog.Infof("wrote %d records", key)
	return dbName
}
Beispiel #2
0
// TrainCF builds and runs the occult application over the store at dbName
// and returns the value produced by the final processor as a *CF.
//
// The store is opened with store.NewStore and closed on return. Four
// processors are registered in order: a source (movieFunc), cfFunc fed by the
// source, aggCFFunc fed by cfFunc, and mfFunc fed by both the source and
// aggCFFunc. All of them share one Options value carrying the store,
// chunkSize and the fixed hyper-parameters below. After app.Run returns, the
// mfFunc processor is asked for index 40 (the index is the number of
// iterations), the elapsed time is logged, and the app is shut down.
//
// A store error is handed to fatalIf; a processor error ends the process via
// glog.Fatal. The result is type-asserted to *CF without a check, so any
// other dynamic type panics.
func TrainCF(dbName string, config *occult.Config, chunkSize int) *CF {
	db, err := store.NewStore(dbName)
	fatalIf(err)
	defer db.Close()

	// Index requested from the last processor; it is the # iterations.
	const numGDIterations uint64 = 40

	opt := &Options{
		db:             db,
		chunkSize:      chunkSize,
		regularization: 0.1,
		learnRate:      0.01,
		numFactors:     4,
		meanNorm:       false,
		alpha:          1,
	}

	// Wire the processing graph; registration order is kept as is.
	app := occult.NewApp(config)
	chunks := app.AddSource(movieFunc, opt, nil)
	perChunk := app.Add(cfFunc, opt, chunks)
	aggregated := app.Add(aggCFFunc, opt, perChunk)
	mfProc := app.Add(mfFunc, opt, chunks, aggregated)

	// If server, stays here forever, otherwise keep going.
	app.Run()

	glog.Infof("num logical CPUs: %d", runtime.NumCPU())

	began := time.Now()
	model, runErr := mfProc(numGDIterations)
	if runErr != nil {
		glog.Fatal(runErr)
	}
	glog.Infof("train duration: %v", time.Since(began))

	app.Shutdown()
	return model.(*CF)
}
Beispiel #3
0
// EvalCF runs the evaluation source over the store at dbTest and logs summary
// error figures for cf.
//
// The store is opened with store.NewStore and closed on return. A single
// source processor (evalFunc) is registered with an EvalOptions value holding
// the store, cf, cf.GlobalMean() and a fresh SqErr accumulator. The processor
// is called with indices 0, 1, 2, ... until it reports
// occult.ErrEndOfArray; any other error ends the process via glog.Fatal.
// Non-nil chunk values are logged at verbosity 5.
//
// Afterwards, for each accumulator in opt.sqErr the square root of
// (value / n) is logged, where n is opt.sqErr.n — an RMSE if the accumulators
// hold summed squared errors, as their names suggest. The accumulators are
// presumably filled in by evalFunc; that is not visible here.
func EvalCF(dbTest string, config *occult.Config, cf *CF) {
	db, err := store.NewStore(dbTest)
	fatalIf(err)
	defer db.Close()

	opt := &EvalOptions{
		db:         db,
		cf:         cf,
		globalMean: cf.GlobalMean(),
		sqErr:      &SqErr{},
	}

	app := occult.NewApp(config)
	evalProc := app.AddSource(evalFunc, opt, nil)

	for idx := uint64(0); ; idx++ {
		chunk, procErr := evalProc(idx)
		atEnd := procErr == occult.ErrEndOfArray
		if procErr != nil && !atEnd {
			glog.Fatal(procErr)
		}
		// A value may accompany the end-of-array signal, so log it before
		// deciding whether to stop.
		if chunk != nil {
			glog.V(5).Infof("chunk[%4d]: %v", idx, chunk)
		}
		if atEnd {
			glog.V(3).Infof("end of array found at index %d", idx)
			break
		}
	}

	n := float64(opt.sqErr.n)
	glog.Infof("N:%.0f, alpha:%.2f", n, cf.alpha)

	// One log line per accumulator, in a fixed order.
	summary := []struct {
		label string
		total float64
	}{
		{"Global Mean", opt.sqErr.globalMean},
		{"Adj. User Mean", opt.sqErr.weightedUserMean},
		{"Item Mean", opt.sqErr.weightedItemMean},
		{"Simple MF", opt.sqErr.mf},
	}
	for _, row := range summary {
		glog.Infof("%20s: %.4f", row.label, math.Sqrt(row.total/n))
	}
}