예제 #1
0
파일: fl.go 프로젝트: postfix/rlbayes
// GetCoffeePrior returns a bfs3.Prior that builds a belief state for the
// coffee MDP described by cfg.Coffee.
//
// The transition belief is dl.NewBelief(cfg.DL, task) unless cfg.FDM is set,
// in which case it is replaced by bayes.NewFDMTransition configured from the
// task's integer observation/action counts. Reward and terminal models are
// always taken from the coffee MDP itself.
func GetCoffeePrior(cfg Config) bfs3.Prior {
	return func(task *rlglue.TaskSpec) bayes.BeliefState {
		world := coffee.NewMDP(cfg.Coffee)

		// The DL belief is always constructed first, even when the FDM branch
		// below overrides it.
		var trans bayes.TransitionBelief = dl.NewBelief(cfg.DL, task)

		if cfg.FDM {
			// Outcomes and next-states coincide here, so one identity mapping
			// (returning the second argument) serves both directions.
			identity := func(s discrete.State, n discrete.State) discrete.State {
				return n
			}

			numStates := task.Obs.Ints.Count()
			numActions := task.Act.Ints.Count()

			// Every Alpha entry starts at 0.1, one entry per state.
			alpha := make([]float64, numStates)
			for idx := 0; idx < len(alpha); idx++ {
				alpha[idx] = .1
			}

			baggage := &bayes.FDMTransitionBaggage{
				NumStates:       numStates,
				NumActions:      numActions,
				NextToOutcome:   identity,
				OutcomeToNext:   identity,
				Alpha:           alpha,
				ForgetThreshold: cfg.N,
			}
			trans = bayes.NewFDMTransition(baggage)
		}

		// The terminal model wraps its own MDPTransition over the MDP; it does
		// not depend on the transition belief chosen above.
		rew := &bayes.MDPReward{world}
		term := &bayes.MDPTerminal{&bayes.MDPTransition{world}}

		return bayes.NewBelief(0, rew, trans, term, nil)
	}
}
예제 #2
0
파일: fs3cof.go 프로젝트: skelterjohn/rlenv
// GetTruthFunc returns a bfs3.Prior whose belief state is backed directly by
// the coffee MDP built from cfg: transition, reward and terminal models are
// all thin wrappers around that MDP.
func GetTruthFunc(cfg coffee.Config) bfs3.Prior {
	return func(task *rlglue.TaskSpec) bayes.BeliefState {
		world := coffee.NewMDP(cfg)

		// A single MDPTransition is shared by the belief and the terminal
		// wrapper.
		trans := &bayes.MDPTransition{world}
		rew := &bayes.MDPReward{world}
		term := &bayes.MDPTerminal{trans}

		return bayes.NewBelief(0, rew, trans, term, nil)
	}
}