예제 #1
0
// Forget rebuilds the agent's planning state from scratch: it creates a new
// MDP oracle from ra.rmdp and ra.lastState, installs a freshly configured
// fsss planner, and resets the stepsWithPlanner counter to zero.
func (ra *RmaxFSSSAgent) Forget() {
	ra.mdpo = discrete.NewMDPOracle(ra.rmdp, ra.lastState)

	planner := fsss.New()
	planner.Cfg = ra.Cfg.FS3
	planner.NumActions = ra.rmdp.NumActions()

	// A task discount of exactly 1 is replaced by 0.9 — presumably so that
	// the 1/(1-gamma) factor used for Vmin below stays finite.
	gamma := ra.task.DiscountFactor
	if gamma == 1 {
		gamma = 0.9
	}
	planner.Gamma = gamma

	planner.Vmin = ra.task.Reward.Min / (1 - gamma)
	// NOTE(review): Vmax is a fixed constant here, whereas Vmin is derived
	// from the task's reward range — confirm that 5 is the intended bound.
	planner.Vmax = 5

	ra.s = planner
	ra.stepsWithPlanner = 0
}
예제 #2
0
// New builds an Agent for the given configuration and MDP. It stores both on
// the agent, creates an MDP oracle via discrete.NewMDPOracle(mdp, 0), and
// sets up an fsss planner whose action count, discount and value bounds are
// taken from the MDP. The planner step counter starts at zero.
func New(cfg Config, mdp discrete.MDP) (this *Agent) {
	this = &Agent{cfg: cfg, mdp: mdp}
	this.mdpo = discrete.NewMDPOracle(mdp, 0)

	planner := fsss.New()
	planner.Cfg = cfg.FS3
	planner.NumActions = mdp.NumActions()

	gamma := mdp.GetGamma()
	planner.Gamma = gamma

	// Value bounds are the extreme per-step rewards scaled by 1/(1-gamma).
	// NOTE(review): there is no guard for gamma == 1, so in that case both
	// divisions below divide by zero — confirm callers never pass such an MDP.
	scale := 1 - gamma
	planner.Vmin = mdp.GetTask().Reward.Min / scale
	planner.Vmax = mdp.GetTask().Reward.Max / scale

	this.s = planner
	this.stepsWithPlanner = 0
	return this
}