예제 #1
0
// Forget throws away the agent's current planning state and rebuilds it.
//
// It creates a new MDP oracle over ra.rmdp seeded with ra.lastState,
// installs a freshly constructed FSSS planner configured from ra.Cfg.FS3,
// and resets the stepsWithPlanner counter to zero.
func (ra *RmaxFSSSAgent) Forget() {
	ra.mdpo = discrete.NewMDPOracle(ra.rmdp, ra.lastState)

	// The lower value bound is computed as Reward.Min/(1-gamma); a discount
	// of exactly 1 would divide by zero, so 0.9 is substituted in that case.
	gamma := ra.task.DiscountFactor
	if gamma == 1 {
		gamma = 0.9
	}

	planner := fsss.New()
	planner.Cfg = ra.Cfg.FS3
	planner.NumActions = ra.rmdp.NumActions()
	planner.Gamma = gamma
	planner.Vmin = ra.task.Reward.Min / (1 - gamma)
	// NOTE(review): Vmax is a fixed constant here rather than being derived
	// from ra.task.Reward.Max the way Vmin is derived from Reward.Min.
	// Confirm that 5 is intentional for every task this agent runs on.
	planner.Vmax = 5

	ra.s = planner
	ra.stepsWithPlanner = 0
}
예제 #2
0
// New builds an Agent that plans over mdp using an FSSS planner.
//
// cfg is stored on the agent and cfg.FS3 is handed to the planner. mdp
// supplies the action count, the discount factor (GetGamma) and the reward
// range (GetTask().Reward) used to bound state values. The MDP oracle is
// created with 0 as its second argument (presumably the initial state;
// verify against discrete.NewMDPOracle).
//
// The planner's value bounds are Reward.Min/(1-gamma) and
// Reward.Max/(1-gamma). When the MDP is undiscounted (gamma == 1) that
// denominator is zero and the bounds would become Inf or NaN, so the
// planner falls back to a discount of 0.95 in that case. Only the
// planner's Gamma is affected; the MDP itself is left untouched.
func New(cfg Config, mdp discrete.MDP) (this *Agent) {
	this = new(Agent)
	this.cfg = cfg
	this.mdp = mdp
	this.mdpo = discrete.NewMDPOracle(this.mdp, 0)

	// Guard the 1/(1-gamma) bound computation against division by zero.
	gamma := mdp.GetGamma()
	if gamma == 1 {
		gamma = 0.95
	}
	task := this.mdp.GetTask()

	this.s = fsss.New()
	this.s.Cfg = this.cfg.FS3
	this.s.NumActions = this.mdp.NumActions()
	this.s.Gamma = gamma
	this.s.Vmin = task.Reward.Min / (1 - gamma)
	this.s.Vmax = task.Reward.Max / (1 - gamma)
	this.stepsWithPlanner = 0
	return this
}
예제 #3
0
파일: bfs3.go 프로젝트: skelterjohn/rlalg
// ResetPlanner replaces this.fs3 with a newly constructed FSSS planner.
//
// The planner always takes its configuration from this.Cfg.FS3 and its
// action count from the task's integer action ranges. Its discount and
// value bounds come from one of two places:
//
// When this.Cfg.CustomGammaV is set, Gamma, Vmin and Vmax are copied
// verbatim from this.Cfg.
//
// Otherwise Gamma is the task's DiscountFactor (with 0.95 substituted when
// it is exactly 1, which would otherwise make the bounds divide by zero),
// and Vmin/Vmax are Reward.Min and Reward.Max divided by (1 - Gamma).
func (this *BFS3Agent) ResetPlanner() {
	planner := fsss.New()
	planner.Cfg = this.Cfg.FS3
	planner.NumActions = uint64(this.task.Act.Ints.Count())

	if !this.Cfg.CustomGammaV {
		gamma := this.task.DiscountFactor
		if gamma == 1 {
			gamma = 0.95
		}
		denom := 1 - gamma
		planner.Gamma = gamma
		planner.Vmin = this.task.Reward.Min / denom
		planner.Vmax = this.task.Reward.Max / denom
	} else {
		planner.Gamma = this.Cfg.Gamma
		planner.Vmin = this.Cfg.Vmin
		planner.Vmax = this.Cfg.Vmax
	}

	this.fs3 = planner
}