func (ra *RmaxFSSSAgent) Forget() { ra.mdpo = discrete.NewMDPOracle(ra.rmdp, ra.lastState) ra.s = fsss.New() ra.s.Cfg = ra.Cfg.FS3 ra.s.NumActions = ra.rmdp.NumActions() ra.s.Gamma = ra.task.DiscountFactor if ra.s.Gamma == 1 { ra.s.Gamma = 0.9 } ra.s.Vmin = ra.task.Reward.Min / (1 - ra.s.Gamma) ra.s.Vmax = 5 ra.stepsWithPlanner = 0 }
func New(cfg Config, mdp discrete.MDP) (this *Agent) { this = new(Agent) this.cfg = cfg this.mdp = mdp this.mdpo = discrete.NewMDPOracle(this.mdp, 0) this.s = fsss.New() this.s.Cfg = this.cfg.FS3 this.s.NumActions = this.mdp.NumActions() this.s.Gamma = mdp.GetGamma() this.s.Vmin = this.mdp.GetTask().Reward.Min / (1 - this.s.Gamma) this.s.Vmax = this.mdp.GetTask().Reward.Max / (1 - this.s.Gamma) this.stepsWithPlanner = 0 return }
func (this *BFS3Agent) ResetPlanner() { this.fs3 = fsss.New() this.fs3.Cfg = this.Cfg.FS3 this.fs3.NumActions = uint64(this.task.Act.Ints.Count()) if this.Cfg.CustomGammaV { this.fs3.Gamma = this.Cfg.Gamma this.fs3.Vmin = this.Cfg.Vmin this.fs3.Vmax = this.Cfg.Vmax } else { this.fs3.Gamma = this.task.DiscountFactor if this.fs3.Gamma == 1 { this.fs3.Gamma = 0.95 } this.fs3.Vmin = this.task.Reward.Min / (1 - this.fs3.Gamma) this.fs3.Vmax = this.task.Reward.Max / (1 - this.fs3.Gamma) } }