func New(cfg Config, mdp discrete.MDP) (this *Agent) { this = new(Agent) this.cfg = cfg this.mdp = mdp this.mdpo = discrete.NewMDPOracle(this.mdp, 0) this.s = fsss.New() this.s.Cfg = this.cfg.FS3 this.s.NumActions = this.mdp.NumActions() this.s.Gamma = mdp.GetGamma() this.s.Vmin = this.mdp.GetTask().Reward.Min / (1 - this.s.Gamma) this.s.Vmax = this.mdp.GetTask().Reward.Max / (1 - this.s.Gamma) this.stepsWithPlanner = 0 return }
func BackupStateAction(qt *discrete.QTable, mdp discrete.MDP, s discrete.State, a discrete.Action) (error float64) { var nq float64 for n := range mdp.S64() { ev := mdp.T(s, a, n) ev *= qt.V(n) nq += ev } nq *= mdp.GetGamma() nq += mdp.R(s, a) error = math.Fabs(nq - qt.Q(s, a)) qt.SetQ(s, a, nq) return }