func New(cfg Config, mdp discrete.MDP) (this *Agent) { this = new(Agent) this.cfg = cfg this.mdp = mdp this.mdpo = discrete.NewMDPOracle(this.mdp, 0) this.s = fsss.New() this.s.Cfg = this.cfg.FS3 this.s.NumActions = this.mdp.NumActions() this.s.Gamma = mdp.GetGamma() this.s.Vmin = this.mdp.GetTask().Reward.Min / (1 - this.s.Gamma) this.s.Vmax = this.mdp.GetTask().Reward.Max / (1 - this.s.Gamma) this.stepsWithPlanner = 0 return }
func ValueIteration(qt *discrete.QTable, mdp discrete.MDP, epsilon float64) (numIterations int) { //fmt.Fprintf(os.Stderr, "+ValueIteration\n") //fmt.Println(mdp.GetGamma()) //defer fmt.Fprintf(os.Stderr, "-ValueIteration\n") var error float64 for { numIterations += 1 //fmt.Printf("iteration %d\n", numIterations) error = 0 for s := range mdp.S64() { for a := range mdp.A64() { saError := BackupStateAction(qt, mdp, s, a) error = math.Fmax(error, saError) } } //fmt.Printf("QT\n%v\n", qt) //fmt.Fprintf(os.Stderr, "error %f\n%v\n", error, qt) if error < epsilon { return } } return }
func BackupStateAction(qt *discrete.QTable, mdp discrete.MDP, s discrete.State, a discrete.Action) (error float64) { var nq float64 for n := range mdp.S64() { ev := mdp.T(s, a, n) ev *= qt.V(n) nq += ev } nq *= mdp.GetGamma() nq += mdp.R(s, a) error = math.Fabs(nq - qt.Q(s, a)) qt.SetQ(s, a, nq) return }