Beispiel #1
0
// New constructs an Agent for the given configuration and MDP.
//
// The agent keeps cfg and mdp, wraps mdp in an oracle created by
// discrete.NewMDPOracle(mdp, 0), and sets up an fsss planner whose
// settings are taken from cfg.FS3 and from the MDP itself: the number of
// actions, the discount factor, and value bounds obtained by dividing the
// task's minimum and maximum reward by (1 - gamma).
//
// NOTE(review): a discount factor of exactly 1 makes that denominator
// zero, so the bounds would come out infinite or NaN — confirm callers
// never pass such an MDP.
func New(cfg Config, mdp discrete.MDP) (this *Agent) {
	agent := &Agent{}
	agent.cfg = cfg
	agent.mdp = mdp
	agent.mdpo = discrete.NewMDPOracle(mdp, 0)

	planner := fsss.New()
	planner.Cfg = cfg.FS3
	planner.NumActions = mdp.NumActions()
	planner.Gamma = mdp.GetGamma()

	// Both value bounds share the same (1 - gamma) denominator.
	denom := 1 - planner.Gamma
	planner.Vmin = mdp.GetTask().Reward.Min / denom
	planner.Vmax = mdp.GetTask().Reward.Max / denom
	agent.s = planner

	agent.stepsWithPlanner = 0
	return agent
}
Beispiel #2
0
// ValueIteration repeatedly sweeps over every state-action pair of mdp,
// calling BackupStateAction to update qt in place, until the largest
// change reported during a single sweep is strictly below epsilon.
//
// It returns the number of complete sweeps performed, which is always at
// least 1.
//
// The loop only stops when a sweep's maximum change is less than epsilon.
// Because the per-pair changes are absolute values, a non-positive
// epsilon never satisfies that test and the function will not return.
func ValueIteration(qt *discrete.QTable, mdp discrete.MDP, epsilon float64) (numIterations int) {
	for {
		numIterations++

		// Largest change applied to any Q(s, a) during this sweep.
		maxErr := 0.0
		for s := range mdp.S64() {
			for a := range mdp.A64() {
				saErr := BackupStateAction(qt, mdp, s, a)
				maxErr = math.Max(maxErr, saErr)
			}
		}

		if maxErr < epsilon {
			return numIterations
		}
	}
}
Beispiel #3
0
// BackupStateAction recomputes the Q-value of (s, a) from the model in mdp
// and writes it into qt:
//
//	Q(s, a) = R(s, a) + gamma * sum over n of T(s, a, n) * V(n)
//
// where n ranges over mdp.S64(), gamma is mdp.GetGamma(), T and R are the
// MDP's transition and reward functions, and V(n) is read from qt.
//
// It returns the absolute difference between the new value and the value
// qt held for (s, a) before the update.
func BackupStateAction(qt *discrete.QTable, mdp discrete.MDP, s discrete.State, a discrete.Action) (delta float64) {
	// Expected value of the successor state under the current table.
	var expectedNext float64
	for n := range mdp.S64() {
		expectedNext += mdp.T(s, a, n) * qt.V(n)
	}

	nq := mdp.GetGamma()*expectedNext + mdp.R(s, a)

	// Measure the change before overwriting the old entry.
	delta = math.Abs(nq - qt.Q(s, a))
	qt.SetQ(s, a, nq)

	return delta
}