Beispiel #1
0
// AgentStep folds the latest transition into the agent's per-action data and
// then asks GetAction for the next action.
//
// The stored last observation and obs are each wrapped as a numFeatures x 1
// dense matrix. A sample is assembled by augmenting the last observation with
// current.MinusDense(last) and then with a 1x1 matrix holding reward. The
// sample is inserted into the rpost entry selected by
// task.Act.Ints.Index(LastAct.Ints()). Finally obs replaces LastObs and the
// result of GetAction is returned.
func (this *ROARAgent) AgentStep(reward float64, obs rlglue.Observation) rlglue.Action {
	prev := matrix.MakeDenseMatrix(this.LastObs.Doubles(), this.numFeatures, 1)
	next := matrix.MakeDenseMatrix(obs.Doubles(), this.numFeatures, 1)

	// NOTE(review): the errors returned by MinusDense and Augment are
	// discarded here; confirm the operand shapes can never mismatch.
	delta, _ := next.MinusDense(prev)
	sample, _ := prev.Augment(delta)
	sample, _ = sample.Augment(matrix.MakeDenseMatrix([]float64{reward}, 1, 1))

	// The sample is filed under the index derived from LastAct, not from any
	// action chosen during this call.
	idx := this.task.Act.Ints.Index(this.LastAct.Ints())
	this.rpost[idx].Insert(sample)

	this.LastObs = obs
	return this.GetAction()
}