func (ra *RmaxFSSSAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { ra.stepsWithPlanner = 0 ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) ra.Plan() act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (this *Agent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { this.stepsWithPlanner = 0 this.lastState = discrete.State(this.mdp.GetTask().Obs.Ints.Index(obs.Ints())) this.Plan() act = rlglue.NewAction(this.mdp.GetTask().Act.Ints.Values(this.GetAction()), []float64{}, []byte{}) this.lastAction = discrete.Action(this.mdp.GetTask().Act.Ints.Index(act.Ints())) return }
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward) if learned { vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon) } ra.lastState = nextState act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (this *ROARAgent) AgentStep(reward float64, obs rlglue.Observation) rlglue.Action { last := matrix.MakeDenseMatrix(this.LastObs.Doubles(), this.numFeatures, 1) current := matrix.MakeDenseMatrix(obs.Doubles(), this.numFeatures, 1) rm := matrix.MakeDenseMatrix([]float64{reward}, 1, 1) outcome, _ := current.MinusDense(last) sor, _ := last.Augment(outcome) sor, _ = sor.Augment(rm) actionIndex := this.task.Act.Ints.Index(this.LastAct.Ints()) this.rpost[actionIndex].Insert(sor) this.LastObs = obs return this.GetAction() }
func (ra *RmaxFSSSAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { ra.stepsWithPlanner++ nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward) if learned { ra.Forget() } ra.lastState = nextState ra.Plan() act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (ra *BebAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
// getStateIndex maps an rlglue observation to the agent's flat discrete
// state index via the task's integer-observation indexer.
//
// NOTE(review): receiver renamed from the non-idiomatic `this` per Go
// convention, and the unused named result parameter `index` removed.
func (ba *BFS3Agent) getStateIndex(state rlglue.Observation) uint64 {
	return ba.task.Obs.Ints.Index(state.Ints())
}
func (this *OptAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { s := discrete.State(this.task.Obs.Ints.Index(obs.Ints())) a := this.qt.Pi(s) act = rlglue.NewAction([]int32{int32(a)}, []float64{}, []byte{}) return }