func (this *OptAgent) AgentInit(taskString string) { this.task, _ = rlglue.ParseTaskSpec(taskString) this.Cfg.NumSystems = len(this.task.Obs.Ints) this.mdp = NewSysMDP(this.Cfg) this.qt = discrete.NewQTable(this.task.Obs.Ints.Count(), this.task.Act.Ints.Count()) vi.ValueIteration(this.qt, this.mdp, 0.1) }
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward) if learned { vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon) } ra.lastState = nextState act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (ra *BebAgent) AgentEnd(reward float64) { learned := ra.rmdp.ObserveTerminal(ra.lastState, ra.lastAction, reward) if learned { vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon) } }