Example #1
File: opt.go Project: skelterjohn/rlenv
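This AgentInit builds its planning machinery up front: it parses the RL-Glue task spec, sizes a system MDP from the configuration, allocates a Q-table over the discrete observation and action spaces, and solves the MDP with value iteration to a threshold of 0.1 before the first step is taken.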
func (this *OptAgent) AgentInit(taskString string) {
	// Parse the RL-Glue task spec; note the parse error is discarded here.
	this.task, _ = rlglue.ParseTaskSpec(taskString)
	this.Cfg.NumSystems = len(this.task.Obs.Ints)
	// Build the system MDP from the configuration and solve it offline.
	this.mdp = NewSysMDP(this.Cfg)
	this.qt = discrete.NewQTable(this.task.Obs.Ints.Count(), this.task.Act.Ints.Count())
	vi.ValueIteration(this.qt, this.mdp, 0.1)
}
Example #2
File: beb.go Project: skelterjohn/rlalg
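Here vi.ValueIteration runs inside the agent's step loop: each observed transition is fed to the learned model, and the agent replans only when Observe reports that the model actually changed, then acts greedily from the refreshed Q-table. (Given the name, BebAgent is presumably a Bayesian Exploration Bonus agent, though the snippet itself does not show the bonus term.)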
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	// Map the raw observation to a discrete state index.
	nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	// Feed the transition to the model; Observe reports whether the model changed.
	learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward)
	if learned {
		// The model changed, so replan to tolerance Cfg.Epsilon.
		vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon)
	}
	ra.lastState = nextState
	// Look up the policy action from the Q-table and translate it back
	// into an RL-Glue action.
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
Example #3
File: beb.go Project: skelterjohn/rlalg
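The terminal case mirrors AgentStep: the episode's final transition is recorded via ObserveTerminal, and the agent replans once more if the model changed, so the value function stays consistent with everything observed during the episode.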
func (ra *BebAgent) AgentEnd(reward float64) {
	// Record the terminal transition and replan if the model changed.
	learned := ra.rmdp.ObserveTerminal(ra.lastState, ra.lastAction, reward)
	if learned {
		vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon)
	}
}
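All three examples delegate the actual planning to vi.ValueIteration(qtable, mdp, epsilon). For reference, here is a minimal, self-contained sketch of the tabular value-iteration loop such a call performs; the sketchMDP type, its field names, and valueIterationSketch below are illustrative assumptions for this sketch, not the real vi or discrete APIs.

package main

import (
	"fmt"
	"math"
)

// sketchMDP is a hypothetical tabular MDP (not the actual discrete/vi types):
// T[s][a][s'] is the transition probability, R[s][a] the expected reward.
type sketchMDP struct {
	T     [][][]float64
	R     [][]float64
	Gamma float64
}

// valueIterationSketch applies the Bellman optimality backup to every state
// until the largest per-state change in value falls below epsilon, then
// returns the state values and the greedy policy -- the role
// vi.ValueIteration plays in the examples above.
func valueIterationSketch(m sketchMDP, epsilon float64) (V []float64, pi []int) {
	numStates := len(m.T)
	V = make([]float64, numStates)
	pi = make([]int, numStates)
	for {
		delta := 0.0
		for s := 0; s < numStates; s++ {
			best := math.Inf(-1)
			for a := range m.T[s] {
				// One-step lookahead: expected reward plus discounted value.
				q := m.R[s][a]
				for sp, p := range m.T[s][a] {
					q += m.Gamma * p * V[sp]
				}
				if q > best {
					best = q
					pi[s] = a
				}
			}
			delta = math.Max(delta, math.Abs(best-V[s]))
			V[s] = best
		}
		if delta < epsilon {
			return V, pi
		}
	}
}

func main() {
	// Two states, two actions: action 1 in state 0 moves to the absorbing
	// state 1 and pays a reward of 1; everything else pays 0.
	m := sketchMDP{
		T: [][][]float64{
			{{1, 0}, {0, 1}}, // state 0: action 0 stays, action 1 moves on
			{{0, 1}, {0, 1}}, // state 1: absorbing under both actions
		},
		R: [][]float64{
			{0, 1},
			{0, 0},
		},
		Gamma: 0.9,
	}
	V, pi := valueIterationSketch(m, 1e-6)
	fmt.Println("V:", V, "pi:", pi) // expect V[0]=1, pi[0]=1
}

The stopping rule matches the examples' third argument: iteration continues until the largest change in any state's value drops below epsilon (0.1 in Example #1, Cfg.Epsilon in Examples #2 and #3).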