func TestEnvTraj(t *testing.T) { if false { return } stat.TimeSeed() defer nicetrace.Print() env := New() obsi := env.EnvStart() indexi := task.Obs.Ints.Index(obsi.Ints()) b := NewBelief(make(MapBelief, 16)) b.Teleport(indexi) fmt.Printf("%v\n", b.Hunter) do := func(what int32) bool { action := rlglue.NewAction([]int32{what}, []float64{}, []byte{}) obs, r, t := env.EnvStep(action) fmt.Println(what, r) if t { fmt.Println("done") return false } index := task.Obs.Ints.Index(obs.Ints()) bs := b.Update(uint64(what), index, r) b = bs.(*Belief) fmt.Printf("%v\n", b.Hunter) return true } guess := func() (what int32) { fmt.Scanf("%d", &what) return } for do(guess()) { } }
func (ra *RmaxFSSSAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { ra.stepsWithPlanner = 0 ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) ra.Plan() act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (this *Agent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { this.stepsWithPlanner = 0 this.lastState = discrete.State(this.mdp.GetTask().Obs.Ints.Index(obs.Ints())) this.Plan() act = rlglue.NewAction(this.mdp.GetTask().Act.Ints.Values(this.GetAction()), []float64{}, []byte{}) this.lastAction = discrete.Action(this.mdp.GetTask().Act.Ints.Index(act.Ints())) return }
func (this *Environment) Next(action discrete.Action) (o discrete.Oracle, r float64) { act := rlglue.NewAction([]int32{int32(action)}, []float64{}, []byte{}) next := new(Environment) *next = *this next.status = append([]bool{}, this.status...) _, r, _ = next.EnvStep(act) o = next return }
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward) if learned { vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon) } ra.lastState = nextState act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (this *Oracle) Next(action discrete.Action) (o discrete.Oracle, r float64) { avalues := this.Task.Act.Ints.Values(action.Hashcode()) act := rlglue.NewAction(avalues, []float64{}, []byte{}) next := new(Oracle) *next = *this next.Cans = append([]Can{}, this.Cans...) _, r, next.isTerminal = next.Env.EnvStep(act) next.rehash() o = next return }
func (ra *RmaxFSSSAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { ra.stepsWithPlanner++ nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward) if learned { ra.Forget() } ra.lastState = nextState ra.Plan() act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (ra *BebAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) { ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints())) act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{}) ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints())) return }
func (this *ROARAgent) GetAction() (act rlglue.Action) { index := uint64(rand.Int63n(int64(this.task.Obs.Ints.Count()))) act = rlglue.NewAction(this.task.Obs.Ints.Values(index), []float64{}, []byte{}) this.LastAct = act return }
// getIndexAction converts a discrete action index into an rlglue action
// carrying the corresponding int values (no doubles or chars).
func (this *BFS3Agent) getIndexAction(index discrete.Action) (act rlglue.Action) {
	values := this.task.Act.Ints.Values(index.Hashcode())
	act = rlglue.NewAction(values, []float64{}, []byte{})
	return
}
func (this *OptAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) { s := discrete.State(this.task.Obs.Ints.Index(obs.Ints())) a := this.qt.Pi(s) act = rlglue.NewAction([]int32{int32(a)}, []float64{}, []byte{}) return }