Exemple #1
0
func TestEnvTraj(t *testing.T) {
	if false {
		return
	}
	stat.TimeSeed()
	defer nicetrace.Print()
	env := New()
	obsi := env.EnvStart()
	indexi := task.Obs.Ints.Index(obsi.Ints())
	b := NewBelief(make(MapBelief, 16))
	b.Teleport(indexi)
	fmt.Printf("%v\n", b.Hunter)
	do := func(what int32) bool {
		action := rlglue.NewAction([]int32{what}, []float64{}, []byte{})
		obs, r, t := env.EnvStep(action)
		fmt.Println(what, r)
		if t {
			fmt.Println("done")
			return false
		}
		index := task.Obs.Ints.Index(obs.Ints())
		bs := b.Update(uint64(what), index, r)
		b = bs.(*Belief)
		fmt.Printf("%v\n", b.Hunter)
		return true
	}
	guess := func() (what int32) {
		fmt.Scanf("%d", &what)
		return
	}
	for do(guess()) {
	}
}
Exemple #2
0
func (ra *RmaxFSSSAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	ra.stepsWithPlanner = 0
	ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	ra.Plan()
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
Exemple #3
0
func (this *Agent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	this.stepsWithPlanner = 0
	this.lastState = discrete.State(this.mdp.GetTask().Obs.Ints.Index(obs.Ints()))
	this.Plan()
	act = rlglue.NewAction(this.mdp.GetTask().Act.Ints.Values(this.GetAction()), []float64{}, []byte{})
	this.lastAction = discrete.Action(this.mdp.GetTask().Act.Ints.Index(act.Ints()))
	return
}
Exemple #4
0
func (this *Environment) Next(action discrete.Action) (o discrete.Oracle, r float64) {
	act := rlglue.NewAction([]int32{int32(action)}, []float64{}, []byte{})
	next := new(Environment)
	*next = *this
	next.status = append([]bool{}, this.status...)
	_, r, _ = next.EnvStep(act)
	o = next
	return
}
Exemple #5
0
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward)
	if learned {
		vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon)
	}
	ra.lastState = nextState
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
Exemple #6
0
func (this *Oracle) Next(action discrete.Action) (o discrete.Oracle, r float64) {
	avalues := this.Task.Act.Ints.Values(action.Hashcode())
	act := rlglue.NewAction(avalues, []float64{}, []byte{})
	next := new(Oracle)
	*next = *this
	next.Cans = append([]Can{}, this.Cans...)
	_, r, next.isTerminal = next.Env.EnvStep(act)
	next.rehash()
	o = next
	return
}
Exemple #7
0
func (ra *RmaxFSSSAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	ra.stepsWithPlanner++
	nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward)
	if learned {
		ra.Forget()
	}
	ra.lastState = nextState
	ra.Plan()
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
Exemple #8
0
func (ra *BebAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
Exemple #9
0
func (this *ROARAgent) GetAction() (act rlglue.Action) {
	index := uint64(rand.Int63n(int64(this.task.Obs.Ints.Count())))
	act = rlglue.NewAction(this.task.Obs.Ints.Values(index), []float64{}, []byte{})
	this.LastAct = act
	return
}
Exemple #10
0
func (this *BFS3Agent) getIndexAction(index discrete.Action) (act rlglue.Action) {
	return rlglue.NewAction(this.task.Act.Ints.Values(index.Hashcode()), []float64{}, []byte{})
}
Exemple #11
0
func (this *OptAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	s := discrete.State(this.task.Obs.Ints.Index(obs.Ints()))
	a := this.qt.Pi(s)
	act = rlglue.NewAction([]int32{int32(a)}, []float64{}, []byte{})
	return
}