Beispiel #1
0
// GetObjs splits the combined state s into this.NumObjs per-object values.
// Each step stores s modulo this.ObjCount and then divides s by this.ObjCount,
// so objs[0] is the least-significant digit of s in base this.ObjCount.
func (this *FObjBaggage) GetObjs(s discrete.State) (objs []discrete.State) {
	base := discrete.State(this.ObjCount)
	objs = make([]discrete.State, this.NumObjs)
	for i := 0; i < len(objs); i++ {
		// Both right-hand sides use the value of s from before this step.
		objs[i], s = s%base, s/base
	}
	return objs
}
Beispiel #2
0
// AgentStart handles the first observation of an episode. It resets the
// stepsWithPlanner counter, records the observation's index as lastState,
// runs Plan, and returns the action built from GetAction. The index of the
// returned action is stored in lastAction.
func (ra *RmaxFSSSAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	ra.stepsWithPlanner = 0

	stateIndex := ra.task.Obs.Ints.Index(obs.Ints())
	ra.lastState = discrete.State(stateIndex)
	ra.Plan()

	chosen := ra.GetAction().Hashcode()
	actionValues := ra.task.Act.Ints.Values(chosen)
	act = rlglue.NewAction(actionValues, []float64{}, []byte{})

	actionIndex := ra.task.Act.Ints.Index(act.Ints())
	ra.lastAction = discrete.Action(actionIndex)
	return act
}
Beispiel #3
0
// AgentStart handles the first observation of an episode. It resets the
// stepsWithPlanner counter, records the observation's index as lastState,
// runs Plan, and returns the action built from GetAction. The index of the
// returned action is stored in lastAction.
func (this *Agent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	this.stepsWithPlanner = 0

	obsTask := this.mdp.GetTask()
	this.lastState = discrete.State(obsTask.Obs.Ints.Index(obs.Ints()))
	this.Plan()

	// The task is fetched again after planning, as in the original code.
	actTask := this.mdp.GetTask()
	actionValues := actTask.Act.Ints.Values(this.GetAction())
	act = rlglue.NewAction(actionValues, []float64{}, []byte{})

	indexTask := this.mdp.GetTask()
	this.lastAction = discrete.Action(indexTask.Act.Ints.Index(act.Ints()))
	return act
}
Beispiel #4
0
// Next queries every entry of this.learners for its next value given state s
// and action a, then combines the per-learner values into a single state via
// the task's observation ranges (this.bg.task.Obs.Ints.Index).
func (this *Belief) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	perChild := make([]int32, len(this.learners))
	for idx, l := range this.learners {
		perChild[idx] = l.Next(s, a)
	}
	return discrete.State(this.bg.task.Obs.Ints.Index(perChild))
}
Beispiel #5
0
// Next picks a successor of state s under action a. It fills one weight per
// value yielded by this.MDP.S64() with this.MDP.T(s, a, succ) and returns the
// index chosen by stat.NextChoice over those weights.
func (this *MDPTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	probs := make([]float64, this.MDP.NumStates())
	for succ := range this.MDP.S64() {
		//for succ := uint64(0); succ < this.MDP.S(); succ++ {
		probs[succ] = this.MDP.T(s, a, succ)
	}
	return discrete.State(stat.NextChoice(probs))
}
Beispiel #6
0
// GetState builds a combined state from per-object states. Each object is
// expanded with this.ObjRanges.Values and copied into its own
// this.Dimensionality-wide window of the value vector. The vector is then
// converted to a state with this.Task.Obs.Ints.Index.
func (this *FObjBaggage) GetState(objs []discrete.State) (s discrete.State) {
	width := this.Dimensionality
	packed := make([]int32, len(this.Task.Obs.Ints))
	for i, obj := range objs {
		expanded := this.ObjRanges.Values(obj.Hashcode())
		lo := i * width
		copy(packed[lo:lo+width], expanded)
	}
	return discrete.State(this.Task.Obs.Ints.Index(packed))
}
Beispiel #7
0
// GetState encodes the belief as a single state. The value vector starts with
// x, y and dir, followed by GetIndex of each entry of this.belief. It is
// converted to a state with task.Obs.Ints.Index.
func (this *Belief) GetState() discrete.State {
	// Number of fixed leading slots (x, y, dir) before the belief entries.
	const header = 3

	encoded := make([]int32, len(task.Obs.Ints))
	encoded[0], encoded[1], encoded[2] = this.x, this.y, this.dir
	for i, entry := range this.belief {
		encoded[header+i] = GetIndex(entry)
	}
	index := task.Obs.Ints.Index(encoded)
	return discrete.State(index)
}
Beispiel #8
0
// Next returns an index chosen by stat.NextChoice over this.weights. The
// weights are built on first use as counts[i] / total and kept in
// this.weights for later calls.
func (this *DirSA) Next() (n discrete.State) {
	if this.weights == nil {
		normalized := make([]float64, len(this.counts))
		for i := range normalized {
			normalized[i] = this.counts[i] / this.total
		}
		this.weights = normalized
	}
	return discrete.State(stat.NextChoice(this.weights))
}
Beispiel #9
0
// AgentStep processes one transition. It passes (lastState, lastAction,
// observed state, reward) to ra.rmdp.Observe and reruns vi.ValueIteration
// when Observe returns true. It then returns the action ra.qt.Pi gives for
// the observed state. lastState and lastAction are updated for the next call.
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	observed := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	if ra.rmdp.Observe(ra.lastState, ra.lastAction, observed, reward) {
		vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon)
	}
	ra.lastState = observed

	greedy := ra.qt.Pi(ra.lastState).Hashcode()
	act = rlglue.NewAction(ra.task.Act.Ints.Values(greedy), []float64{}, []byte{})

	actionIndex := ra.task.Act.Ints.Index(act.Ints())
	ra.lastAction = discrete.Action(actionIndex)
	return act
}
Beispiel #10
0
// AgentStep updates the belief with the last action, the observed state and
// the reward. It increments this.discoveries when the new belief and the
// previous one are ordered either way by LessThan. It then selects the next
// action with getAction and returns it converted by getIndexAction.
func (this *BFS3Agent) AgentStep(reward float64, state rlglue.Observation) (act rlglue.Action) {
	observed := discrete.State(this.getStateIndex(state))
	previous := this.belief
	this.belief = this.belief.Update(this.lastAction, observed, reward)

	// The second comparison runs only when the first one is false.
	changed := this.belief.LessThan(previous) || previous.LessThan(this.belief)
	if changed {
		this.discoveries++
	}

	this.lastAction = this.getAction()
	return this.getIndexAction(this.lastAction)
}
Beispiel #11
0
// AgentStart handles the first observation of an episode. It prints
// "AgentStart" when this.fs3.Dump is set, and calls Teleport on the belief
// when the observed state differs from the belief's current state. It then
// selects an action with getAction and returns it converted by getIndexAction.
func (this *BFS3Agent) AgentStart(state rlglue.Observation) (act rlglue.Action) {
	if this.fs3.Dump {
		println("AgentStart")
	}

	start := discrete.State(this.getStateIndex(state))
	if current := this.belief.GetState(); start != current {
		this.belief.Teleport(start)
	}

	this.lastAction = this.getAction()
	return this.getIndexAction(this.lastAction)
}
Beispiel #12
0
// Update returns a new DirSA that records one more observation of outcome n.
// The counts are copied and the entry for n is incremented. total and visits
// each grow by one, and n.Hashcode() is added to the hash. The receiver is
// not modified. It panics when n is not below len(counts).
func (this *DirSA) Update(n discrete.State) (next *DirSA) {
	counts := make([]float64, len(this.counts))
	copy(counts, this.counts)
	if limit := discrete.State(len(counts)); n >= limit {
		panic(fmt.Sprintf("%d for %d", n, len(counts)))
	}
	counts[n]++

	return &DirSA{
		counts: counts,
		total:  this.total + 1,
		visits: this.visits + 1,
		hash:   this.hash + n.Hashcode(),
	}
}
Beispiel #13
0
// AgentStep processes one transition. It increments stepsWithPlanner and
// passes (lastState, lastAction, observed state, reward) to ra.rmdp.Observe,
// calling Forget when Observe returns true. It then runs Plan from the
// observed state and returns the action built from GetAction. lastState and
// lastAction are updated for the next call.
func (ra *RmaxFSSSAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	ra.stepsWithPlanner++

	observed := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	if ra.rmdp.Observe(ra.lastState, ra.lastAction, observed, reward) {
		ra.Forget()
	}
	ra.lastState = observed
	ra.Plan()

	chosen := ra.GetAction().Hashcode()
	act = rlglue.NewAction(ra.task.Act.Ints.Values(chosen), []float64{}, []byte{})

	actionIndex := ra.task.Act.Ints.Index(act.Ints())
	ra.lastAction = discrete.Action(actionIndex)
	return act
}
Beispiel #14
0
// Next picks a successor of state s under action a.
//
// It looks up the histogram stored for the pair (value of this.C at s, action
// a) and adds those counts to a copy of this.bg.Beta. The result is
// normalized, an outcome is chosen with stat.NextChoice, and the outcome is
// mapped to a next state with this.bg.OutcomeToNext.
//
// The normalizing total is summed over every weight, not only over the
// entries covered by the stored histogram. When the histogram is shorter than
// Beta (including empty or absent), the weights are therefore still divided
// by their full sum. When the lengths match, the result is unchanged.
func (this *Posterior) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	cluster := uint64(this.C.Get(int(s)))
	ck := cluster*this.bg.NumActions + a.Hashcode()
	hist := this.clusterData[ck]

	// Start from a private copy of Beta so the shared slice is never modified.
	fhist := append([]float64{}, this.bg.Beta...)
	for i, count := range hist {
		fhist[i] += float64(count)
	}

	total := 0.0
	for _, w := range fhist {
		total += w
	}
	for i := range fhist {
		fhist[i] /= total
	}

	o := discrete.State(stat.NextChoice(fhist))
	n = this.bg.OutcomeToNext(s, o)
	return n
}
Beispiel #15
0
// AgentStep returns the action this.qt.Pi gives for the observed state,
// wrapped as a single-integer rlglue action. The reward is not used.
func (this *OptAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	current := discrete.State(this.task.Obs.Ints.Index(obs.Ints()))
	ints := []int32{int32(this.qt.Pi(current))}
	return rlglue.NewAction(ints, []float64{}, []byte{})
}
Beispiel #16
0
// AgentStart handles the first observation of an episode. It records the
// observation's index as lastState and returns the action ra.qt.Pi gives for
// that state. The index of the returned action is stored in lastAction.
func (ra *BebAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	stateIndex := ra.task.Obs.Ints.Index(obs.Ints())
	ra.lastState = discrete.State(stateIndex)

	greedy := ra.qt.Pi(ra.lastState).Hashcode()
	act = rlglue.NewAction(ra.task.Act.Ints.Values(greedy), []float64{}, []byte{})

	actionIndex := ra.task.Act.Ints.Index(act.Ints())
	ra.lastAction = discrete.Action(actionIndex)
	return act
}
Beispiel #17
0
// GetState returns this.hash converted to a discrete.State.
func (this *Environment) GetState() discrete.State {
	current := discrete.State(this.hash)
	return current
}
Beispiel #18
0
// EnvStart chooses a start state with stat.NextRange, bounded by the number
// of observation values the task defines (presumably a uniform draw; see
// stat.NextRange). It returns the matching observation and stores the state
// in this.LastState.
func (this *Env) EnvStart() (obs rlglue.Observation) {
	stateCount := int64(this.mdp.GetTask().Obs.Ints.Count())
	start := discrete.State(stat.NextRange(stateCount))

	values := this.mdp.GetTask().Obs.Ints.Values(start.Hashcode())
	obs = rlglue.NewObservation(values, []float64{}, []byte{})

	this.LastState = start
	return obs
}
Beispiel #19
0
// GetState returns this.hash converted to a discrete.State.
func (this *Oracle) GetState() (state discrete.State) {
	state = discrete.State(this.hash)
	return state
}
Beispiel #20
0
// SampleRandomState picks a state with stat.NextRange, bounded by
// this.bg.NumStates, and passes it to ResampleState.
func (this *Posterior) SampleRandomState() {
	bound := int64(this.bg.NumStates)
	picked := stat.NextRange(bound)
	this.ResampleState(discrete.State(picked))
}