// GetObjs unpacks the composite state s into per-object states by peeling
// off one base-ObjCount digit per object.
func (this *FObjBaggage) GetObjs(s discrete.State) (objs []discrete.State) {
	objs = make([]discrete.State, this.NumObjs)
	for i := range objs {
		objs[i] = s % discrete.State(this.ObjCount)
		s /= discrete.State(this.ObjCount)
	}
	return
}
// AgentStart resets the planner, records the initial state, and returns the
// first action chosen by planning from that state.
func (ra *RmaxFSSSAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	ra.stepsWithPlanner = 0
	ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	ra.Plan()
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
// AgentStart records the initial state, plans, and returns the resulting action.
func (this *Agent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	this.stepsWithPlanner = 0
	this.lastState = discrete.State(this.mdp.GetTask().Obs.Ints.Index(obs.Ints()))
	this.Plan()
	act = rlglue.NewAction(this.mdp.GetTask().Act.Ints.Values(this.GetAction()), []float64{}, []byte{})
	this.lastAction = discrete.Action(this.mdp.GetTask().Act.Ints.Index(act.Ints()))
	return
}
// Next predicts the factored next state by querying each child feature's
// learner and re-indexing the resulting feature vector.
func (this *Belief) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	nv := make([]int32, len(this.learners))
	for child, learner := range this.learners {
		nv[child] = learner.Next(s, a)
	}
	n = discrete.State(this.bg.task.Obs.Ints.Index(nv))
	return
}
// Next samples a successor for (s, a) from the MDP's transition distribution.
// The loop variable is named s2 so it does not shadow the named return n.
func (this *MDPTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	weights := make([]float64, this.MDP.NumStates())
	for s2 := range this.MDP.S64() {
		weights[s2] = this.MDP.T(s, a, s2)
	}
	n = discrete.State(stat.NextChoice(weights))
	return
}
// GetState packs per-object states into a single composite state by writing
// each object's feature values into its slot of the joint feature vector.
func (this *FObjBaggage) GetState(objs []discrete.State) (s discrete.State) {
	values := make([]int32, len(this.Task.Obs.Ints))
	for i, obj := range objs {
		objValues := this.ObjRanges.Values(obj.Hashcode())
		copy(values[i*this.Dimensionality:(i+1)*this.Dimensionality], objValues)
	}
	s = discrete.State(this.Task.Obs.Ints.Index(values))
	return
}
// GetState flattens the pose (x, y, dir) and the belief vector into a single
// discrete state index.
func (this *Belief) GetState() discrete.State {
	values := make([]int32, len(task.Obs.Ints))
	values[0] = this.x
	values[1] = this.y
	values[2] = this.dir
	for i, v := range this.belief {
		values[i+3] = GetIndex(v)
	}
	return discrete.State(task.Obs.Ints.Index(values))
}
// Next samples a successor from the Dirichlet counts, lazily caching the
// normalized weights on first use.
func (this *DirSA) Next() (n discrete.State) {
	if this.weights == nil {
		this.weights = make([]float64, len(this.counts))
		for i, c := range this.counts {
			this.weights[i] = c / this.total
		}
	}
	n = discrete.State(stat.NextChoice(this.weights))
	return
}
// AgentStep feeds the observed transition to the model, re-solves with value
// iteration whenever the model changes, and acts greedily from the Q-table.
func (ra *BebAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward)
	if learned {
		vi.ValueIteration(ra.qt, ra.rmdp, ra.Cfg.Epsilon)
	}
	ra.lastState = nextState
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
// AgentStep updates the belief with the observed transition; a discovery is
// counted whenever the updated belief differs from the old one (tested by
// comparing with LessThan in both directions).
func (this *BFS3Agent) AgentStep(reward float64, state rlglue.Observation) (act rlglue.Action) {
	s := discrete.State(this.getStateIndex(state))
	old := this.belief
	this.belief = this.belief.Update(this.lastAction, s, reward)
	if this.belief.LessThan(old) || old.LessThan(this.belief) {
		this.discoveries++
	}
	this.lastAction = this.getAction()
	act = this.getIndexAction(this.lastAction)
	return
}
// AgentStart teleports the belief to the observed start state if the two
// disagree, then selects the first action.
func (this *BFS3Agent) AgentStart(state rlglue.Observation) (act rlglue.Action) {
	if this.fs3.Dump {
		println("AgentStart")
	}
	s := discrete.State(this.getStateIndex(state))
	if s != this.belief.GetState() {
		this.belief.Teleport(s)
	}
	this.lastAction = this.getAction()
	act = this.getIndexAction(this.lastAction)
	return
}
// Update returns a copy of this DirSA with the count for outcome n
// incremented, leaving the receiver untouched.
func (this *DirSA) Update(n discrete.State) (next *DirSA) {
	next = new(DirSA)
	next.counts = make([]float64, len(this.counts))
	copy(next.counts, this.counts)
	if n >= discrete.State(len(next.counts)) {
		panic(fmt.Sprintf("%d for %d", n, len(next.counts)))
	}
	next.counts[n]++
	next.total = this.total + 1
	next.visits = this.visits + 1
	next.hash = this.hash + n.Hashcode()
	return
}
// AgentStep updates the model with the observed transition, discards stale
// plans when the model changes, and replans before acting.
func (ra *RmaxFSSSAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	ra.stepsWithPlanner++
	nextState := discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	learned := ra.rmdp.Observe(ra.lastState, ra.lastAction, nextState, reward)
	if learned {
		ra.Forget()
	}
	ra.lastState = nextState
	ra.Plan()
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.GetAction().Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
// Next samples a successor for (s, a): it looks up s's cluster, forms the
// Dirichlet posterior over outcomes (Beta prior plus observed counts),
// normalizes it, samples an outcome, and maps the outcome to a next state.
// The loop variable is named count so it does not shadow the cluster index c.
func (this *Posterior) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	c := uint64(this.C.Get(int(s)))
	ck := c*this.bg.NumActions + a.Hashcode()
	hist := this.clusterData[ck]
	fhist := append([]float64{}, this.bg.Beta...)
	total := 0.0
	for i, count := range hist {
		fhist[i] += float64(count)
		total += fhist[i]
	}
	for i := range fhist {
		fhist[i] /= total
	}
	o := discrete.State(stat.NextChoice(fhist))
	n = this.bg.OutcomeToNext(s, o)
	return
}
// AgentStep acts greedily with respect to the agent's Q-table; the reward is
// ignored since no learning takes place.
func (this *OptAgent) AgentStep(reward float64, obs rlglue.Observation) (act rlglue.Action) {
	s := discrete.State(this.task.Obs.Ints.Index(obs.Ints()))
	a := this.qt.Pi(s)
	act = rlglue.NewAction([]int32{int32(a)}, []float64{}, []byte{})
	return
}
// AgentStart records the initial state and acts greedily from it.
func (ra *BebAgent) AgentStart(obs rlglue.Observation) (act rlglue.Action) {
	ra.lastState = discrete.State(ra.task.Obs.Ints.Index(obs.Ints()))
	act = rlglue.NewAction(ra.task.Act.Ints.Values(ra.qt.Pi(ra.lastState).Hashcode()), []float64{}, []byte{})
	ra.lastAction = discrete.Action(ra.task.Act.Ints.Index(act.Ints()))
	return
}
func (this *Environment) GetState() discrete.State {
	return discrete.State(this.hash)
}
// EnvStart samples a uniformly random start state and returns it as the
// initial observation.
func (this *Env) EnvStart() (obs rlglue.Observation) {
	startState := discrete.State(stat.NextRange(int64(this.mdp.GetTask().Obs.Ints.Count())))
	obs = rlglue.NewObservation(this.mdp.GetTask().Obs.Ints.Values(startState.Hashcode()), []float64{}, []byte{})
	this.LastState = startState
	return
}
func (this *Oracle) GetState() (state discrete.State) {
	return discrete.State(this.hash)
}
// SampleRandomState picks a uniformly random state and resamples its cluster
// assignment.
func (this *Posterior) SampleRandomState() {
	s := discrete.State(stat.NextRange(int64(this.bg.NumStates)))
	this.ResampleState(s)
}