func (this *CRPReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		return this
	}
	ndr := new(CRPReward)
	*ndr = *this
	ndr.Known = make([]bool, len(this.Known))
	copy(ndr.Known, this.Known)
	ndr.R = make([]float64, len(this.R))
	copy(ndr.R, this.R)
	ndr.Known[index] = true
	ndr.R[index] = r
	ndr.countKnown++
	// Provisionally prepend the new reward as a fresh CRP table with count 1.
	ndr.SeenRewards = append([]float64{r}, this.SeenRewards...)
	ndr.Counts = append([]uint64{1}, this.Counts...)
	// If the reward was already seen, bump that table's count instead of
	// keeping the provisional new entry.
	var seen bool
	for i, sr := range ndr.SeenRewards {
		if i != 0 && sr == r {
			seen = true
			ndr.Counts[i]++
			break
		}
	}
	if seen {
		ndr.SeenRewards = ndr.SeenRewards[1:]
		ndr.Counts = ndr.Counts[1:]
	}
	ndr.Total++
	next = ndr
	return
}
func (this *CRPReward) Next(s discrete.State, a discrete.Action) (r float64) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		r = this.R[index]
		return
	}
	// Lazily build a chooser over previously seen rewards, weighted by their
	// counts scaled by 1/(Total+Alpha); an index past the seen rewards means
	// drawing a fresh reward from the base sampler.
	if this.chooser == nil {
		if len(this.Counts) == 0 {
			this.chooser = func() int64 { return 0 }
		} else {
			normalizer := 1.0 / (float64(this.Total) + this.Alpha)
			weights := make([]float64, len(this.Counts))
			for i := range weights {
				weights[i] = float64(this.Counts[i]) * normalizer
			}
			this.chooser = stat.Choice(weights)
		}
	}
	which := int(this.chooser())
	if which == len(this.SeenRewards) {
		r = this.BaseSampler()
	} else {
		r = this.SeenRewards[which]
	}
	return
}
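// Illustrative sketch, not part of this package: the Chinese Restaurant Process
// predictive that CRPReward approximates. A previously seen reward is drawn
// with probability proportional to its count, and a fresh reward is drawn from
// the base sampler with probability proportional to alpha. The helper name and
// signature are hypothetical; only stat.NextUniform is taken from the code above,
// and total is assumed to equal the sum of counts.
func crpPredictiveSample(seen []float64, counts []uint64, total uint64, alpha float64, base func() float64) float64 {
	// Draw a point in [0, total+alpha): the first total units of mass are split
	// among the seen rewards, the remaining alpha goes to the base measure.
	u := stat.NextUniform() * (float64(total) + alpha)
	acc := 0.0
	for i, c := range counts {
		acc += float64(c)
		if u < acc {
			return seen[i]
		}
	}
	return base()
}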
func (this *FDMTransition) Update(s discrete.State, a discrete.Action, n discrete.State) (next TransitionBelief) {
	o := this.bg.NextToOutcome(s, n)
	k := s.Hashcode() + a.Hashcode()*this.bg.NumStates
	dsa := this.sas[k]
	if dsa == nil {
		dsa = NewDirSA(this.bg.Alpha)
		this.sas[k] = dsa
	}
	if this.bg.ForgetThreshold != 0 && dsa.visits >= this.bg.ForgetThreshold {
		next = this
		return
	}
	nextFDM := new(FDMTransition)
	nextFDM.bg = this.bg
	nextFDM.sas = make([]*DirSA, len(this.sas))
	copy(nextFDM.sas, this.sas)
	nextFDM.sas[k] = dsa.Update(o)
	if nextFDM.sas[k].visits == this.bg.ForgetThreshold {
		// Once a state-action pair reaches the forget threshold, drop the
		// Dirichlet prior and keep only the empirical counts.
		nextFDM.sas[k].ForgetPrior(this.bg.Alpha)
	}
	nextFDM.hash = this.hash - this.sas[k].Hashcode() + nextFDM.sas[k].Hashcode()
	next = nextFDM
	return
}
func (this *DeterministicReward) Next(s discrete.State, a discrete.Action) (r float64) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		r = this.R[index]
		return
	}
	r = this.BaseSampler()
	return
}
func (this *FObjTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	avalues := this.bg.Task.Act.Ints.Values(a.Hashcode())
	which, act := avalues[0], avalues[1]
	sobjs := this.bg.GetObjs(s)
	nobjs := append([]discrete.State{}, sobjs...)
	nobjs[which] = this.ObjFDM.Next(sobjs[which], discrete.Action(act))
	n = this.bg.GetState(nobjs)
	return
}
func (this *Posterior) UpdatePosterior(s discrete.State, a discrete.Action, o discrete.State) (next *Posterior) {
	next = new(Posterior)
	*next = *this
	next.stateData = append([]SAHist{}, this.stateData...)
	next.clusterData = append([]SAHist{}, this.clusterData...)
	next.C = this.C.Copy()
	k := s.Hashcode()*this.bg.NumActions + a.Hashcode()
	next.stateData[k] = next.stateData[k].Incr(this.bg.NumOutcomes, o)
	return
}
func (this *FDMTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	k := s.Hashcode() + a.Hashcode()*this.bg.NumStates
	dsa := this.sas[k]
	if dsa == nil {
		dsa = NewDirSA(this.bg.Alpha)
		this.sas[k] = dsa
	}
	n = this.bg.OutcomeToNext(s, dsa.Next())
	return
}
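// Illustrative sketch, not part of this package: the Dirichlet-multinomial
// posterior predictive that a DirSA is assumed to implement for a single
// state-action pair. Under a symmetric Dirichlet(alpha) prior, outcome i is
// drawn with probability (counts[i]+alpha)/(visits+K*alpha). The helper is
// hypothetical; stat.NextChoice is used as in Posterior.Next below.
func dirichletPredictiveSample(counts []uint64, alpha float64) uint64 {
	weights := make([]float64, len(counts))
	sum := 0.0
	for i, c := range counts {
		weights[i] = float64(c) + alpha
		sum += weights[i]
	}
	// Normalize so the weights form a probability distribution.
	for i := range weights {
		weights[i] /= sum
	}
	return uint64(stat.NextChoice(weights))
}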
func (this *Oracle) Next(action discrete.Action) (o discrete.Oracle, r float64) {
	avalues := this.Task.Act.Ints.Values(action.Hashcode())
	act := rlglue.NewAction(avalues, []float64{}, []byte{})
	next := new(Oracle)
	*next = *this
	next.Cans = append([]Can{}, this.Cans...)
	_, r, next.isTerminal = next.Env.EnvStep(act)
	next.rehash()
	o = next
	return
}
func (this *FObjTransition) Update(s discrete.State, a discrete.Action, n discrete.State) (next TransitionBelief) {
	nt := new(FObjTransition)
	*nt = *this
	avalues := this.bg.Task.Act.Ints.Values(a.Hashcode())
	which, act := avalues[0], avalues[1]
	sobjs := this.bg.GetObjs(s)
	nobjs := this.bg.GetObjs(n)
	nt.ObjFDM = this.ObjFDM.Update(sobjs[which], discrete.Action(act), nobjs[which]).(*FDMTransition)
	next = nt
	return
}
func (this *BetaTerminal) Next(s discrete.State, a discrete.Action) (t bool) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		t = this.Term[index]
		return
	}
	// For an unknown state-action, sample termination from the Beta posterior mean.
	prob := this.Alpha / (this.Alpha + this.Beta)
	if stat.NextUniform() < prob {
		t = true
	}
	return
}
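// Illustrative sketch, not part of this package: the Beta-Bernoulli bookkeeping
// behind BetaTerminal. Each terminal observation increments alpha, each
// non-terminal one increments beta, and the predictive probability of
// termination for an unknown state-action is alpha/(alpha+beta). The helper
// name and signature are hypothetical.
func betaBernoulliPredictive(alpha, beta float64, observations []bool) float64 {
	for _, terminal := range observations {
		if terminal {
			alpha++
		} else {
			beta++
		}
	}
	return alpha / (alpha + beta)
}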
func (this *CountKnown) Update(s discrete.State, a discrete.Action) (next KnownBelief) {
	nk := new(CountKnown)
	nk.numStates = this.numStates
	nk.visits = make([]int, len(this.visits))
	copy(nk.visits, this.visits)
	nk.threshold = this.threshold
	k := s.Hashcode() + nk.numStates*a.Hashcode()
	nk.visits[k]++
	next = nk
	return
}
func (this *RmaxReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.R[index] == r {
		return this
	}
	nrr := new(RmaxReward)
	*nrr = *this
	nrr.R = make([]float64, len(this.R))
	copy(nrr.R, this.R)
	nrr.R[index] = r
	nrr.countKnown++
	return nrr
}
func (this *Posterior) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	c := uint64(this.C.Get(int(s)))
	ck := c*this.bg.NumActions + a.Hashcode()
	hist := this.clusterData[ck]
	// Posterior over outcomes for this cluster-action pair: the Beta prior
	// counts plus the observed outcome histogram, normalized to a distribution.
	fhist := append([]float64{}, this.bg.Beta...)
	total := 0.0
	for i, count := range hist {
		fhist[i] += float64(count)
		total += fhist[i]
	}
	for i := range fhist {
		fhist[i] /= total
	}
	o := discrete.State(stat.NextChoice(fhist))
	n = this.bg.OutcomeToNext(s, o)
	return
}
func (this *DeterministicReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		return this
	}
	ndr := new(DeterministicReward)
	*ndr = *this
	ndr.Known = make([]bool, len(this.Known))
	copy(ndr.Known, this.Known)
	ndr.R = make([]float64, len(this.R))
	copy(ndr.R, this.R)
	ndr.Known[index] = true
	ndr.R[index] = r
	ndr.countKnown++
	next = ndr
	return
}
func (this *DepLearner) Update(s discrete.State, a discrete.Action, o int32) (next *DepLearner) {
	k := a.Hashcode() + this.bg.numActions*s.Hashcode()
	next = new(DepLearner)
	*next = *this
	oi := this.bg.myRange.Index(o)
	next.history = append([]Histogram{}, this.history...)
	next.history[k] = next.history[k].Incr(oi)
	// Also update the histogram keyed by the parent-variable values this
	// learner conditions on, adjusting the cached log-likelihood as we go.
	sv := next.bg.stateValues[s]
	mv := next.parents.CutValues(sv)
	ms := next.cutRanges.Index(mv)
	mk := a.Hashcode() + this.bg.numActions*ms
	next.mappedHistory = append([]Histogram{}, this.mappedHistory...)
	next.mappedLoglihood += next.mappedHistory[mk].LogFactorAlpha(this.bg.cfg.Alpha)
	next.mappedHistory[mk] = next.mappedHistory[mk].Incr(oi)
	next.mappedLoglihood -= next.mappedHistory[mk].LogFactorAlpha(this.bg.cfg.Alpha)
	next.hash += k << oi
	return
}
func (this *DepLearner) Next(s discrete.State, a discrete.Action) (o int32) {
	sv := this.bg.stateValues[s]
	mv := this.parents.CutValues(sv)
	ms := this.cutRanges.Index(mv)
	mk := a.Hashcode() + this.bg.numActions*ms
	h := this.mappedHistory[mk]
	// Sample an outcome in log space; smooth the counts with Alpha until the
	// histogram holds at least M observations.
	lls := make([]float64, len(h))
	usePrior := h.Sum() < this.bg.cfg.M
	for i, c := range h {
		if usePrior {
			lls[i] = math.Log(this.bg.cfg.Alpha + float64(c))
		} else {
			lls[i] = math.Log(float64(c))
		}
	}
	oi := uint64(stat.NextLogChoice(lls))
	o = this.bg.myRange.Value(oi)
	return
}
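// Illustrative sketch, not part of this package: sampling from unnormalized
// log-weights, the kind of operation DepLearner.Next delegates to
// stat.NextLogChoice. Subtracting the maximum before exponentiating keeps the
// weights numerically stable. The helper name is hypothetical; math and
// stat.NextChoice are used as elsewhere in this code.
func sampleFromLogWeights(lls []float64) uint64 {
	maxLL := lls[0]
	for _, ll := range lls {
		if ll > maxLL {
			maxLL = ll
		}
	}
	weights := make([]float64, len(lls))
	sum := 0.0
	for i, ll := range lls {
		weights[i] = math.Exp(ll - maxLL)
		sum += weights[i]
	}
	for i := range weights {
		weights[i] /= sum
	}
	return uint64(stat.NextChoice(weights))
}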
func (this *BetaTerminal) Update(s discrete.State, a discrete.Action, t bool) (next TerminalBelief) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		next = this
		return
	}
	nbt := new(BetaTerminal)
	*nbt = *this
	nbt.Known = append([]bool{}, this.Known...)
	nbt.Term = append([]bool{}, this.Term...)
	nbt.Known[index] = true
	nbt.Term[index] = t
	if t {
		nbt.Alpha++
	} else {
		nbt.Beta++
	}
	return nbt
}
func (this *Belief) Update(s discrete.State, a discrete.Action, n discrete.State) (nextBelief bayes.TransitionBelief) {
	k := a.Hashcode() + s.Hashcode()*this.bg.numActions
	if this.totals[k] >= this.bg.cfg.M {
		nextBelief = this
		return
	}
	nv := this.bg.stateValues[n]
	next := new(Belief)
	*next = *this
	next.hash = 0
	// Update each per-feature learner with the observed value of its variable
	// and recombine their hashcodes into the belief's hash.
	next.learners = append([]*DepLearner{}, this.learners...)
	for child := range this.learners {
		next.learners[child] = next.learners[child].Update(s, a, nv[child])
		next.hash += next.learners[child].Hashcode() << uint(child)
	}
	next.totals = append([]uint64{}, this.totals...)
	next.totals[k]++
	nextBelief = next
	return
}
func (this *BFS3Agent) getIndexAction(index discrete.Action) (act rlglue.Action) {
	return rlglue.NewAction(this.task.Act.Ints.Values(index.Hashcode()), []float64{}, []byte{})
}
func (this *RmaxReward) Next(s discrete.State, a discrete.Action) (r float64) {
	return this.R[s.Hashcode()+this.NumStates*a.Hashcode()]
}
func (this *CountKnown) Known(s discrete.State, a discrete.Action) (known bool) {
	k := s.Hashcode() + this.numStates*a.Hashcode()
	return this.visits[k] >= this.threshold
}
func (this *SysMDP) R(s discrete.State, a discrete.Action) float64 {
	k := s.Hashcode() + a.Hashcode()*this.maxStates
	return this.r[k]
}
func (this *SysMDP) T(s discrete.State, a discrete.Action, n discrete.State) float64 {
	k := s.Hashcode() + a.Hashcode()*this.maxStates
	return this.t[k][n]
}
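// Illustrative sketch, not part of this package: one Bellman backup against the
// tabular SysMDP model above, the kind of step a value-iteration planner would
// take on a sampled model. It assumes state and action hashcodes are dense in
// [0, numStates) and [0, numActions) and that V is indexed by state hashcode;
// the helper and its parameters are hypothetical.
func bellmanBackup(m *SysMDP, s discrete.State, numStates, numActions uint64, gamma float64, V []float64) float64 {
	best := math.Inf(-1)
	for ai := uint64(0); ai < numActions; ai++ {
		a := discrete.Action(ai)
		// Expected immediate reward plus discounted expected next-state value.
		q := m.R(s, a)
		for ni := uint64(0); ni < numStates; ni++ {
			n := discrete.State(ni)
			q += gamma * m.T(s, a, n) * V[ni]
		}
		if q > best {
			best = q
		}
	}
	return best
}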