func (this *Environment) Teleport(state discrete.State) { ints := this.task.Obs.Ints.Values(state.Hashcode()) for i, v := range ints { this.status[i] = v == 1 } this.hash = state.Hashcode() }
func (this *CRPReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.Known[index] { return this } ndr := new(CRPReward) *ndr = *this ndr.Known = make([]bool, len(this.Known)) copy(ndr.Known, this.Known) ndr.R = make([]float64, len(this.R)) copy(ndr.R, this.R) ndr.Known[index] = true ndr.R[index] = r ndr.countKnown++ ndr.SeenRewards = append([]float64{r}, this.SeenRewards...) ndr.Counts = append([]uint64{1}, this.Counts...) var seen bool for i, sr := range this.SeenRewards { if i != 0 && sr == r { seen = true ndr.Counts[i]++ break } } if seen { ndr.SeenRewards = ndr.SeenRewards[1:len(ndr.SeenRewards)] ndr.Counts = ndr.Counts[1:len(ndr.Counts)] } ndr.Total++ next = ndr return }
func (this *FDMTransition) Update(s discrete.State, a discrete.Action, n discrete.State) (next TransitionBelief) { o := this.bg.NextToOutcome(s, n) k := s.Hashcode() + a.Hashcode()*this.bg.NumStates dsa := this.sas[k] if dsa == nil { dsa = NewDirSA(this.bg.Alpha) this.sas[k] = dsa } if this.bg.ForgetThreshold != 0 && dsa.visits >= this.bg.ForgetThreshold { next = this return } nextFDM := new(FDMTransition) nextFDM.bg = this.bg nextFDM.sas = make([]*DirSA, len(this.sas)) copy(nextFDM.sas, this.sas) nextFDM.sas[k] = dsa.Update(o) if nextFDM.sas[k].visits == this.bg.ForgetThreshold { nextFDM.sas[k].ForgetPrior(this.bg.Alpha) //fmt.Printf("%v\n", nextFDM.sas[k]) } nextFDM.hash = this.hash - this.sas[k].Hashcode() + nextFDM.sas[k].Hashcode() next = nextFDM return }
func (this *CRPReward) Next(s discrete.State, a discrete.Action) (r float64) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.Known[index] { r = this.R[index] return } if this.chooser == nil { if len(this.Counts) == 0 { this.chooser = func() int64 { return 0 } } else { normalizer := 1.0 / (float64(this.Total) + this.Alpha) weights := make([]float64, len(this.Counts)) for i := range weights { weights[i] = float64(this.Counts[i]) * normalizer } this.chooser = stat.Choice(weights) } } which := int(this.chooser()) if which == len(this.SeenRewards) { r = this.BaseSampler() } else { r = this.SeenRewards[which] } return }
func (this *DeterministicReward) Next(s discrete.State, a discrete.Action) (r float64) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.Known[index] { r = this.R[index] return } r = this.BaseSampler() return }
func (this *Oracle) Teleport(state discrete.State) { ints := this.Task.Obs.Ints.Values(state.Hashcode()) for i := range this.Cans { this.Cans[i].Painted = ints[i*4] == 1 this.Cans[i].Polished = ints[i*4+1] == 1 this.Cans[i].Scratched = ints[i*4+2] == 1 this.Cans[i].Done = ints[i*4+3] == 1 } this.hash = state.Hashcode() }
func (this *Posterior) UpdatePosterior(s discrete.State, a discrete.Action, o discrete.State) (next *Posterior) { next = new(Posterior) *next = *this next.stateData = append([]SAHist{}, this.stateData...) next.clusterData = append([]SAHist{}, this.clusterData...) next.C = this.C.Copy() k := s.Hashcode()*this.bg.NumActions + a.Hashcode() next.stateData[k] = next.stateData[k].Incr(this.bg.NumOutcomes, o) return }
func (this *SysMDP) computeR(s discrete.State, a discrete.Action) (r float64) { sv := this.Task.Obs.Ints.Values(s.Hashcode()) for _, v := range sv { r += float64(v) } if int(a) < this.Cfg.NumSystems { r -= 1 } return }
func (this *FDMTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) { k := s.Hashcode() + a.Hashcode()*this.bg.NumStates dsa := this.sas[k] if dsa == nil { dsa = NewDirSA(this.bg.Alpha) this.sas[k] = dsa } n = this.bg.OutcomeToNext(s, dsa.Next()) return }
func (this *Belief) Teleport(state discrete.State) { this.hash = state.Hashcode() indices := task.Obs.Ints.Values(state.Hashcode()) for i, ii := range indices[3:] { indices[i+3] = GetValue(ii) } this.x = indices[0] this.y = indices[1] this.dir = indices[2] this.belief = indices[3:] return }
func (this *Posterior) ResampleState(s discrete.State) { plls := roar.CRPPrior(this.bg.Alpha, this.C) for c := range plls { ck := uint64(c) * this.bg.NumActions Oc := this.clusterData[ck : ck+this.bg.NumActions] sk := s.Hashcode() * this.bg.NumActions Os := this.clusterData[sk : sk+this.bg.NumActions] plls[c] += InsertLoglihood(this.bg.NumActions, this.bg.NumOutcomes, this.bg.Beta, Oc, Os) } newCluster := uint(roar.LogChoice(plls)) this.InsertState(s, newCluster) }
func (this *BetaTerminal) Next(s discrete.State, a discrete.Action) (t bool) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.Known[index] { t = this.Term[index] return } prob := this.Alpha / (this.Alpha + this.Beta) if stat.NextUniform() < prob { t = true } return }
func (this *CountKnown) Update(s discrete.State, a discrete.Action) (next KnownBelief) { nk := new(CountKnown) nk.numStates = this.numStates nk.visits = make([]int, len(this.visits)) copy(nk.visits, this.visits) nk.threshold = this.threshold k := s.Hashcode() + nk.numStates*a.Hashcode() nk.visits[k]++ next = nk return }
func (this *DirSA) Update(n discrete.State) (next *DirSA) { next = new(DirSA) next.counts = make([]float64, len(this.counts)) copy(next.counts, this.counts) if n >= discrete.State(len(next.counts)) { panic(fmt.Sprintf("%d for %d", n, len(next.counts))) } next.counts[n] += 1 next.total = this.total + 1 next.visits = this.visits + 1 next.hash = this.hash + n.Hashcode() return }
func (this *RmaxReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.R[index] == r { return this } nrr := new(RmaxReward) *nrr = *this nrr.R = make([]float64, len(this.R)) copy(nrr.R, this.R) nrr.R[index] = r nrr.countKnown++ return nrr }
func (this *DeterministicReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.Known[index] { return this } ndr := new(DeterministicReward) *ndr = *this ndr.Known = make([]bool, len(this.Known)) copy(ndr.Known, this.Known) ndr.R = make([]float64, len(this.R)) copy(ndr.R, this.R) ndr.Known[index] = true ndr.R[index] = r ndr.countKnown++ next = ndr return }
func (this *DepLearner) Update(s discrete.State, a discrete.Action, o int32) (next *DepLearner) { k := a.Hashcode() + this.bg.numActions*s.Hashcode() next = new(DepLearner) *next = *this oi := this.bg.myRange.Index(o) next.history = append([]Histogram{}, this.history...) next.history[k] = next.history[k].Incr(oi) sv := next.bg.stateValues[s] mv := next.parents.CutValues(sv) ms := next.cutRanges.Index(mv) mk := a.Hashcode() + this.bg.numActions*ms next.mappedHistory = append([]Histogram{}, this.mappedHistory...) next.mappedLoglihood += next.mappedHistory[mk].LogFactorAlpha(this.bg.cfg.Alpha) next.mappedHistory[mk] = next.mappedHistory[mk].Incr(oi) next.mappedLoglihood -= next.mappedHistory[mk].LogFactorAlpha(this.bg.cfg.Alpha) next.hash += k << oi return }
func (this *BetaTerminal) Update(s discrete.State, a discrete.Action, t bool) (next TerminalBelief) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.Known[index] { next = this return } nbt := new(BetaTerminal) *nbt = *this nbt.Known = append([]bool{}, this.Known...) nbt.Term = append([]bool{}, this.Term...) nbt.Known[index] = true nbt.Term[index] = t if t { nbt.Alpha++ } else { nbt.Beta++ } return nbt }
func (this *Belief) Update(s discrete.State, a discrete.Action, n discrete.State) (nextBelief bayes.TransitionBelief) { k := a.Hashcode() + s.Hashcode()*this.bg.numActions if this.totals[k] >= this.bg.cfg.M { nextBelief = this return } nv := this.bg.stateValues[n] next := new(Belief) *next = *this next.hash = 0 next.learners = append([]*DepLearner{}, this.learners...) for child := range this.learners { next.learners[child] = next.learners[child].Update(s, a, nv[child]) next.hash += next.learners[child].Hashcode() << uint(child) } next.totals = append([]uint64{}, this.totals...) next.totals[k]++ nextBelief = next return }
func (this *SysMDP) computeT(s discrete.State, a discrete.Action, n discrete.State) (p float64) { sv := this.Task.Obs.Ints.Values(s.Hashcode()) nv := this.Task.Obs.Ints.Values(n.Hashcode()) p = 1 for i, no := range nv { var fp float64 if a == discrete.Action(i) { fp = 0 } else { fp = this.Cfg.FailBase li := (i + this.Cfg.NumSystems - 1) % this.Cfg.NumSystems ri := (i + 1) % this.Cfg.NumSystems ls := sv[li] == 1 rs := sv[ri] == 1 if li < i { ls = nv[li] == 1 } if !ls { fp += this.Cfg.FailIncr } if !rs { fp += this.Cfg.FailIncr } } if sv[i] == 1 || a == discrete.Action(i) { if no == 0 { p *= fp } else { p *= 1 - fp } } else { if no == 0 { p *= this.Cfg.FailStay } else { p *= 1 - this.Cfg.FailStay } } } return }
func (this *SysMDP) R(s discrete.State, a discrete.Action) float64 { k := s.Hashcode() + a.Hashcode()*this.maxStates return this.r[k] }
// Next returns the reward currently stored for taking action a in state s,
// read from R at s.Hashcode() + NumStates*a.Hashcode().
func (this *RmaxReward) Next(s discrete.State, a discrete.Action) (r float64) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	r = this.R[index]
	return r
}
func (this *CountKnown) Known(s discrete.State, a discrete.Action) (known bool) { k := s.Hashcode() + this.numStates*a.Hashcode() return this.visits[k] >= this.threshold }
func (this *SysMDP) T(s discrete.State, a discrete.Action, n discrete.State) float64 { k := s.Hashcode() + a.Hashcode()*this.maxStates return this.t[k][n] }