コード例 #1
0
ファイル: bfs3agent.go プロジェクト: skelterjohn/rlenv
// Teleport forces the environment into the given state. The integer values
// that the task's observation range yields for the state's hashcode are turned
// into status flags (a value of 1 means true), and the hashcode is recorded as
// the environment's current hash.
func (this *Environment) Teleport(state discrete.State) {
	values := this.task.Obs.Ints.Values(state.Hashcode())
	for idx := range values {
		this.status[idx] = values[idx] == 1
	}
	this.hash = state.Hashcode()
}
コード例 #2
0
ファイル: reward.go プロジェクト: postfix/rlbayes
// Update returns the reward belief obtained after observing reward r for the
// state-action pair (s, a).
//
// If the pair is already marked known the receiver is returned unchanged.
// Otherwise a copy is made (the receiver's slices are never modified) in which
// the pair is marked known with reward r, and r is recorded in the table of
// distinct rewards: its count is incremented when r has been seen before, or
// it is inserted at the front with a count of 1 when it is new.
func (this *CRPReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) {
	index := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[index] {
		return this
	}

	ndr := new(CRPReward)
	*ndr = *this
	// The struct copy also copied any sampler cached by Next. That sampler was
	// built from the receiver's counts, which differ from the copy's, so drop
	// it and let Next rebuild it lazily.
	ndr.chooser = nil

	ndr.Known = make([]bool, len(this.Known))
	copy(ndr.Known, this.Known)
	ndr.R = make([]float64, len(this.R))
	copy(ndr.R, this.R)
	ndr.Known[index] = true
	ndr.R[index] = r
	ndr.countKnown++

	// Tentatively insert r at the front as a brand-new reward with count 1.
	ndr.SeenRewards = append([]float64{r}, this.SeenRewards...)
	ndr.Counts = append([]uint64{1}, this.Counts...)

	// Look for r among the previously seen rewards. The search runs over the
	// shifted slice (skipping the tentative entry at index 0) so that the index
	// used for Counts refers to the same reward as the one matched.
	for i, sr := range ndr.SeenRewards[1:] {
		if sr == r {
			ndr.Counts[i+1]++
			// r was already present: discard the tentative front entry.
			ndr.SeenRewards = ndr.SeenRewards[1:]
			ndr.Counts = ndr.Counts[1:]
			break
		}
	}

	ndr.Total++

	return ndr
}
コード例 #3
0
ファイル: transition.go プロジェクト: postfix/rlbayes
// Update returns the transition belief that results from observing the
// transition s --a--> n.
//
// The Dirichlet entry for (s, a) is created on the receiver if it does not
// exist yet. When a non-zero ForgetThreshold has already been reached for that
// entry, the receiver itself is returned. Otherwise a new FDMTransition is
// built that shares every other entry with the receiver and holds an updated
// entry for (s, a); the entry's prior is forgotten at the moment its visit
// count equals the threshold, and the hash is adjusted incrementally.
func (this *FDMTransition) Update(s discrete.State, a discrete.Action, n discrete.State) (next TransitionBelief) {
	outcome := this.bg.NextToOutcome(s, n)
	idx := s.Hashcode() + a.Hashcode()*this.bg.NumStates

	current := this.sas[idx]
	if current == nil {
		current = NewDirSA(this.bg.Alpha)
		this.sas[idx] = current
	}

	threshold := this.bg.ForgetThreshold
	if threshold != 0 && current.visits >= threshold {
		return this
	}

	updated := current.Update(outcome)
	if updated.visits == threshold {
		updated.ForgetPrior(this.bg.Alpha)
	}

	entries := make([]*DirSA, len(this.sas))
	copy(entries, this.sas)
	entries[idx] = updated

	successor := new(FDMTransition)
	successor.bg = this.bg
	successor.sas = entries
	// Swap the old entry's contribution to the hash for the new entry's.
	successor.hash = this.hash - current.Hashcode() + updated.Hashcode()
	return successor
}
コード例 #4
0
ファイル: reward.go プロジェクト: postfix/rlbayes
// Next produces a reward for the state-action pair (s, a).
//
// A known pair yields its stored reward. For an unknown pair an index is drawn
// from the cached chooser, which is built on first use from the counts of the
// distinct rewards scaled by 1/(Total+Alpha). An index equal to
// len(SeenRewards) yields a value from BaseSampler; any other index selects
// the corresponding previously seen reward.
func (this *CRPReward) Next(s discrete.State, a discrete.Action) (r float64) {
	idx := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[idx] {
		return this.R[idx]
	}

	// Build and cache the chooser the first time it is needed.
	if this.chooser == nil {
		if numCounts := len(this.Counts); numCounts > 0 {
			scale := 1.0 / (float64(this.Total) + this.Alpha)
			weights := make([]float64, numCounts)
			for i, count := range this.Counts {
				weights[i] = float64(count) * scale
			}
			this.chooser = stat.Choice(weights)
		} else {
			this.chooser = func() int64 { return 0 }
		}
	}

	pick := int(this.chooser())
	if pick != len(this.SeenRewards) {
		return this.SeenRewards[pick]
	}
	return this.BaseSampler()
}
コード例 #5
0
ファイル: reward.go プロジェクト: postfix/rlbayes
// Next returns the reward stored for the state-action pair (s, a) when that
// pair is marked known; otherwise it returns the value produced by
// BaseSampler.
func (this *DeterministicReward) Next(s discrete.State, a discrete.Action) (r float64) {
	idx := s.Hashcode() + this.NumStates*a.Hashcode()
	if !this.Known[idx] {
		return this.BaseSampler()
	}
	return this.R[idx]
}
コード例 #6
0
ファイル: paint.go プロジェクト: skelterjohn/rlenv
// Teleport forces the oracle into the given state. The integer values obtained
// for the state's hashcode are read four per can, in the order Painted,
// Polished, Scratched, Done (a value of 1 means true), and the hashcode is
// recorded as the current hash.
func (this *Oracle) Teleport(state discrete.State) {
	values := this.Task.Obs.Ints.Values(state.Hashcode())
	for can := range this.Cans {
		base := can * 4
		this.Cans[can].Painted = values[base] == 1
		this.Cans[can].Polished = values[base+1] == 1
		this.Cans[can].Scratched = values[base+2] == 1
		this.Cans[can].Done = values[base+3] == 1
	}
	this.hash = state.Hashcode()
}
コード例 #7
0
ファイル: cluster.go プロジェクト: postfix/rlbayes
// UpdatePosterior returns a copy of the posterior in which the histogram for
// the state-action pair (s, a) has been incremented for outcome o. The state
// data, the cluster data and C are copied, so the receiver's own slices and C
// are not modified.
func (this *Posterior) UpdatePosterior(s discrete.State, a discrete.Action, o discrete.State) (next *Posterior) {
	stateData := make([]SAHist, len(this.stateData))
	copy(stateData, this.stateData)
	clusterData := make([]SAHist, len(this.clusterData))
	copy(clusterData, this.clusterData)

	next = new(Posterior)
	*next = *this
	next.stateData = stateData
	next.clusterData = clusterData
	next.C = this.C.Copy()

	idx := s.Hashcode()*this.bg.NumActions + a.Hashcode()
	next.stateData[idx] = next.stateData[idx].Incr(this.bg.NumOutcomes, o)
	return next
}
コード例 #8
0
ファイル: mdp.go プロジェクト: skelterjohn/rlenv
// computeR computes the reward for taking action a in state s: the sum of the
// state's integer values, minus 1 when the action's index is below
// Cfg.NumSystems.
func (this *SysMDP) computeR(s discrete.State, a discrete.Action) (r float64) {
	values := this.Task.Obs.Ints.Values(s.Hashcode())
	for i := range values {
		r += float64(values[i])
	}
	if int(a) < this.Cfg.NumSystems {
		r--
	}
	return r
}
コード例 #9
0
ファイル: transition.go プロジェクト: postfix/rlbayes
// Next returns a successor state for taking action a in state s. The Dirichlet
// entry for (s, a) is created and stored on the receiver if it does not exist
// yet; the outcome it produces is then mapped to a next state relative to s.
func (this *FDMTransition) Next(s discrete.State, a discrete.Action) (n discrete.State) {
	idx := s.Hashcode() + a.Hashcode()*this.bg.NumStates
	entry := this.sas[idx]
	if entry == nil {
		entry = NewDirSA(this.bg.Alpha)
		this.sas[idx] = entry
	}
	return this.bg.OutcomeToNext(s, entry.Next())
}
コード例 #10
0
ファイル: belief.go プロジェクト: skelterjohn/rlenv
// Teleport forces the belief into the given state. The state's hashcode is
// stored, and the integer values obtained for it are interpreted as x, y and
// dir (the first three) followed by the belief entries, each of which is
// passed through GetValue before being stored.
func (this *Belief) Teleport(state discrete.State) {
	this.hash = state.Hashcode()
	values := task.Obs.Ints.Values(state.Hashcode())

	// Everything after the first three values is belief data; convert it in place.
	tail := values[3:]
	for i := range tail {
		tail[i] = GetValue(tail[i])
	}

	this.x, this.y, this.dir = values[0], values[1], values[2]
	this.belief = tail
}
コード例 #11
0
ファイル: cluster.go プロジェクト: postfix/rlbayes
// ResampleState draws a new cluster for state s and inserts the state into it.
//
// Each candidate cluster starts from its CRP prior log-weight, to which the
// log-likelihood of combining the state's histograms with the cluster's
// histograms is added. The new cluster is then chosen from those log-weights.
func (this *Posterior) ResampleState(s discrete.State) {
	numActions := this.bg.NumActions
	plls := roar.CRPPrior(this.bg.Alpha, this.C)

	// The state's own histograms live in stateData (indexed by
	// state*NumActions + action), not in clusterData. They do not depend on the
	// candidate cluster, so slice them once up front.
	sk := s.Hashcode() * numActions
	Os := this.stateData[sk : sk+numActions]

	for c := range plls {
		ck := uint64(c) * numActions
		Oc := this.clusterData[ck : ck+numActions]
		plls[c] += InsertLoglihood(numActions, this.bg.NumOutcomes, this.bg.Beta, Oc, Os)
	}

	newCluster := uint(roar.LogChoice(plls))
	this.InsertState(s, newCluster)
}
コード例 #12
0
ファイル: terminal.go プロジェクト: postfix/rlbayes
// Next reports whether taking action a in state s is terminal. A known pair
// yields its stored flag; for an unknown pair the result is true when a
// uniform draw falls below Alpha/(Alpha+Beta).
func (this *BetaTerminal) Next(s discrete.State, a discrete.Action) (t bool) {
	idx := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[idx] {
		return this.Term[idx]
	}
	threshold := this.Alpha / (this.Alpha + this.Beta)
	return stat.NextUniform() < threshold
}
コード例 #13
0
ファイル: known.go プロジェクト: postfix/rlbayes
// Update returns a new CountKnown in which the visit count for the
// state-action pair (s, a) is one higher. The visit counts are copied, so the
// receiver is left unmodified.
func (this *CountKnown) Update(s discrete.State, a discrete.Action) (next KnownBelief) {
	updated := &CountKnown{
		numStates: this.numStates,
		threshold: this.threshold,
		visits:    append([]int(nil), this.visits...),
	}
	idx := s.Hashcode() + updated.numStates*a.Hashcode()
	updated.visits[idx]++
	return updated
}
コード例 #14
0
ファイル: transition.go プロジェクト: postfix/rlbayes
// Update returns a new DirSA with the count for outcome n incremented by one,
// total and visits each increased by one, and the outcome's hashcode added to
// the hash. The receiver's counts are copied, not modified. It panics when n
// is not a valid index into the counts.
func (this *DirSA) Update(n discrete.State) (next *DirSA) {
	size := len(this.counts)
	counts := make([]float64, size)
	copy(counts, this.counts)
	if n >= discrete.State(size) {
		panic(fmt.Sprintf("%d for %d", n, size))
	}
	counts[n] += 1

	return &DirSA{
		counts: counts,
		total:  this.total + 1,
		visits: this.visits + 1,
		hash:   this.hash + n.Hashcode(),
	}
}
コード例 #15
0
ファイル: reward.go プロジェクト: postfix/rlbayes
// Update returns the reward belief obtained after observing reward r for the
// state-action pair (s, a). When the stored reward already equals r the
// receiver is returned; otherwise a copy with its own reward slice is returned
// in which the pair's reward is set to r and countKnown is incremented.
func (this *RmaxReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) {
	idx := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.R[idx] == r {
		return this
	}

	updated := *this
	updated.R = append([]float64(nil), this.R...)
	updated.R[idx] = r
	updated.countKnown++
	return &updated
}
コード例 #16
0
ファイル: reward.go プロジェクト: postfix/rlbayes
// Update returns the reward belief obtained after observing reward r for the
// state-action pair (s, a). A pair that is already known leaves the belief
// unchanged and the receiver is returned. Otherwise a copy with its own Known
// and R slices is returned in which the pair is marked known with reward r and
// countKnown is incremented.
func (this *DeterministicReward) Update(s discrete.State, a discrete.Action, r float64) (next RewardBelief) {
	idx := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[idx] {
		return this
	}

	updated := *this
	updated.Known = append([]bool(nil), this.Known...)
	updated.R = append([]float64(nil), this.R...)
	updated.Known[idx] = true
	updated.R[idx] = r
	updated.countKnown++
	return &updated
}
コード例 #17
0
ファイル: deplearner.go プロジェクト: postfix/rlbayes
// Update returns a copy of the learner that has absorbed observation o for the
// state-action pair (s, a).
//
// The copy gets its own history and mappedHistory slices. The histogram for
// (s, a) is incremented at the index of o within the range; the state's values
// are cut down to the parents' values and mapped to an index that, together
// with a, selects the mapped histogram to increment. mappedLoglihood is
// adjusted by adding that histogram's LogFactorAlpha before the increment and
// subtracting it after, and the hash is advanced by k shifted by the
// observation index.
func (this *DepLearner) Update(s discrete.State, a discrete.Action, o int32) (next *DepLearner) {
	numActions := this.bg.numActions
	k := a.Hashcode() + numActions*s.Hashcode()

	next = new(DepLearner)
	*next = *this

	obsIndex := this.bg.myRange.Index(o)

	// Per-(s, a) history.
	next.history = make([]Histogram, len(this.history))
	copy(next.history, this.history)
	next.history[k] = next.history[k].Incr(obsIndex)

	// Locate the mapped histogram selected by the parents' values and a.
	stateVals := next.bg.stateValues[s]
	cutVals := next.parents.CutValues(stateVals)
	mappedState := next.cutRanges.Index(cutVals)
	mk := a.Hashcode() + numActions*mappedState

	next.mappedHistory = make([]Histogram, len(this.mappedHistory))
	copy(next.mappedHistory, this.mappedHistory)

	alpha := this.bg.cfg.Alpha
	before := next.mappedHistory[mk].LogFactorAlpha(alpha)
	next.mappedHistory[mk] = next.mappedHistory[mk].Incr(obsIndex)
	after := next.mappedHistory[mk].LogFactorAlpha(alpha)
	// Applied as two separate steps to keep the original floating-point order.
	next.mappedLoglihood += before
	next.mappedLoglihood -= after

	next.hash += k << obsIndex
	return next
}
コード例 #18
0
ファイル: terminal.go プロジェクト: postfix/rlbayes
// Update returns the terminal belief obtained after observing terminal flag t
// for the state-action pair (s, a). A pair that is already known leaves the
// belief unchanged and the receiver is returned. Otherwise a copy with its own
// Known and Term slices is returned in which the pair is marked known with
// flag t, and Alpha (t is true) or Beta (t is false) is increased by one.
func (this *BetaTerminal) Update(s discrete.State, a discrete.Action, t bool) (next TerminalBelief) {
	idx := s.Hashcode() + this.NumStates*a.Hashcode()
	if this.Known[idx] {
		return this
	}

	updated := new(BetaTerminal)
	*updated = *this

	updated.Known = make([]bool, len(this.Known))
	copy(updated.Known, this.Known)
	updated.Term = make([]bool, len(this.Term))
	copy(updated.Term, this.Term)

	updated.Known[idx] = true
	updated.Term[idx] = t
	if !t {
		updated.Beta += 1
	} else {
		updated.Alpha += 1
	}
	return updated
}
コード例 #19
0
ファイル: belief.go プロジェクト: postfix/rlbayes
// Update returns the transition belief obtained after observing the transition
// s --a--> n.
//
// Once the total for (s, a) has reached cfg.M the receiver is returned
// unchanged. Otherwise a copy is built in which each learner is updated with
// its own component of the next state's values, the hash is recomputed from
// the learners' hashcodes (each shifted by its index), and the total for
// (s, a) is incremented. The receiver's learners and totals slices are not
// modified.
func (this *Belief) Update(s discrete.State, a discrete.Action, n discrete.State) (nextBelief bayes.TransitionBelief) {
	idx := a.Hashcode() + s.Hashcode()*this.bg.numActions
	if this.totals[idx] >= this.bg.cfg.M {
		return this
	}

	nextVals := this.bg.stateValues[n]

	updated := new(Belief)
	*updated = *this
	updated.hash = 0

	updated.learners = make([]*DepLearner, len(this.learners))
	copy(updated.learners, this.learners)
	for child, learner := range this.learners {
		refreshed := learner.Update(s, a, nextVals[child])
		updated.learners[child] = refreshed
		updated.hash += refreshed.Hashcode() << uint(child)
	}

	updated.totals = make([]uint64, len(this.totals))
	copy(updated.totals, this.totals)
	updated.totals[idx]++

	return updated
}
コード例 #20
0
ファイル: mdp.go プロジェクト: skelterjohn/rlenv
// computeT computes the probability of moving from state s to state n under
// action a, as the product of one factor per entry of n's values.
//
// For entry i, the failure probability is 0 when a targets i; otherwise it is
// Cfg.FailBase plus Cfg.FailIncr for each of the two ring neighbours whose
// value is not 1. The right neighbour is read from s; the left neighbour is
// read from n when its index is lower than i, and from s otherwise. When entry
// i is 1 in s or is targeted by a, the factor is that failure probability if
// the next value is 0 and its complement otherwise. In the remaining case the
// factor is Cfg.FailStay if the next value is 0 and its complement otherwise.
func (this *SysMDP) computeT(s discrete.State, a discrete.Action, n discrete.State) (p float64) {
	cur := this.Task.Obs.Ints.Values(s.Hashcode())
	nxt := this.Task.Obs.Ints.Values(n.Hashcode())
	count := this.Cfg.NumSystems

	p = 1
	for i, nextVal := range nxt {
		targeted := a == discrete.Action(i)

		var failProb float64
		if !targeted {
			failProb = this.Cfg.FailBase
			left := (i + count - 1) % count
			right := (i + 1) % count
			leftUp := cur[left] == 1
			rightUp := cur[right] == 1
			if left < i {
				// The left neighbour was already handled in this pass, so
				// its next-state value is used instead.
				leftUp = nxt[left] == 1
			}
			if !leftUp {
				failProb += this.Cfg.FailIncr
			}
			if !rightUp {
				failProb += this.Cfg.FailIncr
			}
		}

		// Probability that entry i is 0 in the next state.
		zeroProb := this.Cfg.FailStay
		if cur[i] == 1 || targeted {
			zeroProb = failProb
		}

		if nextVal == 0 {
			p *= zeroProb
		} else {
			p *= 1 - zeroProb
		}
	}
	return p
}
コード例 #21
0
ファイル: mdp.go プロジェクト: skelterjohn/rlenv
// R returns the stored reward for taking action a in state s, looked up at
// index s + a*maxStates.
func (this *SysMDP) R(s discrete.State, a discrete.Action) float64 {
	return this.r[s.Hashcode()+a.Hashcode()*this.maxStates]
}
コード例 #22
0
ファイル: reward.go プロジェクト: postfix/rlbayes
// Next returns the reward currently stored for the state-action pair (s, a).
func (this *RmaxReward) Next(s discrete.State, a discrete.Action) (r float64) {
	idx := s.Hashcode() + this.NumStates*a.Hashcode()
	r = this.R[idx]
	return r
}
コード例 #23
0
ファイル: known.go プロジェクト: postfix/rlbayes
// Known reports whether the state-action pair (s, a) has been visited at
// least threshold times.
func (this *CountKnown) Known(s discrete.State, a discrete.Action) (known bool) {
	idx := s.Hashcode() + this.numStates*a.Hashcode()
	known = this.visits[idx] >= this.threshold
	return known
}
コード例 #24
0
ファイル: mdp.go プロジェクト: skelterjohn/rlenv
// T returns the stored probability of reaching state n when taking action a in
// state s. The row is looked up at index s + a*maxStates and then indexed by n.
func (this *SysMDP) T(s discrete.State, a discrete.Action, n discrete.State) float64 {
	row := this.t[s.Hashcode()+a.Hashcode()*this.maxStates]
	return row[n]
}