Ejemplo n.º 1
0
// MakeMappedHistory re-buckets this.history under a candidate parent set.
//
// this.history is indexed by state*numActions + action. For every entry the
// state's value vector is reduced with parents.CutValues, the reduced vector
// is turned into a mapped-state index with cutRanges.Index, and the entry's
// histogram is folded (via Histogram.UpdateHistogram) into the result slot
// mappedState*numActions + action. States that reduce to the same mapped
// index therefore share one result histogram per action.
//
// The returned slice has cutRanges.Count()*numActions entries, each created
// with numOutcomes bins before any folding takes place.
func (this *DepLearner) MakeMappedHistory(parents ParentSet, cutRanges rlglue.IntRanges) (mappedHistory []Histogram) {
	actionCount := this.bg.numActions
	outcomeCount := this.bg.numOutcomes

	mappedHistory = make([]Histogram, cutRanges.Count()*actionCount)
	for slot := range mappedHistory {
		mappedHistory[slot] = make(Histogram, outcomeCount)
	}

	for key, observed := range this.history {
		// Split the flat history key back into its state and action parts.
		flat := uint64(key)
		state, action := flat/actionCount, flat%actionCount

		// Project the full state onto the parent variables and look up the
		// index of that projection within cutRanges.
		reduced := parents.CutValues(this.bg.stateValues[state])
		mappedState := cutRanges.Index(reduced)

		target := mappedState*actionCount + action
		mappedHistory[target] = mappedHistory[target].UpdateHistogram(observed)
	}
	return mappedHistory
}
Ejemplo n.º 2
0
// NewDepLearner builds a DepLearner for the state variable at index child.
//
// The shared baggage records the configuration, the full set of state ranges,
// the child's own range, and the derived counts: numStates and numActions come
// from stateRanges.Count() and actionRanges.Count(), numOutcomes from the
// child's range. It also caches the value vector of every state index and
// alphaLogFactor = LnΓ(Alpha*numOutcomes) - LnΓ(Alpha)*numOutcomes.
//
// The learner's history gets one numOutcomes-bin histogram per
// (state, action) pair. The cut ranges, mapped history and mapped
// log-likelihood are then derived from this.parents, which is not assigned
// here and so still holds its zero value.
//
// Indexing stateRanges with child panics if child is out of range.
func NewDepLearner(child int, cfg Config, stateRanges, actionRanges rlglue.IntRanges) (this *DepLearner) {
	childRange := stateRanges[child]

	bg := new(DBaggage)
	bg.cfg = cfg
	bg.myRange = childRange
	bg.ranges = stateRanges
	bg.numStates = stateRanges.Count()
	bg.numActions = actionRanges.Count()
	bg.numOutcomes = childRange.Count()

	// Kept as two statements, exactly like the original arithmetic.
	outcomes := float64(bg.numOutcomes)
	bg.alphaLogFactor = stat.LnΓ(cfg.Alpha * outcomes)
	bg.alphaLogFactor -= stat.LnΓ(cfg.Alpha) * outcomes

	// Precompute the value vector for every flat state index.
	bg.stateValues = make([][]int32, bg.numStates)
	for idx := range bg.stateValues {
		bg.stateValues[idx] = bg.ranges.Values(uint64(idx))
	}

	this = new(DepLearner)
	this.bg = bg

	// One empty histogram per (state, action) pair.
	history := make([]Histogram, bg.numStates*bg.numActions)
	for idx := range history {
		history[idx] = make(Histogram, bg.numOutcomes)
	}
	this.history = history

	// The derived structures depend on bg and history being in place.
	this.cutRanges = this.parents.CutRanges(bg.ranges)
	this.mappedHistory = this.MakeMappedHistory(this.parents, this.cutRanges)
	this.mappedLoglihood = this.MappedLoglihoodRatio(this.mappedHistory)
	return this
}