Beispiel #1
0
// Next returns a reward for taking action a in state s.
//
// If the (s, a) pair is marked in this.Known, the value stored in this.R is
// returned unchanged. Otherwise an index is drawn from this.chooser: an index
// equal to len(this.SeenRewards) produces a fresh value from
// this.BaseSampler, and any other index selects the corresponding entry of
// this.SeenRewards.
//
// this.chooser is created on the first call that needs it and is kept on the
// receiver for subsequent calls.
func (this *CRPReward) Next(s discrete.State, a discrete.Action) (r float64) {
	// Flatten (state, action) into a single key; the state code varies fastest.
	stateCode := s.Hashcode()
	key := stateCode + this.NumStates*a.Hashcode()
	if this.Known[key] {
		return this.R[key]
	}

	if this.chooser == nil {
		// With no counts recorded the only possible pick is index 0.
		pick := func() int64 { return 0 }
		if len(this.Counts) != 0 {
			// Each weight is Counts[i] / (Total + Alpha).
			// NOTE(review): these weights presumably sum to less than 1, with
			// the remainder standing for the "new reward" index
			// len(this.SeenRewards) — confirm against stat.Choice.
			scale := 1.0 / (float64(this.Total) + this.Alpha)
			probs := make([]float64, len(this.Counts))
			for i := range probs {
				probs[i] = float64(this.Counts[i]) * scale
			}
			pick = stat.Choice(probs)
		}
		this.chooser = pick
	}

	idx := int(this.chooser())
	if idx != len(this.SeenRewards) {
		return this.SeenRewards[idx]
	}
	return this.BaseSampler()
}
Beispiel #2
0
// GetChoice returns a function that draws a value using weights derived from
// the histogram: each entry is divided by the sum of all entries and the
// resulting slice is handed to stat.Choice. Every call of the returned
// function yields one draw, converted to uint64.
//
// The weights are computed once, when GetChoice is called; later changes to
// the histogram do not affect a sampler that was already returned.
//
// NOTE(review): if the histogram is empty or all entries are zero, sum is 0,
// norm becomes +Inf and the weights are NaN (or the slice is empty). How
// stat.Choice behaves on such input is not visible here — callers should
// avoid requesting a sampler for an empty/all-zero histogram.
func (this Histogram) GetChoice() (chooser func() uint64) {
	var sum float64
	for _, v := range this {
		sum += float64(v)
	}
	norm := 1 / sum
	weights := make([]float64, len(this))
	for i, v := range this {
		weights[i] = float64(v) * norm
	}
	// Build the underlying sampler a single time and capture it, instead of
	// reconstructing it (and shadowing the named result) on every draw.
	pick := stat.Choice(weights)
	chooser = func() uint64 { return uint64(pick()) }
	return
}