func (this *CRPReward) Next(s discrete.State, a discrete.Action) (r float64) { index := s.Hashcode() + this.NumStates*a.Hashcode() if this.Known[index] { r = this.R[index] return } if this.chooser == nil { if len(this.Counts) == 0 { this.chooser = func() int64 { return 0 } } else { normalizer := 1.0 / (float64(this.Total) + this.Alpha) weights := make([]float64, len(this.Counts)) for i := range weights { weights[i] = float64(this.Counts[i]) * normalizer } this.chooser = stat.Choice(weights) } } which := int(this.chooser()) if which == len(this.SeenRewards) { r = this.BaseSampler() } else { r = this.SeenRewards[which] } return }
func (this Histogram) GetChoice() (chooser func() uint64) { var sum float64 for _, v := range this { sum += float64(v) } norm := 1 / sum weights := make([]float64, len(this)) for i, v := range this { weights[i] = float64(v) * norm } chooser = func() uint64 { chooser := stat.Choice(weights); return uint64(chooser()) } return }