// SelectArm returns 1 indexed arm to be tried next. func (u *uCB1) SelectArm() int { for i, count := range u.counts { if count == 0 { u.counts[i]++ return i + 1 } } var totalCounts int for _, count := range u.counts { totalCounts += count } ucbValues := make([]float64, u.arms) for i := 0; i < u.arms; i++ { bonus := math.Sqrt((2 * math.Log(float64(totalCounts))) / float64(u.counts[i])) ucbValues[i] = u.values[i] + bonus } _, imax := bmath.Max(ucbValues) // best arm. randomly pick because there may be equally best arms. arm := imax[u.rand.Intn(len(imax))] u.counts[arm]++ return arm + 1 }
// SelectArm returns 1 indexed arm to be tried next. func (s *softmax) SelectArm() int { max, _ := bmath.Max(s.values) normalizer := 0.0 for _, value := range s.values { normalizer += math.Exp((value - max) / s.tau) } if math.IsInf(normalizer, 0) { panic("normalizer in softmax too large") } cumulativeProb := 0.0 draw := len(s.values) - 1 z := s.rand.Float64() for i, value := range s.values { cumulativeProb = cumulativeProb + math.Exp((value-max)/s.tau)/normalizer if cumulativeProb > z { draw = i break } } s.counts[draw]++ return draw + 1 }
// SelectArm returns 1 indexed arm to be tried next. func (e *epsilonGreedy) SelectArm() int { arm := 0 if z := e.rand.Float64(); z > e.epsilon { _, imax := bmath.Max(e.values) // best arm. randomly pick because there may be equally best arms. arm = imax[e.rand.Intn(len(imax))] } else { // random arm arm = e.rand.Intn(e.arms) } e.counts[arm]++ return arm + 1 }
// SelectArm returns 1 indexed arm to be tried next. func (t *thompson) SelectArm() int { var thetas = make([]float64, t.arms) for i := 0; i < t.arms; i++ { si := t.values[i] * float64(t.counts[i]) fi := float64(t.counts[i]) - si thetas[i] = t.betaRand.NextBeta(si+t.alpha, fi+t.alpha) } _, imax := bmath.Max(thetas) // best arm. randomly pick because there may be equally best arms. arm := imax[t.rand.Intn(len(imax))] t.counts[arm]++ return arm + 1 }