Esempio n. 1
0
// SelectArm picks the next arm to play and returns its 1-based index.
//
// Any arm whose count is still zero is chosen first, in index order. Once
// every arm has a non-zero count, the arm with the largest score wins, where
// the score is the arm's current value plus an exploration bonus of
// sqrt(2*ln(total pulls) / pulls of that arm). The count of the chosen arm
// is incremented before returning.
func (u *uCB1) SelectArm() int {
	// Sum of all counts; only used when no untried arm is found below.
	pulls := 0
	for idx, n := range u.counts {
		if n == 0 {
			// Untried arm: play it straight away.
			u.counts[idx]++
			return idx + 1
		}
		pulls += n
	}

	// The numerator of the bonus is identical for every arm.
	numerator := 2 * math.Log(float64(pulls))

	scores := make([]float64, u.arms)
	for idx := range scores {
		scores[idx] = u.values[idx] + math.Sqrt(numerator/float64(u.counts[idx]))
	}

	// The second result of bmath.Max is used as the list of indices sharing
	// the top score; one of them is drawn at random to break ties.
	_, best := bmath.Max(scores)
	chosen := best[u.rand.Intn(len(best))]

	u.counts[chosen]++
	return chosen + 1
}
Esempio n. 2
0
// SelectArm samples the next arm to play and returns its 1-based index.
//
// Each arm is weighted by exp((value - largest value) / tau) and an arm is
// drawn with probability proportional to its weight. If the cumulative
// probability never exceeds the random draw, the last arm is used. The count
// of the chosen arm is incremented before returning.
//
// It panics if the sum of the weights is infinite.
func (s *softmax) SelectArm() int {
	peak, _ := bmath.Max(s.values)

	// Unnormalised weight of a value, shifted by the largest value so the
	// exponent is never positive when tau is positive.
	weight := func(v float64) float64 {
		return math.Exp((v - peak) / s.tau)
	}

	var total float64
	for _, v := range s.values {
		total += weight(v)
	}
	if math.IsInf(total, 0) {
		panic("normalizer in softmax too large")
	}

	// Walk the cumulative distribution until it passes the random threshold.
	threshold := s.rand.Float64()
	chosen := len(s.values) - 1
	var acc float64
	for idx, v := range s.values {
		acc += weight(v) / total
		if acc > threshold {
			chosen = idx
			break
		}
	}

	s.counts[chosen]++
	return chosen + 1
}
Esempio n. 3
0
// SelectArm picks the next arm to play and returns its 1-based index.
//
// A uniform random number is drawn first. When it is greater than epsilon,
// one of the arms with the highest current value is chosen; otherwise an arm
// is chosen uniformly at random from all arms. The count of the chosen arm
// is incremented before returning.
func (e *epsilonGreedy) SelectArm() int {
	exploit := e.rand.Float64() > e.epsilon

	var chosen int
	switch {
	case exploit:
		// The second result of bmath.Max is used as the list of indices
		// sharing the top value; one is drawn at random to break ties.
		_, best := bmath.Max(e.values)
		chosen = best[e.rand.Intn(len(best))]
	default:
		// Explore: any arm, uniformly at random.
		chosen = e.rand.Intn(e.arms)
	}

	e.counts[chosen]++
	return chosen + 1
}
Esempio n. 4
0
// SelectArm picks the next arm to play and returns its 1-based index.
//
// For every arm a sample is drawn from betaRand.NextBeta, parameterised by
// value*count + alpha and (count - value*count) + alpha. The arm with the
// largest sample is chosen, and its count is incremented before returning.
func (t *thompson) SelectArm() int {
	samples := make([]float64, t.arms)
	for idx := range samples {
		pulls := float64(t.counts[idx])
		// value*count, and the remainder of the count.
		wins := t.values[idx] * pulls
		losses := pulls - wins
		samples[idx] = t.betaRand.NextBeta(wins+t.alpha, losses+t.alpha)
	}

	// The second result of bmath.Max is used as the list of indices sharing
	// the top sample; one is drawn at random to break ties.
	_, best := bmath.Max(samples)
	chosen := best[t.rand.Intn(len(best))]

	t.counts[chosen]++
	return chosen + 1
}