Exemplo n.º 1
0
// TestSoftmax runs a Monte Carlo simulation of the softmax strategy against
// four Bernoulli arms and checks the number of recorded selections as well as
// the last value of the accuracy, performance and cumulative series.
func TestSoftmax(t *testing.T) {
	τ := 0.1
	sims := 5000
	trials := 300
	// NOTE(review): with four arms, index 4 only addresses the 0.8 arm if arm
	// indices are 1-based — confirm against sim.Accuracy.
	bestArmIndex := 4 // Bernoulli(bestArm)
	bestArm := 0.8
	arms := []sim.Arm{
		bmath.BernRand(0.1),
		bmath.BernRand(0.3),
		bmath.BernRand(0.2),
		bmath.BernRand(0.8),
	}

	strategy, err := NewSoftmax(len(arms), τ)
	if err != nil {
		// t.Fatal, not t.Fatalf: the error text must not be interpreted as a
		// format string.
		t.Fatal(err)
	}

	s, err := sim.MonteCarlo(sims, trials, arms, strategy)
	if err != nil {
		t.Fatal(err)
	}

	// One selection is expected per trial of every simulation.
	expected := sims * trials
	if got := len(s.Selected); got != expected {
		t.Fatalf("incorrect number of trials: %d", got)
	}

	// The last accuracy value must reach at least 0.9.
	accuracies := sim.Accuracy([]int{bestArmIndex})(&s)
	if got := accuracies[len(accuracies)-1]; got < 0.9 {
		t.Fatalf("accuracy is only %f. %d sims, %d trials", got, sims, trials)
	}

	// The last performance value must be within 0.1 of the best arm's mean.
	performances := sim.Performance(&s)
	if got := performances[len(performances)-1]; math.Abs(bestArm-got) > 0.1 {
		t.Fatalf("performance converge to %f. is %f", bestArm, got)
	}

	// The last cumulative value must not fall below the expected floor.
	expectedCumulative := 200.0
	cumulatives := sim.Cumulative(&s)
	if got := cumulatives[len(cumulatives)-1]; got < expectedCumulative {
		t.Fatalf("cumulative performance should be > %f. is %f", expectedCumulative, got)
	}
}
Exemplo n.º 2
0
// TestDelayedStrategy wraps a softmax strategy in a simulated delayed strategy
// (constructed with flushAfter = 100), runs a Monte Carlo simulation against
// four Bernoulli arms, and checks the last value of the accuracy, performance
// and cumulative series.
func TestDelayedStrategy(t *testing.T) {
	τ := 0.1
	sims := 5000
	trials := 300
	flushAfter := 100
	// NOTE(review): with four arms, index 4 only addresses the 0.8 arm if arm
	// indices are 1-based — confirm against sim.Accuracy.
	bestArmIndex := 4 // Bernoulli(bestArm)
	bestArm := 0.8
	arms := []sim.Arm{
		bmath.BernRand(0.1),
		bmath.BernRand(0.3),
		bmath.BernRand(0.2),
		bmath.BernRand(0.8),
	}

	b, err := NewSoftmax(len(arms), τ)
	if err != nil {
		// t.Fatal, not t.Fatalf: the error text must not be interpreted as a
		// format string.
		t.Fatal(err)
	}

	d := NewSimulatedDelayedStrategy(b, len(arms), flushAfter)

	s, err := sim.MonteCarlo(sims, trials, arms, d)
	if err != nil {
		t.Fatal(err)
	}

	// The last accuracy value must reach at least 0.9.
	accuracies := sim.Accuracy([]int{bestArmIndex})(&s)
	if got := accuracies[len(accuracies)-1]; got < 0.9 {
		t.Fatalf("accuracy is only %f. %d sims, %d trials", got, sims, trials)
	}

	// The last performance value must be within 0.1 of the best arm's mean.
	performances := sim.Performance(&s)
	if got := performances[len(performances)-1]; math.Abs(bestArm-got) > 0.1 {
		t.Fatalf("performance converge to %f. is %f", bestArm, got)
	}

	// The last cumulative value must not fall below the expected floor.
	expectedCumulative := 200.0
	cumulatives := sim.Cumulative(&s)
	if got := cumulatives[len(cumulatives)-1]; got < expectedCumulative {
		t.Fatalf("cumulative performance should be > %f. is %f", expectedCumulative, got)
	}
}
Exemplo n.º 3
0
// You can run and plot a Monte Carlo simulation using the `plot` binary. It
// will display the accuracy, performance and cumulative performance over
// time.
//
// You can change the default number and parameterization of bernoulli arms
// like this:
//
// plot -mus 0.22,0.1,0.7
//
// Five groups of strategies are simulated and drawn: epsilon greedy, softmax,
// UCB1, Thompson sampling, and one comparative group holding one strategy of
// each kind. Any construction or simulation error terminates the program via
// log.Fatal.
func main() {
	μs, bestArms, err := parseArms(*mcMus)
	if err != nil {
		log.Fatal(err)
	}

	// bernoulli arms. this is the hidden distribution. The local is not named
	// `arms` so that it does not shadow the type of the same name.
	hidden := make(arms, 0, len(μs))
	for _, μ := range μs {
		hidden = append(hidden, math.BernRand(μ))
	}

	// groups of graphs to draw
	var groups []group

	// epsilon greedy
	greedys := strategys{}
	for _, ε := range []float64{0.1, 0.2, 0.3, 0.4, 0.5} {
		strategy, err := bandit.NewEpsilonGreedy(len(μs), ε)
		if err != nil {
			log.Fatal(err)
		}

		greedys = append(greedys, strategy)
	}

	groups = append(groups, group{
		name:      "Epsilon Greedy",
		strategys: greedys,
	})

	// softmax
	softmaxes := strategys{}
	for _, τ := range []float64{0.1, 0.2, 0.3, 0.4, 0.5} {
		strategy, err := bandit.NewSoftmax(len(μs), τ)
		if err != nil {
			log.Fatal(err)
		}

		softmaxes = append(softmaxes, strategy)
	}

	groups = append(groups, group{
		name:      "Softmax",
		strategys: softmaxes,
	})

	// ucb1
	groups = append(groups, group{
		name: "UCB1",
		strategys: strategys{
			bandit.NewUCB1(len(μs)),
		},
	})

	// thompson sampling
	thompsons := strategys{}
	for _, α := range []float64{1, 2, 10, 20, 100} {
		strategy, err := bandit.NewThompson(len(μs), α)
		if err != nil {
			log.Fatal(err)
		}

		thompsons = append(thompsons, strategy)
	}

	groups = append(groups, group{
		name:      "Thompson Sampling",
		strategys: thompsons,
	})

	// mixed: one strategy of each kind, compared side by side
	greedy, err := bandit.NewEpsilonGreedy(len(μs), 0.1)
	if err != nil {
		log.Fatal(err)
	}

	softmax, err := bandit.NewSoftmax(len(μs), 0.1)
	if err != nil {
		log.Fatal(err)
	}

	thompson, err := bandit.NewThompson(len(μs), 10.0)
	if err != nil {
		log.Fatal(err)
	}

	mixed := strategys{
		greedy,
		softmax,
		bandit.NewUCB1(len(μs)),
		thompson,
	}

	// The comparative group is registered exactly once, after every strategy
	// has been added. Registering it a second time under the same name would
	// only repeat the simulation and produce a second set of identically
	// titled graphs.
	groups = append(groups, group{
		name:      "Comparative",
		strategys: mixed,
	})

	// draw groups. A fresh simulation is run for each of the three graphs.
	for _, g := range groups {
		s, err := simulate(g.strategys, hidden, *mcSims, *mcHorizon)
		if err != nil {
			log.Fatal(err)
		}

		graph := summarize(s, sim.Accuracy(bestArms))
		draw(graph, g.name+" Accuracy", "Time", "P(selecting best arm)")

		s, err = simulate(g.strategys, hidden, *mcSims, *mcHorizon)
		if err != nil {
			log.Fatal(err)
		}

		graph = summarize(s, sim.Performance)
		draw(graph, g.name+" Performance", "Time", "Average Reward")

		s, err = simulate(g.strategys, hidden, *mcSims, *mcHorizon)
		if err != nil {
			log.Fatal(err)
		}

		graph = summarize(s, sim.Cumulative)
		draw(graph, g.name+" Cumulative", "Time", "Cumulative Reward")
	}
}