func TestSoftmax(t *testing.T) { τ := 0.1 sims := 5000 trials := 300 bestArmIndex := 4 // Bernoulli(bestArm) bestArm := 0.8 arms := []sim.Arm{ bmath.BernRand(0.1), bmath.BernRand(0.3), bmath.BernRand(0.2), bmath.BernRand(0.8), } strategy, err := NewSoftmax(len(arms), τ) if err != nil { t.Fatalf(err.Error()) } s, err := sim.MonteCarlo(sims, trials, arms, strategy) if err != nil { t.Fatalf(err.Error()) } expected := sims * trials if got := len(s.Selected); got != expected { t.Fatalf("incorrect number of trials: %d", got) } accuracies := sim.Accuracy([]int{bestArmIndex})(&s) if got := accuracies[len(accuracies)-1]; got < 0.9 { t.Fatalf("accuracy is only %f. %d sims, %d trials", got, sims, trials) } performances := sim.Performance(&s) if got := performances[len(performances)-1]; math.Abs(bestArm-got) > 0.1 { t.Fatalf("performance converge to %f. is %f", bestArm, got) } expectedCumulative := 200.0 cumulatives := sim.Cumulative(&s) if got := cumulatives[len(cumulatives)-1]; got < expectedCumulative { t.Fatalf("cumulative performance should be > %f. is %f", expectedCumulative, got) } }
func TestDelayedStrategy(t *testing.T) { τ := 0.1 sims := 5000 trials := 300 flushAfter := 100 bestArmIndex := 4 // Bernoulli(bestArm) bestArm := 0.8 arms := []sim.Arm{ bmath.BernRand(0.1), bmath.BernRand(0.3), bmath.BernRand(0.2), bmath.BernRand(0.8), } b, err := NewSoftmax(len(arms), τ) if err != nil { t.Fatalf(err.Error()) } d := NewSimulatedDelayedStrategy(b, len(arms), flushAfter) s, err := sim.MonteCarlo(sims, trials, arms, d) if err != nil { t.Fatalf(err.Error()) } accuracies := sim.Accuracy([]int{bestArmIndex})(&s) if got := accuracies[len(accuracies)-1]; got < 0.9 { t.Fatalf("accuracy is only %f. %d sims, %d trials", got, sims, trials) } performances := sim.Performance(&s) if got := performances[len(performances)-1]; math.Abs(bestArm-got) > 0.1 { t.Fatalf("performance converge to %f. is %f", bestArm, got) } expectedCumulative := 200.0 cumulatives := sim.Cumulative(&s) if got := cumulatives[len(cumulatives)-1]; got < expectedCumulative { t.Fatalf("cumulative performance should be > %f. is %f", expectedCumulative, got) } }
func TestSoftmaxGaussian(t *testing.T) { τ := 0.1 sims := 5000 trials := 300 bestArmIndex := 1 // Gaussian(bestArm) bestArm := 5000.0 arms := []sim.Arm{ bmath.NormRand(5000, 1), // is five times better bmath.NormRand(0, 1), } strategy, err := NewSoftmax(len(arms), τ) if err != nil { t.Fatalf(err.Error()) } s, err := sim.MonteCarlo(sims, trials, arms, strategy) if err != nil { t.Fatalf(err.Error()) } expected := sims * trials if got := len(s.Selected); got != expected { t.Fatalf("incorrect number of trials: %d", got) } accuracies := sim.Accuracy([]int{bestArmIndex})(&s) if got := accuracies[len(accuracies)-1]; got != 1.0 { t.Fatalf("accuracy is only %f. %d sims, %d trials", got, sims, trials) } performances := sim.Performance(&s) if got := performances[len(performances)-1]; math.Abs(bestArm-got) > 0.1 { t.Fatalf("performance converge to %f. is %f", bestArm, got) } expectedCumulative := 4500.0 * float64(trials) // (mean(bestArm)-tolerance) * num trials cumulatives := sim.Cumulative(&s) if got := cumulatives[len(cumulatives)-1]; got < expectedCumulative { t.Fatalf("cumulative performance should be > %f. is %f", expectedCumulative, got) } }
// You can run and plot a Monte Carlo simulation using the `plot` binary. It // will display the accuracy, performance and cumulative performance over // time. // // You can change the default number and parameterization of bernoulli arms // like this: // // plot -mus 0.22,0.1,0.7 // func main() { μs, bestArms, err := parseArms(*mcMus) if err != nil { log.Fatal(err.Error()) } // bernoulli arms. this is the hidden distribution. arms := arms{} for _, μ := range μs { arms = append(arms, math.BernRand(μ)) } // groups of graphs to draw groups := []group{} // epsilon greedy greedys := strategys{} for _, ε := range []float64{0.1, 0.2, 0.3, 0.4, 0.5} { strategy, err := bandit.NewEpsilonGreedy(len(μs), ε) if err != nil { log.Fatal(err.Error()) } greedys = append(greedys, strategy) } groups = append(groups, group{ name: "Epsilon Greedy", strategys: greedys, }) // softmax softmaxes := strategys{} for _, τ := range []float64{0.1, 0.2, 0.3, 0.4, 0.5} { strategy, err := bandit.NewSoftmax(len(μs), τ) if err != nil { log.Fatal(err.Error()) } softmaxes = append(softmaxes, strategy) } groups = append(groups, group{ name: "Softmax", strategys: softmaxes, }) // ucb1 ucb1s := strategys{ bandit.NewUCB1(len(μs)), } groups = append(groups, group{ name: "UCB1", strategys: ucb1s, }) // thompson sampling thompsons := strategys{} for _, α := range []float64{1, 2, 10, 20, 100} { strategy, err := bandit.NewThompson(len(μs), α) if err != nil { log.Fatal(err.Error()) } thompsons = append(thompsons, strategy) } groups = append(groups, group{ name: "Thompson Sampling", strategys: thompsons, }) // mixed mixed := strategys{} greedy, err := bandit.NewEpsilonGreedy(len(μs), 0.1) if err != nil { log.Fatal(err.Error()) } mixed = append(mixed, greedy) softmax, err := bandit.NewSoftmax(len(μs), 0.1) if err != nil { log.Fatal(err.Error()) } mixed = append(mixed, softmax) // ucb1 into mixed mixed = append(mixed, bandit.NewUCB1(len(μs))) groups = append(groups, group{ name: "Comparative", strategys: mixed, }) // thompson into mixed thompson, err := bandit.NewThompson(len(μs), 10.0) if err != nil { log.Fatal(err.Error()) } mixed = append(mixed, thompson) groups = append(groups, group{ name: "Comparative", strategys: mixed, }) // draw groups for _, group := range groups { s, err := simulate(group.strategys, arms, *mcSims, *mcHorizon) if err != nil { log.Fatal(err.Error()) } graph := summarize(s, sim.Accuracy(bestArms)) draw(graph, group.name+" Accuracy", "Time", "P(selecting best arm)") s, err = simulate(group.strategys, arms, *mcSims, *mcHorizon) if err != nil { log.Fatal(err.Error()) } graph = summarize(s, sim.Performance) draw(graph, group.name+" Performance", "Time", "Average Reward") s, err = simulate(group.strategys, arms, *mcSims, *mcHorizon) if err != nil { log.Fatal(err.Error()) } graph = summarize(s, sim.Cumulative) draw(graph, group.name+" Cumulative", "Time", "Cumulative Reward") } }