func testPutInner(ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	db, err := c.NewClient(ctx, 0)
	if err != nil {
		t.Fatal(err)
	}

	errs := make(chan error, c.NumNodes())
	start := timeutil.Now()
	deadline := start.Add(cfg.Duration)
	var count int64
	for i := 0; i < c.NumNodes(); i++ {
		go func() {
			r, _ := randutil.NewPseudoRand()
			value := randutil.RandBytes(r, 8192)
			for timeutil.Now().Before(deadline) {
				k := atomic.AddInt64(&count, 1)
				v := value[:r.Intn(len(value))]
				if err := db.Put(ctx, fmt.Sprintf("%08d", k), v); err != nil {
					errs <- err
					return
				}
			}
			errs <- nil
		}()
	}

	for i := 0; i < c.NumNodes(); {
		baseCount := atomic.LoadInt64(&count)
		select {
		case <-stopper.ShouldStop():
			t.Fatalf("interrupted")
		case err := <-errs:
			if err != nil {
				t.Fatal(err)
			}
			i++
		case <-time.After(1 * time.Second):
			// Periodically print out progress so that we know the test is still
			// running.
			loadedCount := atomic.LoadInt64(&count)
			log.Infof(ctx, "%d (%d/s)", loadedCount, loadedCount-baseCount)
			c.Assert(ctx, t)
			if err := cluster.Consistent(ctx, c, 0); err != nil {
				t.Fatal(err)
			}
		}
	}

	elapsed := timeutil.Since(start)
	log.Infof(ctx, "%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
// chaosMonkey picks a set of nodes and restarts them. If stopClients is set
// all the clients are locked before the nodes are restarted.
func chaosMonkey(
	ctx context.Context,
	state *testState,
	c cluster.Cluster,
	stopClients bool,
	pickNodes func() []int,
	consistentIdx int,
) {
	defer close(state.teardown)
	for curRound := uint64(1); !state.done(); curRound++ {
		atomic.StoreUint64(&state.monkeyIteration, curRound)
		select {
		case <-stopper.ShouldStop():
			return
		default:
		}

		// Pick nodes to be restarted.
		nodes := pickNodes()

		if stopClients {
			// Prevent all clients from writing while nodes are being restarted.
			for i := 0; i < len(state.clients); i++ {
				state.clients[i].Lock()
			}
		}
		log.Infof(ctx, "round %d: restarting nodes %v", curRound, nodes)
		for _, i := range nodes {
			// Two early exit conditions.
			select {
			case <-stopper.ShouldStop():
				break
			default:
			}
			if state.done() {
				break
			}
			log.Infof(ctx, "round %d: restarting %d", curRound, i)
			if err := c.Kill(ctx, i); err != nil {
				state.t.Error(err)
			}
			if err := c.Restart(ctx, i); err != nil {
				state.t.Error(err)
			}
			if stopClients {
				// Reinitialize the client talking to the restarted node.
				state.initClient(ctx, state.t, c, i)
			}
		}
		if stopClients {
			for i := 0; i < len(state.clients); i++ {
				state.clients[i].Unlock()
			}
		}

		preCount := state.counts()
		madeProgress := func() bool {
			newCounts := state.counts()
			for i := range newCounts {
				if newCounts[i] > preCount[i] {
					return true
				}
			}
			return false
		}

		// Sleep until at least one client is writing successfully.
		log.Warningf(ctx, "round %d: monkey sleeping while cluster recovers...", curRound)
		for !state.done() && !madeProgress() {
			time.Sleep(time.Second)
		}
		c.Assert(ctx, state.t)
		if err := cluster.Consistent(ctx, c, consistentIdx); err != nil {
			state.t.Error(err)
		}
		log.Warningf(ctx, "round %d: cluster recovered", curRound)
	}
}
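
// The pickNodes closure is supplied by the caller and decides which nodes the
// monkey restarts each round. A minimal sketch of one possible policy follows
// (illustrative only; the helper name and the "random non-empty subset"
// selection are assumptions, not part of the original test): shuffle the node
// indices and keep a random, non-empty prefix.
//
// Example wiring (hypothetical): chaosMonkey(ctx, state, c, true, randomNodePicker(c), 0)
func randomNodePicker(c cluster.Cluster) func() []int {
	r, _ := randutil.NewPseudoRand()
	return func() []int {
		// Pick between 1 and NumNodes() distinct nodes at random.
		perm := r.Perm(c.NumNodes())
		return perm[:1+r.Intn(c.NumNodes())]
	}
}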