Example #1
0
// testPutInner drives concurrent Put traffic against the cluster for
// cfg.Duration while periodically checking that the cluster stays healthy.
//
// One writer goroutine is started per node reported by c.NumNodes(); all of
// them share the single client returned by c.NewClient(ctx, 0). Each writer
// repeatedly claims the next value of a shared counter, formats it as a
// zero-padded key and stores a random-length prefix of an 8192-byte random
// buffer under it. The calling goroutine waits until every writer has
// reported back; whenever a second passes without a writer finishing it logs
// the running total and calls c.Assert and cluster.Consistent. A Put error, a
// consistency error or a stopper signal fails the test immediately.
func testPutInner(ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	client, err := c.NewClient(ctx, 0)
	if err != nil {
		t.Fatal(err)
	}

	// Buffered so that writers can always report their result without
	// blocking on the receiver.
	results := make(chan error, c.NumNodes())
	began := timeutil.Now()
	stopAt := began.Add(cfg.Duration)
	var puts int64

	// writeUntilDeadline issues Puts until stopAt has passed. It returns the
	// first Put error encountered, or nil when the deadline is reached.
	writeUntilDeadline := func() error {
		// The second result of NewPseudoRand is ignored.
		rng, _ := randutil.NewPseudoRand()
		payload := randutil.RandBytes(rng, 8192)

		for timeutil.Now().Before(stopAt) {
			// The shared counter doubles as the key sequence, so keys are
			// unique across all writers.
			seq := atomic.AddInt64(&puts, 1)
			chunk := payload[:rng.Intn(len(payload))]
			if putErr := client.Put(ctx, fmt.Sprintf("%08d", seq), chunk); putErr != nil {
				return putErr
			}
		}
		return nil
	}

	for n := 0; n < c.NumNodes(); n++ {
		go func() {
			results <- writeUntilDeadline()
		}()
	}

	// Collect one result per writer, reporting progress in the meantime.
	finished := 0
	for finished < c.NumNodes() {
		before := atomic.LoadInt64(&puts)
		select {
		case <-stopper.ShouldStop():
			t.Fatalf("interrupted")
		case writerErr := <-results:
			if writerErr != nil {
				t.Fatal(writerErr)
			}
			finished++
		case <-time.After(time.Second):
			// Emit a heartbeat so it is visible that the test is still
			// making progress, then verify the cluster.
			current := atomic.LoadInt64(&puts)
			log.Infof(ctx, "%d (%d/s)", current, current-before)
			c.Assert(ctx, t)
			if checkErr := cluster.Consistent(ctx, c, 0); checkErr != nil {
				t.Fatal(checkErr)
			}
		}
	}

	// All writers have reported, so the counter is no longer being modified.
	seconds := timeutil.Since(began).Seconds()
	log.Infof(ctx, "%d %.1f/sec", puts, float64(puts)/seconds)
}
Example #2
0
// chaosMonkey picks a set of nodes and restarts them. If stopClients is set
// all the clients are locked before the nodes are restarted.
//
// It runs in rounds until state.done() reports true or the stopper signals
// shutdown at the start of a round. Each round:
//
//  1. records the round number in state.monkeyIteration,
//  2. asks pickNodes for the nodes to restart,
//  3. kills and restarts each of those nodes in order (re-initializing the
//     matching client when stopClients is set),
//  4. sleeps until at least one client's count has advanced past its value
//     taken right after the restarts, and
//  5. calls c.Assert and cluster.Consistent(ctx, c, consistentIdx).
//
// Errors from Kill, Restart and Consistent are reported through
// state.t.Error, so they mark the test as failed without aborting the round.
// state.teardown is closed when the function returns.
func chaosMonkey(
	ctx context.Context,
	state *testState,
	c cluster.Cluster,
	stopClients bool,
	pickNodes func() []int,
	consistentIdx int,
) {
	defer close(state.teardown)
	for curRound := uint64(1); !state.done(); curRound++ {
		atomic.StoreUint64(&state.monkeyIteration, curRound)
		select {
		case <-stopper.ShouldStop():
			return
		default:
		}

		// Pick nodes to be restarted.
		nodes := pickNodes()

		if stopClients {
			// Prevent all clients from writing while nodes are being restarted.
			for i := 0; i < len(state.clients); i++ {
				state.clients[i].Lock()
			}
		}
		log.Infof(ctx, "round %d: restarting nodes %v", curRound, nodes)
	restartLoop:
		for _, i := range nodes {
			// Two early exit conditions. The stopper check needs a labeled
			// break: an unlabeled break inside a select only leaves the
			// select, not the enclosing loop. We break rather than return so
			// that the clients locked above are still unlocked below.
			select {
			case <-stopper.ShouldStop():
				break restartLoop
			default:
			}
			if state.done() {
				break
			}
			log.Infof(ctx, "round %d: restarting %d", curRound, i)
			if err := c.Kill(ctx, i); err != nil {
				state.t.Error(err)
			}
			if err := c.Restart(ctx, i); err != nil {
				state.t.Error(err)
			}
			if stopClients {
				// Reinitialize the client talking to the restarted node.
				state.initClient(ctx, state.t, c, i)
			}
		}
		if stopClients {
			for i := 0; i < len(state.clients); i++ {
				state.clients[i].Unlock()
			}
		}

		// Snapshot the per-client counts after the restarts; progress is
		// measured relative to this snapshot.
		preCount := state.counts()

		// madeProgress reports whether any client's count has grown since
		// preCount was taken.
		madeProgress := func() bool {
			newCounts := state.counts()
			for i := range newCounts {
				if newCounts[i] > preCount[i] {
					return true
				}
			}
			return false
		}

		// Sleep until at least one client is writing successfully.
		log.Warningf(ctx, "round %d: monkey sleeping while cluster recovers...", curRound)
		for !state.done() && !madeProgress() {
			time.Sleep(time.Second)
		}
		c.Assert(ctx, state.t)

		if err := cluster.Consistent(ctx, c, consistentIdx); err != nil {
			state.t.Error(err)
		}
		log.Warningf(ctx, "round %d: cluster recovered", curRound)
	}
}