Example #1
func testNodeRestartInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	num := c.NumNodes()
	if num <= 0 {
		t.Fatalf("expected at least one node, got %d", num)
	}

	// Initialize the bank via the first node; a single client will run transfers against the last node.
	initBank(t, c.PGUrl(0))

	start := timeutil.Now()
	state := testState{
		t:        t,
		errChan:  make(chan error, 1),
		teardown: make(chan struct{}),
		deadline: start.Add(cfg.Duration),
		clients:  make([]testClient, 1),
	}

	client := &state.clients[0]
	client.Lock()
	client.db = makePGClient(t, c.PGUrl(num-1))
	client.Unlock()
	go transferMoneyLoop(0, &state, *numAccounts, *maxTransfer)

	defer func() {
		<-state.teardown
	}()

	// Chaos monkey.
	rnd, seed := randutil.NewPseudoRand()
	log.Warningf("monkey starts (seed %d)", seed)
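	// Pick a single random node to restart. The last node (num-1) is
	// deliberately excluded: it serves this test's only SQL client.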
	pickNodes := func() []int {
		return []int{rnd.Intn(num - 1)}
	}
	go chaosMonkey(&state, c, false, pickNodes)

	waitClientsStop(1, &state, cfg.Stall)

	// Verify accounts.
	verifyAccounts(t, client)

	elapsed := timeutil.Since(start)
	count := atomic.LoadUint64(&client.count)
	log.Infof("%d %.1f/sec", count, float64(count)/elapsed.Seconds())
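	// Finally, run a cluster-wide consistency check over all table data
	// via a KV client on the last node.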
	kvClient, kvStopper := c.NewClient(t, num-1)
	defer kvStopper.Stop()
	if pErr := kvClient.CheckConsistency(keys.TableDataMin, keys.TableDataMax); pErr != nil {
		// TODO(.*): change back to t.Fatal after #5051.
		log.Error(pErr)
	}
}
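
The chaosMonkey goroutine itself is not part of these examples. As a rough
sketch only, reconstructed from its call sites (the boolean parameter's
meaning and the recovery mechanics are assumptions, not verified source):

// Sketch only: a plausible chaosMonkey shape inferred from its call sites
// above. The bool parameter (assumed here to mean "pause the transfer
// clients while nodes are down") is ignored in this sketch.
func chaosMonkeySketch(state *testState, c cluster.Cluster, pauseClients bool, pickNodes func() []int) {
	for timeutil.Now().Before(state.deadline) {
		nodes := pickNodes()
		// Kill the chosen nodes...
		for _, i := range nodes {
			if err := c.Kill(i); err != nil {
				state.t.Error(err)
				return
			}
		}
		// ...then restart them before the next round of chaos.
		for _, i := range nodes {
			if err := c.Restart(i); err != nil {
				state.t.Error(err)
				return
			}
		}
	}
}
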
Example #2
func testClusterRecoveryInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	num := c.NumNodes()
	if num <= 0 {
		t.Fatalf("expected at least one node, got %d", num)
	}

	// Initialize the bank via the first node, then run one client against each node.
	initBank(t, c.PGUrl(0))

	start := timeutil.Now()
	state := testState{
		t:        t,
		errChan:  make(chan error, num),
		teardown: make(chan struct{}),
		deadline: start.Add(cfg.Duration),
		clients:  make([]testClient, num),
	}

	for i := 0; i < num; i++ {
		state.clients[i].Lock()
		state.initClient(t, c, i)
		state.clients[i].Unlock()
		go transferMoneyLoop(i, &state, *numAccounts, *maxTransfer)
	}

	defer func() {
		<-state.teardown
	}()

	// Chaos monkey.
	rnd, seed := randutil.NewPseudoRand()
	log.Warningf("monkey starts (seed %d)", seed)
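	// Pick a random, non-empty subset of the nodes: anywhere from one
	// node up to the entire cluster may be taken down in a given round.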
	pickNodes := func() []int {
		return rnd.Perm(num)[:rnd.Intn(num)+1]
	}
	go chaosMonkey(&state, c, true, pickNodes)

	waitClientsStop(num, &state, cfg.Stall)

	// Verify accounts.
	verifyAccounts(t, &state.clients[0])

	elapsed := timeutil.Since(start)
	var count uint64
	counts := state.counts()
	for _, c := range counts {
		count += c
	}
	log.Infof("%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
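
For orientation, a minimal sketch of the testClient/testState scaffolding
these bank examples assume, reconstructed purely from usage above (field and
method names come from the calls; the real definitions may differ):

// Sketch only: reconstructed from how the examples use these types.
type testClient struct {
	sync.RWMutex        // guards db against concurrent (re)initialization
	db    *gosql.DB     // SQL connection owned by this client
	count uint64        // successful transfers; read via atomic.LoadUint64
}

type testState struct {
	t        *testing.T
	errChan  chan error    // transfer loops report their final error here
	teardown chan struct{} // closed once the harness has shut down
	deadline time.Time     // transfer loops stop after this time
	clients  []testClient
}

// counts takes a snapshot of every client's transfer counter.
func (s *testState) counts() []uint64 {
	counts := make([]uint64, len(s.clients))
	for i := range s.clients {
		counts[i] = atomic.LoadUint64(&s.clients[i].count)
	}
	return counts
}
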
Example #3
func testNodeRestartInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	num := c.NumNodes()
	if minNum := 3; num < minNum {
		t.Skipf("need at least %d nodes, got %d", minNum, num)
	}

	// Initialize the bank via the first node; a single client will run transfers against the last node.
	initBank(t, c.PGUrl(0))

	start := timeutil.Now()
	state := testState{
		t:        t,
		errChan:  make(chan error, 1),
		teardown: make(chan struct{}),
		deadline: start.Add(cfg.Duration),
		clients:  make([]testClient, 1),
	}

	client := &state.clients[0]
	client.Lock()
	client.db = makePGClient(t, c.PGUrl(num-1))
	client.Unlock()
	go transferMoneyLoop(0, &state, *numAccounts, *maxTransfer)

	defer func() {
		<-state.teardown
	}()

	// Chaos monkey.
	rnd, seed := randutil.NewPseudoRand()
	log.Warningf(context.Background(), "monkey starts (seed %d)", seed)
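	// Pick a single random node to restart. The last node (num-1) is
	// deliberately excluded: it serves this test's only SQL client.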
	pickNodes := func() []int {
		return []int{rnd.Intn(num - 1)}
	}
	go chaosMonkey(&state, c, false, pickNodes)

	waitClientsStop(1, &state, stall)

	// Verify accounts.
	verifyAccounts(t, client)

	elapsed := timeutil.Since(start)
	count := atomic.LoadUint64(&client.count)
	log.Infof(context.Background(), "%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
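
transferMoneyLoop is likewise not shown. A rough sketch under loudly stated
assumptions: the bank.accounts schema, the single-statement transfer, and
the final send on errChan are all guesses from context, not verified source.

// Sketch only: a plausible transferMoneyLoop inferred from its call sites.
// The bank.accounts(id, balance) schema is hypothetical.
func transferMoneyLoopSketch(i int, state *testState, numAccounts, maxTransfer int) {
	client := &state.clients[i]
	var err error
	for timeutil.Now().Before(state.deadline) {
		from, to := rand.Intn(numAccounts), rand.Intn(numAccounts)
		if from == to {
			continue
		}
		amount := rand.Intn(maxTransfer)
		// Move amount from one account to the other in one statement so
		// the total balance is preserved; a real implementation would
		// check balances and retry on transaction-retry errors.
		client.RLock()
		_, err = client.db.Exec(
			`UPDATE bank.accounts
			    SET balance = balance + CASE id WHEN $1 THEN -$3 WHEN $2 THEN $3 END
			  WHERE id IN ($1, $2)`,
			from, to, amount,
		)
		client.RUnlock()
		if err != nil {
			break
		}
		atomic.AddUint64(&client.count, 1)
	}
	state.errChan <- err // report the final result to waitClientsStop
}
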
Example #4
// initClient initializes the client talking to node "i".
// It requires that the caller hold the client's write lock.
func (state *testState) initClient(t *testing.T, c cluster.Cluster, i int) {
	state.clients[i].db = makePGClient(t, c.PGUrl(i))
}
Example #5
func testMonotonicInsertsInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	var clients []mtClient
	for i := 0; i < c.NumNodes(); i++ {
		clients = append(clients, mtClient{ID: i, DB: makePGClient(t, c.PGUrl(i))})
	}
	// We insert into this table by selecting MAX(val) and increasing it by
	// one, and we expect that val and sts (the commit timestamp) increase
	// together.
	if _, err := clients[0].Exec(`
CREATE DATABASE mono;
CREATE TABLE IF NOT EXISTS mono.mono (val INT, sts STRING, node INT, tb INT);
INSERT INTO mono.mono VALUES(-1, '0', -1, -1)`); err != nil {
		t.Fatal(err)
	}

	var idGen uint64

	invoke := func(client mtClient) {
		logPrefix := fmt.Sprintf("%03d.%03d: ", atomic.AddUint64(&idGen, 1), client.ID)
		l := func(msg string, args ...interface{}) {
			t.Logf(logPrefix+msg, args...)
		}
		l("begin")
		defer l("done")

		var exRow, insRow mtRow
		var attempt int
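		// crdb.ExecuteTx retries the closure on retryable transaction
		// errors, so attempt can exceed 1 under contention.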
		if err := crdb.ExecuteTx(client.DB, func(tx *gosql.Tx) error {
			attempt++
			l("attempt %d", attempt)
			if err := tx.QueryRow(`SELECT cluster_logical_timestamp()`).Scan(
				&insRow.sts,
			); err != nil {
				l(err.Error())
				return err
			}

			l("read max val")
			if err := tx.QueryRow(`SELECT MAX(val) AS m FROM mono.mono`).Scan(
				&exRow.val,
			); err != nil {
				l(err.Error())
				return err
			}

			l("read max row for val=%d", exRow.val)
			if err := tx.QueryRow(`SELECT sts, node, tb FROM mono.mono WHERE val = $1`,
				exRow.val,
			).Scan(
				&exRow.sts, &exRow.node, &exRow.tb,
			); err != nil {
				l(err.Error())
				return err
			}

			l("insert")
			if err := tx.QueryRow(`
INSERT INTO mono.mono (val, sts, node, tb) VALUES($1, $2, $3, $4)
RETURNING val, sts, node, tb`,
				exRow.val+1, insRow.sts, client.ID, 0,
			).Scan(
				&insRow.val, &insRow.sts, &insRow.node, &insRow.tb,
			); err != nil {
				l(err.Error())
				return err
			}
			l("commit")
			return nil
		}); err != nil {
			t.Errorf("%T: %v", err, err)
		}
	}

	verify := func() {
		client := clients[0]
		var numDistinct int
		if err := client.QueryRow("SELECT COUNT(DISTINCT(val)) FROM mono.mono").Scan(
			&numDistinct,
		); err != nil {
			t.Fatal(err)
		}
		rows, err := client.Query("SELECT val, sts, node, tb FROM mono.mono ORDER BY val ASC, sts ASC")
		if err != nil {
			t.Fatal(err)
		}
		var results mtRows
		for rows.Next() {
			var row mtRow
			if err := rows.Scan(&row.val, &row.sts, &row.node, &row.tb); err != nil {
				t.Fatal(err)
			}
			results = append(results, row)
		}

		if !sort.IsSorted(results) {
			t.Errorf("results are not sorted:\n%s", results)
		}

		if numDistinct != len(results) {
			t.Errorf("'val' column is not unique: %d results, but %d distinct:\n%s",
				len(results), numDistinct, results)
		}
	}

	concurrency := 2 * c.NumNodes()

	sem := make(chan struct{}, concurrency)
	timer := time.After(cfg.Duration)

	defer verify()
	defer func() {
		// Now that consuming has stopped, fill up the semaphore (i.e. wait for
		// still-running goroutines to stop)
		for i := 0; i < concurrency; i++ {
			sem <- struct{}{}
		}
	}()

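	// Each iteration acquires a semaphore slot before launching a worker,
	// bounding in-flight transactions at 2x the node count; the worker
	// releases its slot when invoke returns.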
	for {
		select {
		case sem <- struct{}{}:
		case <-stopper:
			return
		case <-timer:
			return
		}
		go func(client mtClient) {
			invoke(client)
			<-sem
		}(clients[rand.Intn(c.NumNodes())])
	}
}
Example #6
func testEventLogInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	num := c.NumNodes()
	if num <= 0 {
		t.Fatalf("expected at least one node, got %d", num)
	}

	var confirmedClusterID uuid.UUID
	type nodeEventInfo struct {
		Descriptor roachpb.NodeDescriptor
		ClusterID  uuid.UUID
	}

	// Verify that a node_join message was eventually logged for each node
	// in the cluster, and that each message is correctly formatted.
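	// util.SucceedsSoon retries the closure until it returns nil, failing
	// the test if errors persist past the retry deadline.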
	util.SucceedsSoon(t, func() error {
		db := makePGClient(t, c.PGUrl(0))
		defer db.Close()

		// Query all node join events. There should be one for each node in the
		// cluster.
		rows, err := db.Query(
			"SELECT targetID, info FROM system.eventlog WHERE eventType = $1",
			string(csql.EventLogNodeJoin))
		if err != nil {
			return err
		}
		seenIds := make(map[int64]struct{})
		var clusterID uuid.UUID
		for rows.Next() {
			var targetID int64
			var infoStr gosql.NullString
			if err := rows.Scan(&targetID, &infoStr); err != nil {
				t.Fatal(err)
			}

			// Verify the stored node descriptor.
			if !infoStr.Valid {
				t.Fatalf("info not recorded for node join, target node %d", targetID)
			}
			var info nodeEventInfo
			if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
				t.Fatal(err)
			}
			if a, e := int64(info.Descriptor.NodeID), targetID; a != e {
				t.Fatalf("Node join with targetID %d had descriptor for wrong node %d", e, a)
			}

			// Verify cluster ID is recorded, and is the same for all nodes.
			if uuid.Equal(info.ClusterID, *uuid.EmptyUUID) {
				t.Fatalf("Node join recorded nil cluster id, info: %v", info)
			}
			if uuid.Equal(clusterID, *uuid.EmptyUUID) {
				clusterID = info.ClusterID
			} else if !uuid.Equal(clusterID, info.ClusterID) {
				t.Fatalf(
					"Node join recorded different cluster ID than earlier node. Expected %s, got %s. Info: %v",
					clusterID, info.ClusterID, info)
			}

			// Verify that all NodeIDs are different.
			if _, ok := seenIds[targetID]; ok {
				t.Fatalf("Node ID %d seen in two different node join messages", targetID)
			}
			seenIds[targetID] = struct{}{}
		}
		if err := rows.Err(); err != nil {
			return err
		}

		if a, e := len(seenIds), c.NumNodes(); a != e {
			return errors.Errorf("expected %d node join messages, found %d: %v", e, a, seenIds)
		}

		confirmedClusterID = clusterID
		return nil
	})

	// Stop and restart node 0, then verify that a node_restart message is logged.
	if err := c.Kill(0); err != nil {
		t.Fatal(err)
	}
	if err := c.Restart(0); err != nil {
		t.Fatal(err)
	}

	util.SucceedsSoon(t, func() error {
		db := makePGClient(t, c.PGUrl(0))
		defer db.Close()

		// Query all node restart events. There should only be one.
		rows, err := db.Query(
			"SELECT targetID, info FROM system.eventlog WHERE eventType = $1",
			string(csql.EventLogNodeRestart))
		if err != nil {
			return err
		}

		seenCount := 0
		for rows.Next() {
			var targetID int64
			var infoStr gosql.NullString
			if err := rows.Scan(&targetID, &infoStr); err != nil {
				t.Fatal(err)
			}

			// Verify the stored node descriptor.
			if !infoStr.Valid {
				t.Fatalf("info not recorded for node join, target node %d", targetID)
			}
			var info nodeEventInfo
			if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
				t.Fatal(err)
			}
			if a, e := int64(info.Descriptor.NodeID), targetID; a != e {
				t.Fatalf("Node join with targetID %d had descriptor for wrong node %d", e, a)
			}

			// Verify cluster ID is recorded, and is the same for all nodes.
			if !uuid.Equal(confirmedClusterID, info.ClusterID) {
				t.Fatalf(
					"Node restart recorded different cluster ID than earlier join. Expected %s, got %s. Info: %v",
					confirmedClusterID, info.ClusterID, info)
			}

			seenCount++
		}
		if err := rows.Err(); err != nil {
			return err
		}
		if seenCount != 1 {
			return errors.Errorf("Expected only one node restart event, found %d", seenCount)
		}
		return nil
	})
}