func TestEagerReplication(t *testing.T) {
	defer leaktest.AfterTest(t)()

	store, stopper, _ := createTestStore(t)
	defer stopper.Stop()

	// Disable the replica scanner so that we rely on the eager replication code
	// path that occurs after splits.
	store.SetReplicaScannerDisabled(true)

	if err := server.WaitForInitialSplits(store.DB()); err != nil {
		t.Fatal(err)
	}

	// WaitForInitialSplits will return as soon as the meta2 span contains the
	// expected number of descriptors. But the addition of replicas to the
	// replicateQueue after a split occurs happens after the update of the
	// descriptors in meta2, leaving a tiny window of time in which the newly
	// split replica will not have been added to purgatory. Thus we loop.
	util.SucceedsSoon(t, func() error {
		// After the initial splits have been performed, all of the resulting
		// ranges should be present in replicate queue purgatory (because we only
		// have a single store in the test and thus replication cannot succeed).
		expected := server.ExpectedInitialRangeCount()
		if n := store.ReplicateQueuePurgatoryLength(); expected != n {
			return errors.Errorf("expected %d replicas in purgatory, but found %d", expected, n)
		}
		return nil
	})
}
func testRaftUpdateInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	minAffected := int64(server.ExpectedInitialRangeCount())

	const long = time.Minute
	const short = 10 * time.Second

	mustPost := func(freeze bool) serverpb.ClusterFreezeResponse {
		reply, err := postFreeze(c, freeze, long)
		if err != nil {
			t.Fatal(errors.Errorf("%v", err))
		}
		return reply
	}

	if reply := mustPost(false); reply.RangesAffected != 0 {
		t.Fatalf("expected initial unfreeze to affect no ranges, got %d", reply.RangesAffected)
	}

	if reply := mustPost(true); reply.RangesAffected < minAffected {
		t.Fatalf("expected >=%d frozen ranges, got %d", minAffected, reply.RangesAffected)
	}

	if reply := mustPost(true); reply.RangesAffected != 0 {
		t.Fatalf("expected second freeze to affect no ranges, got %d", reply.RangesAffected)
	}

	if reply := mustPost(false); reply.RangesAffected < minAffected {
		t.Fatalf("expected >=%d thawed ranges, got %d", minAffected, reply.RangesAffected)
	}

	num := c.NumNodes()
	if num < 3 {
		t.Skip("skipping remainder of test; needs at least 3 nodes")
	}

	// Kill the last node.
	if err := c.Kill(num - 1); err != nil {
		t.Fatal(err)
	}

	// An attempt to freeze should get stuck (since it does not get confirmation
	// of the last node receiving the freeze command).
	// Note that this is the freeze trigger stalling on the Replica, not the
	// Store-polling mechanism.
	acceptErrs := strings.Join([]string{
		"timed out waiting for Range",
		"Timeout exceeded while",
		"connection is closing",
		"deadline",
		// Error returned via JSON when the server-side gRPC stream times out
		// (due to lack of new input). Unmarshaling that JSON fails with a
		// message referencing unknown fields, unfortunately in map order.
		"unknown field .*",
	}, "|")

	if reply, err := postFreeze(c, true, short); !testutils.IsError(err, acceptErrs) {
		t.Fatalf("expected timeout, got %v: %v", err, reply)
	}

	// Shut down the remaining nodes and restart them.
	for i := 0; i < num-1; i++ {
		if err := c.Kill(i); err != nil {
			t.Fatal(err)
		}
	}
	for i := 0; i < num; i++ {
		if err := c.Restart(i); err != nil {
			t.Fatal(err)
		}
	}

	// The cluster should now be fully operational (at least after waiting
	// a little bit) since each node tries to unfreeze everything when it
	// starts.
	//
	// TODO(tschottdorf): we unfreeze again in the loop since Raft reproposals
	// can re-freeze Ranges unexpectedly. This should be re-evaluated after
	// #6287 removes that problem.
	if err := util.RetryForDuration(time.Minute, func() error {
		if _, err := postFreeze(c, false, short); err != nil {
			return err
		}

		// TODO(tschottdorf): moving the client creation outside of the retry
		// loop will break the test with the following message:
		//
		//   client/rpc_sender.go:61: roachpb.Batch RPC failed as client
		//   connection was closed
		//
		// Perhaps the cluster updates the address too late after restarting
		// the node.
		db, dbStopper := c.NewClient(t, 0)
		defer dbStopper.Stop()

		_, err := db.Scan(keys.LocalMax, roachpb.KeyMax, 0)
		if err != nil {
			log.Info(err)
		}
		return err
	}); err != nil {
		t.Fatal(err)
	}

	// Unfreezing again should be a no-op.
	if reply, err := postFreeze(c, false, long); err != nil {
		t.Fatal(err)
	} else if reply.RangesAffected > 0 {
		t.Fatalf("still %d frozen ranges", reply.RangesAffected)
	}
}
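// testRaftUpdateInner relies on a postFreeze helper that is defined elsewhere
// in the acceptance package. The sketch below illustrates what such a helper
// might look like: it POSTs a JSON freeze/unfreeze request to the admin
// endpoint of the first node and decodes the response. The endpoint path
// "/_admin/v1/cluster/freeze", the JSON field name "freeze", and the
// c.URL(0) accessor are assumptions for illustration only, not the actual
// implementation; it uses bytes, encoding/json, and net/http from the
// standard library.
func postFreezeSketch(
	c cluster.Cluster, freeze bool, timeout time.Duration,
) (serverpb.ClusterFreezeResponse, error) {
	var resp serverpb.ClusterFreezeResponse

	// Build the JSON request body (hypothetical field name).
	body, err := json.Marshal(map[string]bool{"freeze": freeze})
	if err != nil {
		return resp, err
	}

	// Bound the whole round trip by the caller-supplied timeout.
	httpClient := &http.Client{Timeout: timeout}

	// c.URL(0) is assumed to return the HTTP address of the first node.
	url := c.URL(0) + "/_admin/v1/cluster/freeze"
	httpResp, err := httpClient.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return resp, err
	}
	defer httpResp.Body.Close()

	// Decode the JSON response into the protobuf-generated struct.
	err = json.NewDecoder(httpResp.Body).Decode(&resp)
	return resp, err
}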
// TestSplitOnTableBoundaries verifies that ranges get split
// as new tables get created.
func TestSplitOnTableBoundaries(t *testing.T) {
	defer leaktest.AfterTest(t)()

	params, _ := createTestServerParams()
	// We want fast scan.
	params.ScanInterval = time.Millisecond
	params.ScanMaxIdleTime = time.Millisecond

	s, sqlDB, kvDB := serverutils.StartServer(t, params)
	defer s.Stopper().Stop()

	expectedInitialRanges := server.ExpectedInitialRangeCount()

	if _, err := sqlDB.Exec(`CREATE DATABASE test`); err != nil {
		t.Fatal(err)
	}

	// We split up to the largest allocated descriptor ID, be it a table
	// or a database.
	util.SucceedsSoon(t, func() error {
		num, err := getNumRanges(kvDB)
		if err != nil {
			return err
		}
		if e := expectedInitialRanges + 1; num != e {
			return errors.Errorf("expected %d splits, found %d", e, num)
		}
		return nil
	})

	// Verify the actual splits.
	objectID := uint32(keys.MaxReservedDescID + 1)
	splits := []roachpb.RKey{keys.MakeTablePrefix(objectID), roachpb.RKeyMax}
	ranges, err := getRangeKeys(kvDB)
	if err != nil {
		t.Fatal(err)
	}
	if a, e := ranges[expectedInitialRanges-1:], splits; !rangesMatchSplits(a, e) {
		t.Fatalf("Found ranges: %v\nexpected: %v", a, e)
	}

	// Let's create a table.
	if _, err := sqlDB.Exec(`CREATE TABLE test.test (k INT PRIMARY KEY, v INT)`); err != nil {
		t.Fatal(err)
	}

	util.SucceedsSoon(t, func() error {
		num, err := getNumRanges(kvDB)
		if err != nil {
			return err
		}
		if e := expectedInitialRanges + 2; num != e {
			return errors.Errorf("expected %d splits, found %d", e, num)
		}
		return nil
	})

	// Verify the actual splits.
	splits = []roachpb.RKey{keys.MakeTablePrefix(objectID), keys.MakeTablePrefix(objectID + 1), roachpb.RKeyMax}
	ranges, err = getRangeKeys(kvDB)
	if err != nil {
		t.Fatal(err)
	}
	if a, e := ranges[expectedInitialRanges-1:], splits; !rangesMatchSplits(a, e) {
		t.Fatalf("Found ranges: %v\nexpected: %v", a, e)
	}
}
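// TestSplitOnTableBoundaries uses the getRangeKeys, getNumRanges, and
// rangesMatchSplits helpers, which are defined elsewhere in the test file.
// The sketches below show what such helpers might look like, assuming the
// *client.DB Scan signature used above and that range descriptors live in
// the meta2 span under keys.Meta2Prefix; treat them as illustrations under
// those assumptions, not the actual implementations.

// getRangeKeysSketch returns the end keys of all ranges by scanning the meta2
// index, where each key is Meta2Prefix followed by the range's end key.
func getRangeKeysSketch(db *client.DB) ([]roachpb.RKey, error) {
	rows, err := db.Scan(keys.Meta2Prefix, keys.Meta2Prefix.PrefixEnd(), 0)
	if err != nil {
		return nil, err
	}
	ret := make([]roachpb.RKey, len(rows))
	for i, row := range rows {
		// Strip the meta2 prefix to recover the range end key.
		ret[i] = bytes.TrimPrefix(row.Key, keys.Meta2Prefix)
	}
	return ret, nil
}

// getNumRangesSketch counts ranges by counting meta2 descriptors.
func getNumRangesSketch(db *client.DB) (int, error) {
	rangeKeys, err := getRangeKeysSketch(db)
	if err != nil {
		return 0, err
	}
	return len(rangeKeys), nil
}

// rangesMatchSplitsSketch reports whether the observed range end keys are
// exactly the expected split points, in order.
func rangesMatchSplitsSketch(ranges, splits []roachpb.RKey) bool {
	if len(ranges) != len(splits) {
		return false
	}
	for i := range ranges {
		if !splits[i].Equal(ranges[i]) {
			return false
		}
	}
	return true
}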
func TestLogSplits(t *testing.T) {
	defer leaktest.AfterTest(t)()

	s := server.StartTestServer(t)
	defer s.Stop()

	pgURL, cleanupFn := sqlutils.PGUrl(t, s, security.RootUser, "TestLogSplits")
	defer cleanupFn()
	db, err := sql.Open("postgres", pgURL.String())
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	countSplits := func() int {
		var count int
		// TODO(mrtracy): this should be a parameterized query, but due to #3660
		// it does not work. This should be changed when #3660 is fixed.
		err := db.QueryRow(fmt.Sprintf(
			`SELECT COUNT(*) FROM system.rangelog WHERE eventType = '%s'`,
			string(storage.RangeEventLogSplit))).Scan(&count)
		if err != nil {
			t.Fatal(err)
		}
		return count
	}

	// Count the number of split events.
	initialSplits := server.ExpectedInitialRangeCount() - 1
	if a, e := countSplits(), initialSplits; a != e {
		t.Fatalf("expected %d initial splits, found %d", e, a)
	}

	// Generate an explicit split event.
	kvDB := s.DB()
	if err := kvDB.AdminSplit("splitkey"); err != nil {
		t.Fatal(err)
	}

	// Verify that the count has increased by one.
	if a, e := countSplits(), initialSplits+1; a != e {
		t.Fatalf("expected %d splits, found %d", e, a)
	}

	// Verify that RangeID always increases (a good way to see that the splits
	// are logged correctly).
	// TODO(mrtracy): Change to parameterized query when #3660 is fixed.
	rows, err := db.Query(fmt.Sprintf(
		`SELECT rangeID, otherRangeID, info FROM system.rangelog WHERE eventType = '%s'`,
		string(storage.RangeEventLogSplit)))
	if err != nil {
		t.Fatal(err)
	}
	for rows.Next() {
		var rangeID int64
		var otherRangeID sql.NullInt64
		var infoStr sql.NullString
		if err := rows.Scan(&rangeID, &otherRangeID, &infoStr); err != nil {
			t.Fatal(err)
		}

		if !otherRangeID.Valid {
			t.Errorf("otherRangeID not recorded for split of range %d", rangeID)
		}
		if otherRangeID.Int64 <= rangeID {
			t.Errorf("otherRangeID %d is not greater than rangeID %d", otherRangeID.Int64, rangeID)
		}

		// Verify that info returns a JSON struct.
		if !infoStr.Valid {
			t.Errorf("info not recorded for split of range %d", rangeID)
		}
		var info struct {
			UpdatedDesc roachpb.RangeDescriptor
			NewDesc     roachpb.RangeDescriptor
		}
		if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
			t.Errorf("error unmarshalling info string for split of range %d: %s", rangeID, err)
			continue
		}
		if int64(info.UpdatedDesc.RangeID) != rangeID {
			t.Errorf("recorded wrong updated descriptor %s for split of range %d", info.UpdatedDesc, rangeID)
		}
		if int64(info.NewDesc.RangeID) != otherRangeID.Int64 {
			t.Errorf("recorded wrong new descriptor %s for split of range %d", info.NewDesc, rangeID)
		}
	}
	if rows.Err() != nil {
		t.Fatal(rows.Err())
	}

	// This code assumes that there is only one TestServer, and thus that
	// StoreID 1 is present on the testserver. If this assumption changes in the
	// future, *any* store will work, but a new method will need to be added to
	// Stores (or a creative usage of VisitStores could suffice).
	store, pErr := s.Stores().GetStore(roachpb.StoreID(1))
	if pErr != nil {
		t.Fatal(pErr)
	}
	reg := store.Registry()
	minSplits := int64(initialSplits + 1)
	// Verify that the minimum number of splits has occurred. This is a min
	// instead of an exact number, because the number of splits seems to vary
	// between different runs of this test.
	if a := reg.GetCounter("range.splits").Count(); a < minSplits {
		t.Errorf("splits = %d < min %d", a, minSplits)
	}
}
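// The countSplits closure above builds its query with fmt.Sprintf only
// because of issue #3660. The sketch below shows the parameterized form it
// could use once that issue is fixed, relying on the standard database/sql
// placeholder support ($1-style placeholders as used by lib/pq); treat it as
// the intended end state rather than code that works today.
func countSplitsParameterizedSketch(db *sql.DB) (int, error) {
	var count int
	// Pass the event type as a query argument instead of interpolating it.
	err := db.QueryRow(
		`SELECT COUNT(*) FROM system.rangelog WHERE eventType = $1`,
		string(storage.RangeEventLogSplit),
	).Scan(&count)
	return count, err
}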
func TestLogSplits(t *testing.T) {
	defer leaktest.AfterTest(t)()

	s := server.StartTestServer(t)
	defer s.Stop()

	pgURL, cleanupFn := sqlutils.PGUrl(t, s, security.RootUser, "TestLogSplits")
	defer cleanupFn()
	db, err := sql.Open("postgres", pgURL.String())
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	countSplits := func() int {
		var count int
		// TODO(mrtracy): this should be a parameterized query, but due to #3660
		// it does not work. This should be changed when #3660 is fixed.
		err := db.QueryRow(fmt.Sprintf(
			`SELECT COUNT(*) FROM system.rangelog WHERE eventType = '%s'`,
			string(storage.RangeEventLogSplit))).Scan(&count)
		if err != nil {
			t.Fatal(err)
		}
		return count
	}

	// Count the number of split events.
	initialSplits := server.ExpectedInitialRangeCount() - 1
	if a, e := countSplits(), initialSplits; a != e {
		t.Fatalf("expected %d initial splits, found %d", e, a)
	}

	// Generate an explicit split event.
	kvDB := s.DB()
	if err := kvDB.AdminSplit("splitkey"); err != nil {
		t.Fatal(err)
	}

	// Verify that the count has increased by one.
	if a, e := countSplits(), initialSplits+1; a != e {
		t.Fatalf("expected %d splits, found %d", e, a)
	}

	// Verify that RangeID always increases (a good way to see that the splits
	// are logged correctly).
	// TODO(mrtracy): Change to parameterized query when #3660 is fixed.
	rows, err := db.Query(fmt.Sprintf(
		`SELECT rangeID, otherRangeID, info FROM system.rangelog WHERE eventType = '%s'`,
		string(storage.RangeEventLogSplit)))
	if err != nil {
		t.Fatal(err)
	}
	for rows.Next() {
		var rangeID int64
		var otherRangeID sql.NullInt64
		var infoStr sql.NullString
		if err := rows.Scan(&rangeID, &otherRangeID, &infoStr); err != nil {
			t.Fatal(err)
		}

		if !otherRangeID.Valid {
			t.Errorf("otherRangeID not recorded for split of range %d", rangeID)
		}
		if otherRangeID.Int64 <= rangeID {
			t.Errorf("otherRangeID %d is not greater than rangeID %d", otherRangeID.Int64, rangeID)
		}

		// Verify that info returns a JSON struct.
		if !infoStr.Valid {
			t.Errorf("info not recorded for split of range %d", rangeID)
		}
		var info struct {
			UpdatedDesc roachpb.RangeDescriptor
			NewDesc     roachpb.RangeDescriptor
		}
		if err := json.Unmarshal([]byte(infoStr.String), &info); err != nil {
			t.Errorf("error unmarshalling info string for split of range %d: %s", rangeID, err)
			continue
		}
		if int64(info.UpdatedDesc.RangeID) != rangeID {
			t.Errorf("recorded wrong updated descriptor %s for split of range %d", info.UpdatedDesc, rangeID)
		}
		if int64(info.NewDesc.RangeID) != otherRangeID.Int64 {
			t.Errorf("recorded wrong new descriptor %s for split of range %d", info.NewDesc, rangeID)
		}
	}
	if rows.Err() != nil {
		t.Fatal(rows.Err())
	}
}
func TestLogSplits(t *testing.T) {
	defer leaktest.AfterTest(t)

	s := server.StartTestServer(t)
	defer s.Stop()

	pgUrl, cleanupFn := sqlutils.PGUrl(t, s, security.RootUser, os.TempDir(), "TestLogSplits")
	defer cleanupFn()
	db, err := sql.Open("postgres", pgUrl.String())
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	countSplits := func() int {
		var count int
		// TODO(mrtracy): this should be a parameterized query, but due to #3660
		// it does not work. This should be changed when #3660 is fixed.
		err := db.QueryRow(fmt.Sprintf(
			`SELECT COUNT(*) FROM system.rangelog WHERE eventType = '%s'`,
			string(storage.RangeEventLogSplit))).Scan(&count)
		if err != nil {
			t.Fatal(err)
		}
		return count
	}

	// Count the number of split events.
	initialSplits := server.ExpectedInitialRangeCount() - 1
	if a, e := countSplits(), initialSplits; a != e {
		t.Fatalf("expected %d initial splits, found %d", e, a)
	}

	// Generate an explicit split event.
	kvDB, err := s.OpenDBClient(security.NodeUser)
	if err != nil {
		t.Fatal(err)
	}
	if err := kvDB.AdminSplit("splitkey"); err != nil {
		t.Fatal(err)
	}

	// Verify that the count has increased by one.
	if a, e := countSplits(), initialSplits+1; a != e {
		t.Fatalf("expected %d splits, found %d", e, a)
	}

	// Verify that RangeID always increases (a good way to see that the splits
	// are logged correctly).
	// TODO(mrtracy): Change to parameterized query when #3660 is fixed.
	rows, err := db.Query(fmt.Sprintf(
		`SELECT rangeID, otherRangeID FROM system.rangelog WHERE eventType = '%s'`,
		string(storage.RangeEventLogSplit)))
	if err != nil {
		t.Fatal(err)
	}
	for rows.Next() {
		var rangeID int64
		var otherRangeID sql.NullInt64
		if err := rows.Scan(&rangeID, &otherRangeID); err != nil {
			t.Fatal(err)
		}
		if !otherRangeID.Valid {
			t.Fatalf("otherRangeID not recorded for split of range %d", rangeID)
		}
		if otherRangeID.Int64 <= rangeID {
			t.Fatalf("otherRangeID %d is not greater than rangeID %d", otherRangeID.Int64, rangeID)
		}
	}
	if rows.Err() != nil {
		t.Fatal(rows.Err())
	}
}
// TestSplitOnTableBoundaries verifies that ranges get split
// as new tables get created.
func TestSplitOnTableBoundaries(t *testing.T) {
	defer leaktest.AfterTest(t)()

	s, sqlDB, kvDB := setupWithContext(t, getFastScanContext())
	defer cleanup(s, sqlDB)

	expectedInitialRanges := server.ExpectedInitialRangeCount()

	if _, err := sqlDB.Exec(`CREATE DATABASE test`); err != nil {
		t.Fatal(err)
	}

	// We split up to the largest allocated descriptor ID, be it a table
	// or a database.
	util.SucceedsSoon(t, func() error {
		num, err := getNumRanges(kvDB)
		if err != nil {
			return err
		}
		if e := expectedInitialRanges + 1; num != e {
			return util.Errorf("expected %d splits, found %d", e, num)
		}
		return nil
	})

	// Verify the actual splits.
	objectID := uint32(keys.MaxReservedDescID + 1)
	splits := []roachpb.RKey{keys.MakeTablePrefix(objectID), roachpb.RKeyMax}
	ranges, err := getRangeKeys(kvDB)
	if err != nil {
		t.Fatal(err)
	}
	if a, e := ranges[expectedInitialRanges-1:], splits; !rangesMatchSplits(a, e) {
		t.Fatalf("Found ranges: %v\nexpected: %v", a, e)
	}

	// Let's create a table.
	if _, err := sqlDB.Exec(`CREATE TABLE test.test (k INT PRIMARY KEY, v INT)`); err != nil {
		t.Fatal(err)
	}

	util.SucceedsSoon(t, func() error {
		num, err := getNumRanges(kvDB)
		if err != nil {
			return err
		}
		if e := expectedInitialRanges + 2; num != e {
			return util.Errorf("expected %d splits, found %d", e, num)
		}
		return nil
	})

	// Verify the actual splits.
	splits = []roachpb.RKey{keys.MakeTablePrefix(objectID), keys.MakeTablePrefix(objectID + 1), roachpb.RKeyMax}
	ranges, err = getRangeKeys(kvDB)
	if err != nil {
		t.Fatal(err)
	}
	if a, e := ranges[expectedInitialRanges-1:], splits; !rangesMatchSplits(a, e) {
		t.Fatalf("Found ranges: %v\nexpected: %v", a, e)
	}
}
func testRaftUpdateInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	minAffected := int64(server.ExpectedInitialRangeCount())

	mustPost := func(freeze bool) server.ClusterFreezeResponse {
		reply, err := postFreeze(c, freeze)
		if err != nil {
			t.Fatal(util.ErrorfSkipFrames(1, "%v", err))
		}
		return reply
	}

	if reply := mustPost(false); reply.RangesAffected != 0 {
		t.Fatalf("expected initial unfreeze to affect no ranges, got %d", reply.RangesAffected)
	}

	if reply := mustPost(true); reply.RangesAffected < minAffected {
		t.Fatalf("expected >=%d frozen ranges, got %d", minAffected, reply.RangesAffected)
	}

	if reply := mustPost(true); reply.RangesAffected != 0 {
		t.Fatalf("expected second freeze to affect no ranges, got %d", reply.RangesAffected)
	}

	if reply := mustPost(false); reply.RangesAffected < minAffected {
		t.Fatalf("expected >=%d thawed ranges, got %d", minAffected, reply.RangesAffected)
	}

	num := c.NumNodes()
	if num < 3 {
		t.Skip("skipping remainder of test; needs at least 3 nodes")
	}

	// Kill the last node.
	if err := c.Kill(num - 1); err != nil {
		t.Fatal(err)
	}

	// An attempt to freeze should get stuck (since it does not get confirmation
	// of the last node receiving the freeze command).
	if reply, err := postFreeze(c, true); !testutils.IsError(err, "timed out waiting for Range|Timeout exceeded while") {
		t.Fatalf("expected timeout, got %v: %v", err, reply)
	}

	// Shut down the remaining nodes and restart them.
	for i := 0; i < num-1; i++ {
		if err := c.Kill(i); err != nil {
			t.Fatal(err)
		}
	}
	for i := 0; i < num; i++ {
		if err := c.Restart(i); err != nil {
			t.Fatal(err)
		}
	}

	// The cluster should now be fully operational (at least after waiting
	// a little bit) since each node tries to unfreeze everything when it
	// starts.
	if err := util.RetryForDuration(time.Minute, func() error {
		// TODO(tschottdorf): moving the client creation outside of the retry
		// loop will break the test with the following message:
		//
		//   client/rpc_sender.go:61: roachpb.Batch RPC failed as client
		//   connection was closed
		//
		// Perhaps the cluster updates the address too late after restarting
		// the node.
		db, dbStopper := c.NewClient(t, 0)
		defer dbStopper.Stop()

		_, err := db.Scan(keys.LocalMax, roachpb.KeyMax, 0)
		if err != nil {
			log.Info(err)
		}
		return err
	}); err != nil {
		t.Fatal(err)
	}

	// Unfreezing again should be a no-op.
	if reply, err := postFreeze(c, false); err != nil {
		t.Fatal(err)
	} else if reply.RangesAffected > 0 {
		t.Fatalf("still %d frozen ranges", reply.RangesAffected)
	}
}