Example #1
// TestReplicateRange verifies basic replication functionality by creating two stores
// and a range, replicating the range to the second store, and reading its data there.
func TestReplicateRange(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := multiTestContext{}
	mtc.Start(t, 2)
	defer mtc.Stop()

	// Issue a command on the first node before replicating.
	incArgs, incResp := incrementArgs([]byte("a"), 5, 1, mtc.stores[0].StoreID())
	if err := mtc.stores[0].ExecuteCmd(context.Background(), proto.Call{Args: incArgs, Reply: incResp}); err != nil {
		t.Fatal(err)
	}

	rng, err := mtc.stores[0].GetRange(1)
	if err != nil {
		t.Fatal(err)
	}

	if err := rng.ChangeReplicas(proto.ADD_REPLICA,
		proto.Replica{
			NodeID:  mtc.stores[1].Ident.NodeID,
			StoreID: mtc.stores[1].Ident.StoreID,
		}); err != nil {
		t.Fatal(err)
	}
	// Verify no intent remains on range descriptor key.
	key := keys.RangeDescriptorKey(rng.Desc().StartKey)
	desc := proto.RangeDescriptor{}
	if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key, mtc.stores[0].Clock().Now(), true, nil, &desc); !ok || err != nil {
		t.Fatalf("fetching range descriptor yielded %t, %s", ok, err)
	}
	// Verify that in time, no intents remain on meta addressing
	// keys, and that the range descriptor on the meta records is correct.
	util.SucceedsWithin(t, 1*time.Second, func() error {
		meta2 := keys.RangeMetaKey(proto.KeyMax)
		meta1 := keys.RangeMetaKey(meta2)
		for _, key := range []proto.Key{meta2, meta1} {
			metaDesc := proto.RangeDescriptor{}
			if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key, mtc.stores[0].Clock().Now(), true, nil, &metaDesc); !ok || err != nil {
				return util.Errorf("failed to resolve %s", key)
			}
			if !reflect.DeepEqual(metaDesc, desc) {
				return util.Errorf("descs not equal: %+v != %+v", metaDesc, desc)
			}
		}
		return nil
	})

	// Verify that the same data is available on the replica.
	util.SucceedsWithin(t, 1*time.Second, func() error {
		getArgs, getResp := getArgs([]byte("a"), 1, mtc.stores[1].StoreID())
		getArgs.ReadConsistency = proto.INCONSISTENT
		if err := mtc.stores[1].ExecuteCmd(context.Background(), proto.Call{Args: getArgs, Reply: getResp}); err != nil {
			return util.Errorf("failed to read data")
		}
		if v := mustGetInteger(getResp.Value); v != 5 {
			return util.Errorf("failed to read correct data: %d", v)
		}
		return nil
	})
}
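
Every example in this collection waits for asynchronous state through util.SucceedsWithin. For reference, a retry helper in that style could be sketched as follows; this is only an assumed shape built on the standard testing and time packages, not the actual util implementation.

// succeedsWithin is a sketch of a SucceedsWithin-style helper (assumed
// shape): retry fn until it returns nil or the duration elapses, then
// fail the test with the last error.
func succeedsWithin(t *testing.T, d time.Duration, fn func() error) {
	deadline := time.Now().Add(d)
	var lastErr error
	for {
		if lastErr = fn(); lastErr == nil {
			return
		}
		if time.Now().After(deadline) {
			t.Fatalf("condition failed to hold within %s: %v", d, lastErr)
		}
		// Back off briefly between attempts so the loop doesn't spin hot.
		time.Sleep(10 * time.Millisecond)
	}
}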
Example #2
// TestReplicateRange verifies basic replication functionality by creating two stores
// and a range, replicating the range to the second store, and reading its data there.
func TestReplicateRange(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := startMultiTestContext(t, 2)
	defer mtc.Stop()

	// Issue a command on the first node before replicating.
	incArgs := incrementArgs([]byte("a"), 5)
	if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &incArgs); err != nil {
		t.Fatal(err)
	}

	rng, err := mtc.stores[0].GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	if err := rng.ChangeReplicas(roachpb.ADD_REPLICA,
		roachpb.ReplicaDescriptor{
			NodeID:  mtc.stores[1].Ident.NodeID,
			StoreID: mtc.stores[1].Ident.StoreID,
		}, rng.Desc()); err != nil {
		t.Fatal(err)
	}
	// Verify no intent remains on range descriptor key.
	key := keys.RangeDescriptorKey(rng.Desc().StartKey)
	desc := roachpb.RangeDescriptor{}
	if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key, mtc.stores[0].Clock().Now(), true, nil, &desc); !ok || err != nil {
		t.Fatalf("fetching range descriptor yielded %t, %s", ok, err)
	}
	// Verify that in time, no intents remain on meta addressing
	// keys, and that the range descriptor on the meta records is correct.
	util.SucceedsWithin(t, 1*time.Second, func() error {
		meta2 := keys.Addr(keys.RangeMetaKey(roachpb.RKeyMax))
		meta1 := keys.Addr(keys.RangeMetaKey(meta2))
		for _, key := range []roachpb.RKey{meta2, meta1} {
			metaDesc := roachpb.RangeDescriptor{}
			if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key.AsRawKey(), mtc.stores[0].Clock().Now(), true, nil, &metaDesc); !ok || err != nil {
				return util.Errorf("failed to resolve %s", key.AsRawKey())
			}
			if !reflect.DeepEqual(metaDesc, desc) {
				return util.Errorf("descs not equal: %+v != %+v", metaDesc, desc)
			}
		}
		return nil
	})

	// Verify that the same data is available on the replica.
	util.SucceedsWithin(t, replicaReadTimeout, func() error {
		getArgs := getArgs([]byte("a"))
		if reply, err := client.SendWrappedWith(rg1(mtc.stores[1]), nil, roachpb.Header{
			ReadConsistency: roachpb.INCONSISTENT,
		}, &getArgs); err != nil {
			return util.Errorf("failed to read data: %s", err)
		} else if e, v := int64(5), mustGetInt(reply.(*roachpb.GetResponse).Value); v != e {
			return util.Errorf("failed to read correct data: expected %d, got %d", e, v)
		}
		return nil
	})
}
Example #3
// TestStoreZoneUpdateAndRangeSplit verifies that modifying the zone
// configuration changes range max bytes and Range.maybeSplit() takes
// max bytes into account when deciding whether to enqueue a range for
// splitting. It further verifies that the range is in fact split on
// exceeding zone's RangeMaxBytes.
func TestStoreZoneUpdateAndRangeSplit(t *testing.T) {
	defer leaktest.AfterTest(t)
	store, stopper := createTestStore(t)
	config.TestingSetupZoneConfigHook(stopper)
	defer stopper.Stop()

	maxBytes := int64(1 << 16)
	// Set max bytes.
	descID := uint32(keys.MaxReservedDescID + 1)
	config.TestingSetZoneConfig(descID, &config.ZoneConfig{RangeMaxBytes: maxBytes})

	// Trigger gossip callback.
	if err := store.Gossip().AddInfoProto(gossip.KeySystemConfig, &config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	// Wait for the range to be split along table boundaries.
	originalRange := store.LookupReplica(roachpb.RKey(keys.UserTableDataMin), nil)
	var rng *storage.Replica
	util.SucceedsWithin(t, splitTimeout, func() error {
		rng = store.LookupReplica(keys.MakeTablePrefix(descID), nil)
		if rng.RangeID == originalRange.RangeID {
			return util.Errorf("expected new range created by split")
		}
		return nil
	})

	// Check range's max bytes settings.
	if rng.GetMaxBytes() != maxBytes {
		t.Fatalf("range max bytes mismatch, got: %d, expected: %d", rng.GetMaxBytes(), maxBytes)
	}

	// Make sure the second range goes to the end.
	if !roachpb.RKeyMax.Equal(rng.Desc().EndKey) {
		t.Fatalf("second range has split: %+v", rng.Desc())
	}

	// Fill the range at the table prefix with data until it exceeds its max bytes.
	fillRange(store, rng.RangeID, keys.MakeTablePrefix(descID), maxBytes, t)

	// Verify that the range is in fact split (give it a few seconds for very
	// slow test machines).
	var newRng *storage.Replica
	util.SucceedsWithin(t, splitTimeout, func() error {
		newRng = store.LookupReplica(keys.MakeTablePrefix(descID+1), nil)
		if newRng.RangeID == rng.RangeID {
			return util.Errorf("range has not yet split")
		}
		return nil
	})

	// Make sure the new range goes to the end.
	if !roachpb.RKeyMax.Equal(newRng.Desc().EndKey) {
		t.Fatalf("second range has split: %+v", rng.Desc())
	}
}
Example #4
// TestStoreZoneUpdateAndRangeSplit verifies that modifying the zone
// configuration changes range max bytes and Range.maybeSplit() takes
// max bytes into account when deciding whether to enqueue a range for
// splitting. It further verifies that the range is in fact split on
// exceeding zone's RangeMaxBytes.
func TestStoreZoneUpdateAndRangeSplit(t *testing.T) {
	defer leaktest.AfterTest(t)
	store, stopper := createTestStore(t)
	config.TestingSetupZoneConfigHook(stopper)
	defer stopper.Stop()

	maxBytes := int64(1 << 16)
	// Set max bytes.
	descID := uint32(keys.MaxReservedDescID + 1)
	config.TestingSetZoneConfig(descID, &config.ZoneConfig{RangeMaxBytes: maxBytes})

	// Trigger gossip callback.
	if err := store.Gossip().AddInfoProto(gossip.KeySystemConfig, &config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	tableBoundary := keys.MakeTablePrefix(descID)

	{
		var rng *storage.Replica

		// Wait for the range to be split along table boundaries.
		expectedRSpan := roachpb.RSpan{Key: roachpb.RKey(tableBoundary), EndKey: roachpb.RKeyMax}
		util.SucceedsWithin(t, splitTimeout, func() error {
			rng = store.LookupReplica(tableBoundary, nil)
			if actualRSpan := rng.Desc().RSpan(); !actualRSpan.Equal(expectedRSpan) {
				return util.Errorf("expected range %s to span %s", rng, expectedRSpan)
			}
			return nil
		})

		// Check range's max bytes settings.
		if actualMaxBytes := rng.GetMaxBytes(); actualMaxBytes != maxBytes {
			t.Fatalf("range %s max bytes mismatch, got: %d, expected: %d", rng, actualMaxBytes, maxBytes)
		}

		// Fill the range at the table prefix with data until it exceeds its max bytes.
		fillRange(store, rng.RangeID, tableBoundary, maxBytes, t)
	}

	// Verify that the range is in fact split.
	util.SucceedsWithin(t, splitTimeout, func() error {
		rng := store.LookupReplica(keys.MakeTablePrefix(descID+1), nil)
		rngDesc := rng.Desc()
		rngStart, rngEnd := rngDesc.StartKey, rngDesc.EndKey
		if rngStart.Equal(tableBoundary) || !rngEnd.Equal(roachpb.RKeyMax) {
			return util.Errorf("range %s has not yet split", rng)
		}
		return nil
	})
}
Example #5
// TestReplicaGCQueueDropReplicaDirect verifies that a removed replica is
// immediately cleaned up.
func TestReplicaGCQueueDropReplicaDirect(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := &multiTestContext{}
	const numStores = 3
	rangeID := roachpb.RangeID(1)

	// In this test, the Replica on the second Node is removed, and the test
// verifies that the second Node adds this Replica to its RangeGCQueue. However,
	// the queue does a consistent lookup which will usually be read from
	// Node 1. Hence, if Node 1 hasn't processed the removal when Node 2 has,
	// no GC will take place since the consistent RangeLookup hits the first
	// Node. We use the TestingCommandFilter to make sure that the second Node
	// waits for the first.
	storage.TestingCommandFilter = func(id roachpb.StoreID, args roachpb.Request, _ roachpb.Header) error {
		et, ok := args.(*roachpb.EndTransactionRequest)
		if !ok || id != 2 {
			return nil
		}
		rct := et.InternalCommitTrigger.GetChangeReplicasTrigger()
		if rct == nil || rct.ChangeType != roachpb.REMOVE_REPLICA {
			return nil
		}
		util.SucceedsWithin(t, time.Second, func() error {
			r, err := mtc.stores[0].GetReplica(rangeID)
			if err != nil {
				return err
			}
			if i, _ := r.Desc().FindReplica(2); i >= 0 {
				return errors.New("expected second node gone from first node's known replicas")
			}
			return nil
		})
		return nil
	}

	defer func() { storage.TestingCommandFilter = nil }()

	mtc.Start(t, numStores)
	defer mtc.Stop()

	mtc.replicateRange(rangeID, 1, 2)
	mtc.unreplicateRange(rangeID, 1)

	// Make sure the range is removed from the store.
	util.SucceedsWithin(t, 10*time.Second, func() error {
		if _, err := mtc.stores[1].GetReplica(rangeID); !testutils.IsError(err, "range .* was not found") {
			return util.Errorf("expected range removal")
		}
		return nil
	})
}
Example #6
// TestClientDisconnectRedundant verifies that the gossip server
// will drop an outgoing client connection that is already an
// inbound client connection of another node.
func TestClientDisconnectRedundant(t *testing.T) {
	defer leaktest.AfterTest(t)
	local, remote, stopper := startGossip(t)
	defer stopper.Stop()
	// startClient requires locks are held, so acquire here.
	local.mu.Lock()
	remote.mu.Lock()

	rAddr := remote.is.NodeAddr
	lAddr := local.is.NodeAddr
	local.startClient(rAddr, stopper)
	remote.startClient(lAddr, stopper)
	local.mu.Unlock()
	remote.mu.Unlock()
	local.manage(stopper)
	remote.manage(stopper)
	util.SucceedsWithin(t, 10*time.Second, func() error {
		// Check which of the clients is connected to the other.
		ok1 := local.findClient(func(c *client) bool { return c.addr.String() == rAddr.String() }) != nil
		ok2 := remote.findClient(func(c *client) bool { return c.addr.String() == lAddr.String() }) != nil
		// We expect node 1 to disconnect; if both are still connected,
		// it's possible that node 1 gossiped before node 2 connected, in
		// which case we have to gossip from node 1 to trigger the
		// disconnect redundant client code.
		if ok1 && ok2 {
			if err := local.AddInfo("local-key", nil, time.Second); err != nil {
				t.Fatal(err)
			}
		} else if !ok1 && ok2 && verifyServerMaps(local, 1) && verifyServerMaps(remote, 0) {
			return nil
		}
		return errors.New("local client to remote not yet closed as redundant")
	})
}
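
The verifyServerMaps helper used in this example (and in Example #11) is not reproduced in this collection. A hedged sketch of what it might check, assuming the nodeMap field visible in Example #24 tracks incoming client connections, is:

// verifyServerMaps (assumed shape, not the real helper): report whether
// the gossip server currently tracks exactly expCount incoming client
// connections in its nodeMap.
func verifyServerMaps(g *Gossip, expCount int) bool {
	g.mu.Lock()
	defer g.mu.Unlock()
	return len(g.nodeMap) == expCount
}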
Example #7
// TestRemoveRangeWithoutGC ensures that we do not panic when a
// replica has been removed but not yet GC'd (and therefore
// does not have an active raft group).
func TestRemoveRangeWithoutGC(t *testing.T) {
	defer leaktest.AfterTest(t)

	mtc := startMultiTestContext(t, 2)
	defer mtc.Stop()
	// Disable the GC queue and move the range from store 0 to 1.
	mtc.stores[0].DisableReplicaGCQueue(true)
	const rangeID roachpb.RangeID = 1
	mtc.replicateRange(rangeID, 1)
	mtc.unreplicateRange(rangeID, 0)

	// Wait for store 0 to process the removal.
	util.SucceedsWithin(t, time.Second, func() error {
		rep, err := mtc.stores[0].GetReplica(rangeID)
		if err != nil {
			return err
		}
		desc := rep.Desc()
		if len(desc.Replicas) != 1 {
			return util.Errorf("range has %d replicas", len(desc.Replicas))
		}
		return nil
	})

	// The replica's data is still on disk even though the Replica
	// object is removed.
	var desc roachpb.RangeDescriptor
	descKey := keys.RangeDescriptorKey(roachpb.RKeyMin)
	if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), descKey,
		mtc.stores[0].Clock().Now(), true, nil, &desc); err != nil {
		t.Fatal(err)
	} else if !ok {
		t.Fatal("expected range descriptor to be present")
	}

	// Stop and restart the store to reset the replica's raftGroup
	// pointer to nil. As long as the store has not been restarted it
	// can continue to use its last known replica ID.
	mtc.stopStore(0)
	mtc.restartStore(0)
	// Turn off the GC queue to ensure that the replica is deleted at
	// startup instead of by the scanner. This is not 100% guaranteed
	// since the scanner could have already run at this point, but it
	// should be enough to prevent us from accidentally relying on the
	// scanner.
	mtc.stores[0].DisableReplicaGCQueue(true)

	// The Replica object is not recreated.
	if _, err := mtc.stores[0].GetReplica(rangeID); err == nil {
		t.Fatalf("expected replica to be missing")
	}

	// And the data is no longer on disk.
	if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), descKey,
		mtc.stores[0].Clock().Now(), true, nil, &desc); err != nil {
		t.Fatal(err)
	} else if ok {
		t.Fatal("expected range descriptor to be absent")
	}
}
Example #8
// checkGossip fetches the gossip infoStore from each node and invokes the given
// function. The test passes if the function returns nil for every node,
// retrying for up to the given duration.
func checkGossip(t *testing.T, l *localcluster.Cluster, d time.Duration,
	f checkGossipFunc) {
	util.SucceedsWithin(t, d, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case e := <-l.Events:
			if log.V(1) {
				log.Infof("%+v", e)
			}
			return fmt.Errorf("event: %+v", e)
		case <-time.After(1 * time.Second):
		}

		for i, node := range l.Nodes {
			var m map[string]interface{}
			if err := node.GetJSON("", "/_status/gossip/local", &m); err != nil {
				return err
			}
			infos := m["infos"].(map[string]interface{})
			if err := f(infos); err != nil {
				return util.Errorf("node %d: %s", i, err)
			}
		}

		return nil
	})
}
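
checkGossip is driven by a checkGossipFunc, and the callers in Examples #10 and #13 pass hasPeers(n) without showing it. A hedged sketch of such a function follows, built only on the standard strings and fmt packages; the "node:" key prefix, the checkGossipFunc definition (func(map[string]interface{}) error), and the exact signature are assumptions inferred from the call sites.

// hasPeers (assumed shape, not the real helper): build a checkGossipFunc
// that verifies the gossip infoStore contains entries for the expected
// number of node descriptors.
func hasPeers(expected int) checkGossipFunc {
	return func(infos map[string]interface{}) error {
		count := 0
		for k := range infos {
			// Node descriptors are assumed to be gossiped under "node:<id>" keys.
			if strings.HasPrefix(k, "node:") {
				count++
			}
		}
		if count != expected {
			return fmt.Errorf("expected %d peers, found %d", expected, count)
		}
		return nil
	}
}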
Example #9
// TestMetricsRecording verifies that Node statistics are periodically recorded
// as time series data.
func TestMetricsRecording(t *testing.T) {
	defer leaktest.AfterTest(t)
	tsrv := &TestServer{}
	tsrv.Ctx = NewTestContext()
	tsrv.Ctx.MetricsFrequency = 5 * time.Millisecond
	if err := tsrv.Start(); err != nil {
		t.Fatal(err)
	}
	defer tsrv.Stop()

	checkTimeSeriesKey := func(now int64, keyName string) error {
		key := ts.MakeDataKey(keyName, "", ts.Resolution10s, now)
		data := &proto.InternalTimeSeriesData{}
		return tsrv.db.GetProto(key, data)
	}

	// Verify that metrics for the current timestamp are recorded. This should
	// be true very quickly.
	util.SucceedsWithin(t, time.Second, func() error {
		now := tsrv.Clock().PhysicalNow()
		if err := checkTimeSeriesKey(now, "cr.store.livebytes.1"); err != nil {
			return err
		}
		if err := checkTimeSeriesKey(now, "cr.node.sys.allocbytes.1"); err != nil {
			return err
		}
		return nil
	})
}
Example #10
func TestBuildInfo(t *testing.T) {
	c := StartCluster(t)
	defer c.AssertAndStop(t)

	checkGossip(t, c, 20*time.Second, hasPeers(c.NumNodes()))

	util.SucceedsWithin(t, 10*time.Second, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		default:
		}
		var r struct {
			BuildInfo map[string]string
		}
		if err := getJSON(c.URL(0), "/_status/details/local", &r); err != nil {
			return err
		}
		for _, key := range []string{"goVersion", "tag", "time", "dependencies"} {
			if val, ok := r.BuildInfo[key]; !ok {
				t.Errorf("build info missing for \"%s\"", key)
			} else if val == "" {
				t.Errorf("build info not set for \"%s\"", key)
			}
		}
		return nil
	})
}
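
The getJSON helper called above (and in the later checkGossip variant) is not shown. A minimal sketch of what it presumably does, using only net/http, encoding/json, and fmt, is given below; the signature is inferred from the call sites and is an assumption.

// getJSON (assumed shape, not the real helper): issue a GET against
// base+path and decode the JSON response body into dest.
func getJSON(base, path string, dest interface{}) error {
	resp, err := http.Get(base + path)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %s for %s%s", resp.Status, base, path)
	}
	return json.NewDecoder(resp.Body).Decode(dest)
}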
Example #11
// TestClientDisallowMultipleConns verifies that the server disallows
// multiple connections from the same client node ID.
func TestClientDisallowMultipleConns(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t)
	remote := startGossip(2, stopper, t)
	local.mu.Lock()
	remote.mu.Lock()
	rAddr := remote.is.NodeAddr
	// Start two clients from local to remote. RPC client cache is
	// disabled via the context, so we'll start two different outgoing
	// connections.
	local.startClient(rAddr, stopper)
	local.startClient(rAddr, stopper)
	local.mu.Unlock()
	remote.mu.Unlock()
	local.manage()
	remote.manage()
	util.SucceedsWithin(t, 10*time.Second, func() error {
		// Verify that the remote server has only a single incoming
		// connection and the local server has only a single outgoing
		// connection.
		local.mu.Lock()
		remote.mu.Lock()
		outgoing := local.outgoing.len()
		incoming := remote.incoming.len()
		local.mu.Unlock()
		remote.mu.Unlock()
		if outgoing == 1 && incoming == 1 && verifyServerMaps(local, 0) && verifyServerMaps(remote, 1) {
			return nil
		}
		return util.Errorf("incorrect number of incoming (%d) or outgoing (%d) connections", incoming, outgoing)
	})
}
Example #12
// TestStoreRangeSplitWithMaxBytesUpdate tests a scenario where a new
// zone config that updates the max bytes is set and triggers a range
// split.
func TestStoreRangeSplitWithMaxBytesUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)
	store, stopper := createTestStore(t)
	config.TestingSetupZoneConfigHook(stopper)
	defer stopper.Stop()

	origRng := store.LookupReplica(roachpb.RKeyMin, nil)

	// Set max bytes.
	maxBytes := int64(1 << 16)
	config.TestingSetZoneConfig(1000, &config.ZoneConfig{RangeMaxBytes: maxBytes})

	// Trigger gossip callback.
	if err := store.Gossip().AddInfoProto(gossip.KeySystemConfig, &config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	// Verify that the range is split and the new range has the correct max bytes.
	util.SucceedsWithin(t, time.Second, func() error {
		newRng := store.LookupReplica(keys.MakeTablePrefix(1000), nil)
		if newRng.Desc().RangeID == origRng.Desc().RangeID {
			return util.Errorf("expected new range created by split")
		}
		if newRng.GetMaxBytes() != maxBytes {
			return util.Errorf("expected %d max bytes for the new range, but got %d",
				maxBytes, newRng.GetMaxBytes())
		}
		return nil
	})
}
Example #13
func TestBuildInfo(t *testing.T) {
	if *numLocal == 0 {
		t.Skip("skipping since not run against local cluster")
	}
	l := cluster.CreateLocal(1, 1, *logDir, stopper) // intentionally using a local cluster
	l.Start()
	defer l.AssertAndStop(t)

	checkGossip(t, l, 20*time.Second, hasPeers(l.NumNodes()))

	util.SucceedsWithin(t, 10*time.Second, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(200 * time.Millisecond):
		}
		var r struct {
			BuildInfo map[string]string
		}
		if err := l.Nodes[0].GetJSON("", "/_status/details/local", &r); err != nil {
			return err
		}
		for _, key := range []string{"goVersion", "tag", "time", "dependencies"} {
			if val, ok := r.BuildInfo[key]; !ok {
				t.Errorf("build info missing for \"%s\"", key)
			} else if val == "" {
				t.Errorf("build info not set for \"%s\"", key)
			}
		}
		return nil
	})
}
Example #14
// checkGossip fetches the gossip infoStore from each node and invokes the given
// function. The test passes if the function returns nil for every node,
// retrying for up to the given duration.
func checkGossip(t *testing.T, c cluster.Cluster, d time.Duration,
	f checkGossipFunc) {
	util.SucceedsWithin(t, d, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(1 * time.Second):
		}

		for i := 0; i < c.NumNodes(); i++ {
			var m map[string]interface{}
			if err := getJSON(c.URL(i), "/_status/gossip/local", &m); err != nil {
				return err
			}
			infos, ok := m["infos"].(map[string]interface{})
			if !ok {
				return errors.New("no infos yet")
			}
			if err := f(infos); err != nil {
				return util.Errorf("node %d: %s", i, err)
			}
		}

		return nil
	})
}
Example #15
// TestScannerTiming verifies that ranges are scanned at a pace that
// matches scanInterval, regardless of how many ranges there are.
func TestScannerTiming(t *testing.T) {
	defer leaktest.AfterTest(t)
	const count = 3
	const runTime = 100 * time.Millisecond
	const maxError = 7500 * time.Microsecond
	durations := []time.Duration{
		15 * time.Millisecond,
		25 * time.Millisecond,
	}
	for i, duration := range durations {
		util.SucceedsWithin(t, 10*time.Second, func() error {
			ranges := newTestRangeSet(count, t)
			q := &testQueue{}
			s := newReplicaScanner(duration, 0, ranges)
			s.AddQueues(q)
			mc := hlc.NewManualClock(0)
			clock := hlc.NewClock(mc.UnixNano)
			stopper := stop.NewStopper()
			s.Start(clock, stopper)
			time.Sleep(runTime)
			stopper.Stop()

			avg := s.avgScan()
			log.Infof("%d: average scan: %s", i, avg)
			if avg.Nanoseconds()-duration.Nanoseconds() > maxError.Nanoseconds() ||
				duration.Nanoseconds()-avg.Nanoseconds() > maxError.Nanoseconds() {
				return util.Errorf("expected %s, got %s: exceeds max error of %s", duration, avg, maxError)
			}
			return nil
		})
	}
}
Example #16
// TestGossipCullNetwork verifies that a client will be culled from
// the network periodically (at cullInterval duration intervals).
func TestGossipCullNetwork(t *testing.T) {
	defer leaktest.AfterTest(t)

	// Create the local gossip and minPeers peers.
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t)
	local.SetCullInterval(5 * time.Millisecond)
	peers := []*Gossip{}
	for i := 0; i < minPeers; i++ {
		peers = append(peers, startGossip(roachpb.NodeID(i+2), stopper, t))
	}

	// Start clients to all peers and start the local gossip's manage routine.
	local.mu.Lock()
	for _, p := range peers {
		pAddr := p.is.NodeAddr
		local.startClient(pAddr, stopper)
	}
	local.mu.Unlock()
	local.manage(stopper)

	util.SucceedsWithin(t, 10*time.Second, func() error {
		// Verify that a client is closed within the cull interval.
		if len(local.Outgoing()) == minPeers-1 {
			return nil
		}
		return errors.New("no network culling occurred")
	})
}
Example #17
// replicateRange replicates the given range onto the given stores.
func (m *multiTestContext) replicateRange(rangeID proto.RangeID, sourceStoreIndex int, dests ...int) {
	rng, err := m.stores[sourceStoreIndex].GetReplica(rangeID)
	if err != nil {
		m.t.Fatal(err)
	}

	for _, dest := range dests {
		err = rng.ChangeReplicas(proto.ADD_REPLICA,
			proto.Replica{
				NodeID:  m.stores[dest].Ident.NodeID,
				StoreID: m.stores[dest].Ident.StoreID,
			}, rng.Desc())
		if err != nil {
			m.t.Fatal(err)
		}
	}

	// Wait for the replication to complete on all destination nodes.
	util.SucceedsWithin(m.t, time.Second, func() error {
		for _, dest := range dests {
			// Use LookupReplica(keys) instead of GetReplica(rangeID) to ensure that the
			// snapshot has been transferred and the descriptor initialized.
			if m.stores[dest].LookupReplica(rng.Desc().StartKey, nil) == nil {
				return util.Errorf("range not found on store %d", dest)
			}
		}
		return nil
	})
}
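
Many of the tests above also call unreplicateRange, which is not included in this collection. Assuming it simply mirrors the replicateRange helper above with a REMOVE_REPLICA change (the proto.REMOVE_REPLICA constant is an assumption based on the roachpb.REMOVE_REPLICA usage in Example #5), a sketch might look like this:

// unreplicateRange (assumed shape, not the real helper): remove the
// given range's replica from each destination store, mirroring
// replicateRange above.
func (m *multiTestContext) unreplicateRange(rangeID proto.RangeID, dests ...int) {
	rng, err := m.stores[0].GetReplica(rangeID)
	if err != nil {
		m.t.Fatal(err)
	}
	for _, dest := range dests {
		if err := rng.ChangeReplicas(proto.REMOVE_REPLICA,
			proto.Replica{
				NodeID:  m.stores[dest].Ident.NodeID,
				StoreID: m.stores[dest].Ident.StoreID,
			}, rng.Desc()); err != nil {
			m.t.Fatal(err)
		}
	}
}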
Example #18
func checkRangeReplication(t *testing.T, cluster *localcluster.Cluster, d time.Duration) {
	// Always talk to node 0.
	client, dbStopper := makeDBClient(t, cluster, 0)
	defer dbStopper.Stop()

	wantedReplicas := 3
	if len(cluster.Nodes) < 3 {
		wantedReplicas = len(cluster.Nodes)
	}

	log.Infof("waiting for first range to have %d replicas", wantedReplicas)

	util.SucceedsWithin(t, d, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(1 * time.Second):
		}

		foundReplicas, err := countRangeReplicas(client)
		if err != nil {
			return err
		}

		log.Infof("found %d replicas", foundReplicas)
		if foundReplicas >= wantedReplicas {
			return nil
		}
		return fmt.Errorf("expected %d replicas, only found %d", wantedReplicas, foundReplicas)
	})
}
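
countRangeReplicas is referenced above but not shown. A hedged sketch, assuming the client exposes the same GetProto accessor seen in Example #9 and that counting the first range's replicas is sufficient, could be (the *client.DB parameter type and the key helpers are assumptions drawn from the other examples):

// countRangeReplicas (assumed shape, not the real helper): read the
// first range's descriptor through the client and count its replicas.
func countRangeReplicas(db *client.DB) (int, error) {
	desc := &proto.RangeDescriptor{}
	if err := db.GetProto(keys.RangeDescriptorKey(proto.KeyMin), desc); err != nil {
		return 0, err
	}
	return len(desc.Replicas), nil
}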
Example #19
func TestBuildInfo(t *testing.T) {
	l := localcluster.Create(1, stopper)
	l.Start()
	defer l.AssertAndStop(t)

	util.SucceedsWithin(t, 10*time.Second, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(200 * time.Millisecond):
		}
		var r struct {
			BuildInfo map[string]string
		}
		if err := l.Nodes[0].GetJSON("", "/_status/details/local", &r); err != nil {
			return err
		}
		for _, key := range []string{"goVersion", "tag", "time", "dependencies"} {
			if val, ok := r.BuildInfo[key]; !ok {
				t.Errorf("build info missing for \"%s\"", key)
			} else if val == "" {
				t.Errorf("build info not set for \"%s\"", key)
			}
		}
		return nil
	})
}
Example #20
// TestClientGossip verifies a client can gossip a delta to the server.
func TestClientGossip(t *testing.T) {
	defer leaktest.AfterTest(t)
	local, remote, stopper := startGossip(t)
	disconnected := make(chan *client, 1)
	client := newClient(remote.is.NodeAddr)

	defer func() {
		stopper.Stop()
		if client != <-disconnected {
			t.Errorf("expected client disconnect after remote close")
		}
	}()

	if err := local.AddInfo("local-key", nil, time.Second); err != nil {
		t.Fatal(err)
	}
	if err := remote.AddInfo("remote-key", nil, time.Second); err != nil {
		t.Fatal(err)
	}

	// Use an insecure context. We're talking to a TCP socket which is not in the certs.
	lclock := hlc.NewClock(hlc.UnixNano)
	rpcContext := rpc.NewContext(&base.Context{Insecure: true}, lclock, stopper)
	client.start(local, disconnected, rpcContext, stopper)

	util.SucceedsWithin(t, 500*time.Millisecond, func() error {
		if _, err := remote.GetInfo("local-key"); err != nil {
			return err
		}
		if _, err := local.GetInfo("remote-key"); err != nil {
			return err
		}
		return nil
	})
}
Example #21
// TestRaftAfterRemoveRange verifies that the raft state removes
// a remote node correctly after the Replica was removed from the Store.
func TestRaftAfterRemoveRange(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := startMultiTestContext(t, 3)
	defer mtc.Stop()

	// Make the split.
	splitArgs := adminSplitArgs(roachpb.KeyMin, []byte("b"))
	if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &splitArgs); err != nil {
		t.Fatal(err)
	}

	rangeID := roachpb.RangeID(2)
	mtc.replicateRange(rangeID, 1, 2)

	mtc.unreplicateRange(rangeID, 2)
	mtc.unreplicateRange(rangeID, 1)

	// Wait for the removal to be processed.
	util.SucceedsWithin(t, time.Second, func() error {
		_, err := mtc.stores[1].GetReplica(rangeID)
		if _, ok := err.(*roachpb.RangeNotFoundError); ok {
			return nil
		} else if err != nil {
			return err
		}
		return util.Errorf("range still exists")
	})

	replica1 := roachpb.ReplicaDescriptor{
		ReplicaID: roachpb.ReplicaID(mtc.stores[1].StoreID()),
		NodeID:    roachpb.NodeID(mtc.stores[1].StoreID()),
		StoreID:   mtc.stores[1].StoreID(),
	}
	replica2 := roachpb.ReplicaDescriptor{
		ReplicaID: roachpb.ReplicaID(mtc.stores[2].StoreID()),
		NodeID:    roachpb.NodeID(mtc.stores[2].StoreID()),
		StoreID:   mtc.stores[2].StoreID(),
	}
	if err := mtc.transport.Send(&storage.RaftMessageRequest{
		GroupID:     0,
		ToReplica:   replica1,
		FromReplica: replica2,
		Message: raftpb.Message{
			From: uint64(replica2.ReplicaID),
			To:   uint64(replica1.ReplicaID),
			Type: raftpb.MsgHeartbeat,
		}}); err != nil {
		t.Fatal(err)
	}
	// Execute another replica change to ensure that raft has processed
	// the heartbeat just sent.
	mtc.replicateRange(roachpb.RangeID(1), 1)

	// Expire leases to ensure any remaining intent resolutions can complete.
	// TODO(bdarnell): understand why some tests need this.
	mtc.expireLeaderLeases()
}
Example #22
// TestStoreScanInconsistentResolvesIntents lays down 10 intents,
// commits the txn without resolving intents, then does repeated
// inconsistent reads until the data shows up, showing that the
// inconsistent reads are triggering intent resolution.
func TestStoreScanInconsistentResolvesIntents(t *testing.T) {
	defer leaktest.AfterTest(t)
	// This test relies on having a committed Txn record and open intents on
	// the same Range. This only works with auto-gc turned off; alternatively
	// the test could move to splitting its underlying Range.
	defer withoutTxnAutoGC()()
	var intercept atomic.Value
	intercept.Store(true)
	TestingCommandFilter = func(args proto.Request) error {
		if _, ok := args.(*proto.ResolveIntentRequest); ok && intercept.Load().(bool) {
			return util.Errorf("error on purpose")
		}
		return nil
	}
	store, _, stopper := createTestStore(t)
	defer func() { TestingCommandFilter = nil }()
	defer stopper.Stop()

	// Lay down 10 intents to scan over.
	txn := newTransaction("test", proto.Key("foo"), 1, proto.SERIALIZABLE, store.ctx.Clock)
	keys := []proto.Key{}
	for j := 0; j < 10; j++ {
		key := proto.Key(fmt.Sprintf("key%02d", j))
		keys = append(keys, key)
		args := putArgs(key, []byte(fmt.Sprintf("value%02d", j)), 1, store.StoreID())
		args.Txn = txn
		if _, err := store.ExecuteCmd(context.Background(), &args); err != nil {
			t.Fatal(err)
		}
	}

	// Now, commit txn without resolving intents. If we hadn't disabled auto-gc
	// of Txn entries in this test, the Txn entry would be removed and later
	// attempts to resolve the intents would fail.
	etArgs := endTxnArgs(txn, true, 1, store.StoreID())
	etArgs.Timestamp = txn.Timestamp
	if _, err := store.ExecuteCmd(context.Background(), &etArgs); err != nil {
		t.Fatal(err)
	}

	intercept.Store(false) // allow async intent resolution

	// Scan the range repeatedly until we've verified count.
	sArgs := scanArgs(keys[0], keys[9].Next(), 1, store.StoreID())
	sArgs.ReadConsistency = proto.INCONSISTENT
	util.SucceedsWithin(t, time.Second, func() error {
		if reply, err := store.ExecuteCmd(context.Background(), &sArgs); err != nil {
			return err
		} else if sReply := reply.(*proto.ScanResponse); len(sReply.Rows) != 10 {
			return util.Errorf("could not read rows as expected")
		}
		return nil
	})
}
Example #23
// TestRangeCommandClockUpdate verifies that followers update their
// clocks when executing a command, even if the leader's clock is far
// in the future.
func TestRangeCommandClockUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)

	const numNodes = 3
	var manuals []*hlc.ManualClock
	var clocks []*hlc.Clock
	for i := 0; i < numNodes; i++ {
		manuals = append(manuals, hlc.NewManualClock(1))
		clocks = append(clocks, hlc.NewClock(manuals[i].UnixNano))
		clocks[i].SetMaxOffset(100 * time.Millisecond)
	}
	mtc := multiTestContext{
		clocks: clocks,
	}
	mtc.Start(t, numNodes)
	defer mtc.Stop()
	mtc.replicateRange(1, 0, 1, 2)

	// Advance the leader's clock ahead of the followers (by more than
	// MaxOffset but less than the leader lease) and execute a command.
	manuals[0].Increment(int64(500 * time.Millisecond))
	incArgs, incResp := incrementArgs([]byte("a"), 5, 1, mtc.stores[0].StoreID())
	incArgs.Timestamp = clocks[0].Now()
	if err := mtc.stores[0].ExecuteCmd(context.Background(), proto.Call{Args: incArgs, Reply: incResp}); err != nil {
		t.Fatal(err)
	}

	// Wait for that command to execute on all the followers.
	util.SucceedsWithin(t, 50*time.Millisecond, func() error {
		values := []int64{}
		for _, eng := range mtc.engines {
			val, _, err := engine.MVCCGet(eng, proto.Key("a"), clocks[0].Now(), true, nil)
			if err != nil {
				return err
			}
			values = append(values, mustGetInteger(val))
		}
		if !reflect.DeepEqual(values, []int64{5, 5, 5}) {
			return util.Errorf("expected (5, 5, 5), got %v", values)
		}
		return nil
	})

	// Verify that all the followers have accepted the clock update from
	// node 0 even though it comes from outside the usual max offset.
	now := clocks[0].Now()
	for i, clock := range clocks {
		// Only compare the WallTimes: it's normal for clock 0 to be a few logical ticks ahead.
		if clock.Now().WallTime < now.WallTime {
			t.Errorf("clock %d is behind clock 0: %s vs %s", i, clock.Now(), now)
		}
	}
}
Example #24
// TestClientRegisterWithInitNodeID verifies that two clients' gossip requests register correctly with the first node.
func TestClientRegisterWithInitNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)
	stopper := stop.NewStopper()
	defer stopper.Stop()

	// Create three gossip nodes, and connect to the first with NodeID 0.
	var g []*Gossip
	var gossipAddr string
	for i := 0; i < 3; i++ {
		clock := hlc.NewClock(hlc.UnixNano)
		RPCContext := rpc.NewContext(&base.Context{Insecure: true}, clock, stopper)

		addr := util.CreateTestAddr("tcp")
		server := grpc.NewServer()
		TLSConfig, err := RPCContext.GetServerTLSConfig()
		if err != nil {
			t.Fatal(err)
		}
		ln, err := grpcutil.ListenAndServeGRPC(stopper, server, addr, TLSConfig)
		if err != nil {
			t.Fatal(err)
		}

		// Connect to the first gossip node.
		if gossipAddr == "" {
			gossipAddr = ln.Addr().String()
		}

		var resolvers []resolver.Resolver
		resolver, _ := resolver.NewResolver(&RPCContext.Context, gossipAddr)
		resolvers = append(resolvers, resolver)
		gnode := New(RPCContext, resolvers, stopper)
		// node ID must be non-zero
		gnode.SetNodeID(roachpb.NodeID(i + 1))
		g = append(g, gnode)
		gnode.Start(server, ln.Addr())
	}

	util.SucceedsWithin(t, 5*time.Second, func() error {
		// The first gossip node should have two gossip client addresses
		// in its nodeMap if all three gossip nodes registered successfully.
		g[0].mu.Lock()
		defer g[0].mu.Unlock()
		if a, e := len(g[0].nodeMap), 2; a != e {
			return util.Errorf("expected %s to contain %d nodes, got %d", g[0].nodeMap, e, a)
		}
		return nil
	})
}
Example #25
// TestReplicateAfterSplit verifies that a new replica whose start key
// is not KeyMin can be replicated to a fresh store and apply snapshots correctly.
func TestReplicateAfterSplit(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := startMultiTestContext(t, 2)
	defer mtc.Stop()

	rangeID := roachpb.RangeID(1)
	splitKey := roachpb.Key("m")
	key := roachpb.Key("z")

	store0 := mtc.stores[0]
	// Make the split
	splitArgs := adminSplitArgs(roachpb.KeyMin, splitKey)
	if _, err := client.SendWrapped(rg1(store0), nil, &splitArgs); err != nil {
		t.Fatal(err)
	}

	rangeID2 := store0.LookupReplica(roachpb.RKey(key), nil).RangeID
	if rangeID2 == rangeID {
		t.Errorf("got same range id after split")
	}
	// Issue an increment for later check.
	incArgs := incrementArgs(key, 11)
	if _, err := client.SendWrappedWith(rg1(store0), nil, roachpb.Header{
		RangeID: rangeID2,
	}, &incArgs); err != nil {
		t.Fatal(err)
	}
	// Now add the second replica.
	mtc.replicateRange(rangeID2, 1)

	if mtc.stores[1].LookupReplica(roachpb.RKey(key), nil).GetMaxBytes() == 0 {
		t.Error("Range MaxBytes is not set after snapshot applied")
	}
	// Once it catches up, the effects of increment commands can be seen.
	util.SucceedsWithin(t, replicaReadTimeout, func() error {
		getArgs := getArgs(key)
		// Reads on the non-leader replica must use an inconsistent read.
		if reply, err := client.SendWrappedWith(rg1(mtc.stores[1]), nil, roachpb.Header{
			RangeID:         rangeID2,
			ReadConsistency: roachpb.INCONSISTENT,
		}, &getArgs); err != nil {
			return util.Errorf("failed to read data: %s", err)
		} else if e, v := int64(11), mustGetInt(reply.(*roachpb.GetResponse).Value); v != e {
			return util.Errorf("failed to read correct data: expected %d, got %d", e, v)
		}
		return nil
	})
}
Example #26
// waitUntilDead will block until the specified store is marked as dead.
func waitUntilDead(t *testing.T, sp *StorePool, storeID roachpb.StoreID) {
	util.SucceedsWithin(t, 10*TestTimeUntilStoreDead, func() error {
		sp.mu.RLock()
		defer sp.mu.RUnlock()
		store, ok := sp.stores[storeID]
		if !ok {
			t.Fatalf("store %s isn't in the pool's store list", storeID)
		}
		if store.dead {
			return nil
		}
		return errors.New("store not marked as dead yet")
	})
}
Example #27
// TestGetNodeDescriptor checks that the Node descriptor automatically gets
// looked up from Gossip.
func TestGetNodeDescriptor(t *testing.T) {
	defer leaktest.AfterTest(t)
	g, s := makeTestGossip(t)
	defer s()
	ds := NewDistSender(&DistSenderContext{}, g)
	if err := g.SetNodeDescriptor(&roachpb.NodeDescriptor{NodeID: 5}); err != nil {
		t.Fatal(err)
	}
	util.SucceedsWithin(t, time.Second, func() error {
		desc := ds.getNodeDescriptor()
		if desc != nil && desc.NodeID == 5 {
			return nil
		}
		return util.Errorf("wanted NodeID 5, got %v", desc)
	})
}
Example #28
// TestNodeJoin verifies a new node is able to join a bootstrapped
// cluster consisting of one node.
func TestNodeJoin(t *testing.T) {
	defer leaktest.AfterTest(t)
	engineStopper := stop.NewStopper()
	defer engineStopper.Stop()
	e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)
	if _, err := bootstrapCluster([]engine.Engine{e}); err != nil {
		t.Fatal(err)
	}

	// Start the bootstrap node.
	engines1 := []engine.Engine{e}
	addr1 := util.CreateTestAddr("tcp")
	_, server1Addr, node1, stopper1 := createAndStartTestNode(addr1, engines1, addr1, t)
	defer stopper1.Stop()

	// Create a new node.
	engines2 := []engine.Engine{engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)}
	addr2 := util.CreateTestAddr("tcp")
	_, server2Addr, node2, stopper2 := createAndStartTestNode(addr2, engines2, server1Addr, t)
	defer stopper2.Stop()

	// Verify new node is able to bootstrap its store.
	if err := util.IsTrueWithin(func() bool { return node2.stores.GetStoreCount() == 1 }, testTimeout); err != nil {
		t.Fatal(err)
	}

	// Verify node1 sees node2 via gossip and vice versa.
	node1Key := gossip.MakeNodeIDKey(node1.Descriptor.NodeID)
	node2Key := gossip.MakeNodeIDKey(node2.Descriptor.NodeID)
	util.SucceedsWithin(t, 50*time.Millisecond, func() error {
		var nodeDesc1 roachpb.NodeDescriptor
		if err := node1.ctx.Gossip.GetInfoProto(node2Key, &nodeDesc1); err != nil {
			return err
		}
		if addr2Str, server2AddrStr := nodeDesc1.Address.String(), server2Addr.String(); addr2Str != server2AddrStr {
			return util.Errorf("addr2 gossip %s doesn't match addr2 address %s", addr2Str, server2AddrStr)
		}
		var nodeDesc2 roachpb.NodeDescriptor
		if err := node2.ctx.Gossip.GetInfoProto(node1Key, &nodeDesc2); err != nil {
			return err
		}
		if addr1Str, server1AddrStr := nodeDesc2.Address.String(), server1Addr.String(); addr1Str != server1AddrStr {
			return util.Errorf("addr1 gossip %s doesn't match addr1 address %s", addr1Str, server1AddrStr)
		}
		return nil
	})
}
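
This test mixes util.IsTrueWithin with util.SucceedsWithin. By analogy with the retry sketch after Example #1, an IsTrueWithin-style helper might look like the following (assumed shape, not the actual util code; only the standard time and fmt packages are used):

// isTrueWithin (assumed shape): poll the condition until it reports
// true or the duration elapses, returning an error on timeout.
func isTrueWithin(fn func() bool, d time.Duration) error {
	deadline := time.Now().Add(d)
	for !fn() {
		if time.Now().After(deadline) {
			return fmt.Errorf("condition failed to evaluate true within %s", d)
		}
		// Poll at a coarse interval; the exact backoff is a guess.
		time.Sleep(10 * time.Millisecond)
	}
	return nil
}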
Example #29
// TestProgressWithDownNode verifies that a surviving quorum can make progress
// with a downed node.
func TestProgressWithDownNode(t *testing.T) {
	defer leaktest.AfterTest(t)
	mtc := startMultiTestContext(t, 3)
	defer mtc.Stop()

	rangeID := roachpb.RangeID(1)
	mtc.replicateRange(rangeID, 1, 2)

	incArgs := incrementArgs([]byte("a"), 5)
	if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &incArgs); err != nil {
		t.Fatal(err)
	}

	// Verify that the first increment propagates to all the engines.
	verify := func(expected []int64) {
		util.SucceedsWithin(t, time.Second, func() error {
			values := []int64{}
			for _, eng := range mtc.engines {
				val, _, err := engine.MVCCGet(eng, roachpb.Key("a"), mtc.clock.Now(), true, nil)
				if err != nil {
					return err
				}
				values = append(values, mustGetInt(val))
			}
			if !reflect.DeepEqual(expected, values) {
				return util.Errorf("expected %v, got %v", expected, values)
			}
			return nil
		})
	}
	verify([]int64{5, 5, 5})

	// Stop one of the replicas and issue a new increment.
	mtc.stopStore(1)
	incArgs = incrementArgs([]byte("a"), 11)
	if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &incArgs); err != nil {
		t.Fatal(err)
	}

	// The new increment can be seen on both live replicas.
	verify([]int64{16, 5, 16})

	// Once the downed node is restarted, it will catch up.
	mtc.restartStore(1)
	verify([]int64{16, 16, 16})
}
Example #30
// TestReplicaGCQueueDropReplica verifies that a removed replica is
// immediately cleaned up.
func TestReplicaGCQueueDropReplica(t *testing.T) {
	defer leaktest.AfterTest(t)

	mtc := startMultiTestContext(t, 3)
	defer mtc.Stop()

	rangeID := roachpb.RangeID(1)
	mtc.replicateRange(rangeID, 0, 1, 2)
	mtc.unreplicateRange(rangeID, 0, 1)

	// Make sure the range is removed from the store.
	util.SucceedsWithin(t, time.Second, func() error {
		if _, err := mtc.stores[1].GetReplica(rangeID); !testutils.IsError(err, "range .* was not found") {
			return util.Errorf("expected range removal")
		}
		return nil
	})
}