Example No. 1
// TestGossipCullNetwork verifies that a client will be culled from
// the network periodically (at cullInterval duration intervals).
func TestGossipCullNetwork(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t)
	local.SetCullInterval(5 * time.Millisecond)

	local.mu.Lock()
	for i := 0; i < minPeers; i++ {
		peer := startGossip(roachpb.NodeID(i+2), stopper, t)
		local.startClient(&peer.is.NodeAddr, stopper)
	}
	local.mu.Unlock()

	util.SucceedsSoon(t, func() error {
		if len(local.Outgoing()) == minPeers {
			return nil
		}
		return errors.New("some peers not yet connected")
	})

	local.manage()

	util.SucceedsSoon(t, func() error {
		// Verify that a client is closed within the cull interval.
		if len(local.Outgoing()) == minPeers-1 {
			return nil
		}
		return errors.New("no network culling occurred")
	})
}
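
Every example in this collection polls an eventually-true condition through util.SucceedsSoon. The helper below is a minimal sketch of that retry pattern, assuming retry-with-backoff-until-deadline semantics; the backoff values and 45-second deadline are illustrative rather than the library's actual tuning, and imports are elided as in the surrounding examples.

// succeedsSoon retries fn with a growing backoff until it returns nil or the
// deadline expires, at which point the test fails with the last error seen.
func succeedsSoon(t *testing.T, fn func() error) {
	deadline := time.Now().Add(45 * time.Second) // illustrative deadline
	backoff := time.Millisecond
	var lastErr error
	for time.Now().Before(deadline) {
		if lastErr = fn(); lastErr == nil {
			return
		}
		time.Sleep(backoff)
		if backoff < time.Second {
			backoff *= 2
		}
	}
	t.Fatalf("condition failed to hold before deadline: %v", lastErr)
}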
Example No. 2
// TestReplicateRange verifies basic replication functionality by creating two stores
// and a range, replicating the range to the second store, and reading its data there.
func TestReplicateRange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := startMultiTestContext(t, 2)
	defer mtc.Stop()

	// Issue a command on the first node before replicating.
	incArgs := incrementArgs([]byte("a"), 5)
	if _, err := client.SendWrapped(rg1(mtc.stores[0]), nil, &incArgs); err != nil {
		t.Fatal(err)
	}

	rng, err := mtc.stores[0].GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	if err := rng.ChangeReplicas(roachpb.ADD_REPLICA,
		roachpb.ReplicaDescriptor{
			NodeID:  mtc.stores[1].Ident.NodeID,
			StoreID: mtc.stores[1].Ident.StoreID,
		}, rng.Desc()); err != nil {
		t.Fatal(err)
	}
	// Verify no intent remains on range descriptor key.
	key := keys.RangeDescriptorKey(rng.Desc().StartKey)
	desc := roachpb.RangeDescriptor{}
	if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key, mtc.stores[0].Clock().Now(), true, nil, &desc); !ok || err != nil {
		t.Fatalf("fetching range descriptor yielded %t, %s", ok, err)
	}
	// Verify that in time, no intents remain on meta addressing
	// keys, and that the range descriptor on the meta records is correct.
	util.SucceedsSoon(t, func() error {
		meta2 := keys.Addr(keys.RangeMetaKey(roachpb.RKeyMax))
		meta1 := keys.Addr(keys.RangeMetaKey(meta2))
		for _, key := range []roachpb.RKey{meta2, meta1} {
			metaDesc := roachpb.RangeDescriptor{}
			if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), key.AsRawKey(), mtc.stores[0].Clock().Now(), true, nil, &metaDesc); !ok || err != nil {
				return util.Errorf("failed to resolve %s", key.AsRawKey())
			}
			if !reflect.DeepEqual(metaDesc, desc) {
				return util.Errorf("descs not equal: %+v != %+v", metaDesc, desc)
			}
		}
		return nil
	})

	// Verify that the same data is available on the replica.
	util.SucceedsSoon(t, func() error {
		getArgs := getArgs([]byte("a"))
		if reply, err := client.SendWrappedWith(rg1(mtc.stores[1]), nil, roachpb.Header{
			ReadConsistency: roachpb.INCONSISTENT,
		}, &getArgs); err != nil {
			return util.Errorf("failed to read data: %s", err)
		} else if e, v := int64(5), mustGetInt(reply.(*roachpb.GetResponse).Value); v != e {
			return util.Errorf("failed to read correct data: expected %d, got %d", e, v)
		}
		return nil
	})
}
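
The incrementArgs and getArgs helpers used above are not shown. A plausible sketch, assuming they simply build the request protos addressed to a key (the exact proto field layout is an assumption):

// incrementArgs builds an IncrementRequest for key; getArgs builds a
// GetRequest for key. Sketches only.
func incrementArgs(key []byte, inc int64) roachpb.IncrementRequest {
	return roachpb.IncrementRequest{
		Span:      roachpb.Span{Key: key},
		Increment: inc,
	}
}

func getArgs(key []byte) roachpb.GetRequest {
	return roachpb.GetRequest{
		Span: roachpb.Span{Key: key},
	}
}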
Example No. 3
func TestGossipOrphanedStallDetection(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	local.SetStallInterval(5 * time.Millisecond)

	// Make sure we have the sentinel to ensure that its absence is not the
	// cause of stall detection.
	if err := local.AddInfo(KeySentinel, nil, time.Hour); err != nil {
		t.Fatal(err)
	}

	peerStopper := stop.NewStopper()
	peer := startGossip(2, peerStopper, t, metric.NewRegistry())

	peerNodeID := peer.GetNodeID()
	peerAddr := peer.GetNodeAddr()

	local.startClient(peerAddr, peerNodeID)

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return nil
			}
		}
		return errors.Errorf("%d not yet connected", peerNodeID)
	})

	local.bootstrap()
	local.manage()

	peerStopper.Stop()

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return errors.Errorf("%d still connected", peerNodeID)
			}
		}
		return nil
	})

	peerStopper = stop.NewStopper()
	defer peerStopper.Stop()
	peer = startGossipAtAddr(peerNodeID, peerAddr, peerStopper, t, metric.NewRegistry())

	util.SucceedsSoon(t, func() error {
		for _, peerID := range local.Outgoing() {
			if peerID == peerNodeID {
				return nil
			}
		}
		return errors.Errorf("%d not yet connected", peerNodeID)
	})
}
Example No. 4
func TestOffsetMeasurement(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()

	serverTime := time.Unix(0, 20)
	serverClock := hlc.NewClock(serverTime.UnixNano)
	serverCtx := newNodeTestContext(serverClock, stopper)
	s, ln := newTestServer(t, serverCtx, true)
	remoteAddr := ln.Addr().String()

	RegisterHeartbeatServer(s, &HeartbeatService{
		clock:              serverClock,
		remoteClockMonitor: serverCtx.RemoteClocks,
	})

	// Create a client clock that is behind the server clock.
	clientAdvancing := AdvancingClock{time: time.Unix(0, 10)}
	clientClock := hlc.NewClock(clientAdvancing.UnixNano)
	clientClock.SetMaxOffset(time.Millisecond)
	clientCtx := newNodeTestContext(clientClock, stopper)
	clientCtx.RemoteClocks.offsetTTL = 5 * clientAdvancing.advancementInterval
	if _, err := clientCtx.GRPCDial(remoteAddr); err != nil {
		t.Fatal(err)
	}

	expectedOffset := RemoteOffset{Offset: 10, Uncertainty: 0, MeasuredAt: 10}
	util.SucceedsSoon(t, func() error {
		clientCtx.RemoteClocks.mu.Lock()
		defer clientCtx.RemoteClocks.mu.Unlock()

		if o, ok := clientCtx.RemoteClocks.mu.offsets[remoteAddr]; !ok {
			return util.Errorf("expected offset of %s to be initialized, but it was not", remoteAddr)
		} else if o != expectedOffset {
			return util.Errorf("expected:\n%v\nactual:\n%v", expectedOffset, o)
		}
		return nil
	})

	// Change the client such that it receives a heartbeat right after the
	// maximum clock reading delay.
	clientAdvancing.Lock()
	clientAdvancing.advancementInterval = maximumPingDurationMult*clientClock.MaxOffset() + 1*time.Nanosecond
	clientAdvancing.Unlock()

	util.SucceedsSoon(t, func() error {
		clientCtx.RemoteClocks.mu.Lock()
		defer clientCtx.RemoteClocks.mu.Unlock()

		if o, ok := clientCtx.RemoteClocks.mu.offsets[remoteAddr]; ok {
			return util.Errorf("expected offset to have been cleared, but found %s", o)
		}
		return nil
	})
}
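
The AdvancingClock used above is assumed to be a manual clock whose UnixNano reading advances the stored time by advancementInterval on every call, which is what lets the test push clock readings past the maximum clock reading delay. A minimal sketch under that assumption; the embedded mutex mirrors the Lock/Unlock calls in the test.

// AdvancingClock (sketch): each UnixNano call returns the current time and
// then moves it forward by advancementInterval, so consecutive readings
// appear to take that long.
type AdvancingClock struct {
	sync.Mutex
	time                time.Time
	advancementInterval time.Duration
}

func (ac *AdvancingClock) UnixNano() int64 {
	ac.Lock()
	defer ac.Unlock()
	now := ac.time
	ac.time = now.Add(ac.advancementInterval)
	return now.UnixNano()
}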
Example No. 5
// TestRaftTransportCircuitBreaker verifies that messages will be
// dropped while waiting for the raft node connection to be established.
func TestRaftTransportCircuitBreaker(t *testing.T) {
	defer leaktest.AfterTest(t)()
	rttc := newRaftTransportTestContext(t)
	defer rttc.Stop()

	serverReplica := roachpb.ReplicaDescriptor{
		NodeID:    2,
		StoreID:   2,
		ReplicaID: 2,
	}
	_, serverAddr := rttc.AddNodeWithoutGossip(serverReplica.NodeID)
	serverChannel := rttc.ListenStore(serverReplica.NodeID, serverReplica.StoreID)

	clientReplica := roachpb.ReplicaDescriptor{
		NodeID:    1,
		StoreID:   1,
		ReplicaID: 1,
	}
	clientTransport := rttc.AddNode(clientReplica.NodeID)

	// The transport is set up asynchronously, so we expect the first
	// Send to return true here.
	if !rttc.Send(clientReplica, serverReplica, 1, raftpb.Message{Commit: 1}) {
		t.Errorf("unexpectedly failed sending while connection is being asynchronously established")
	}

	// However, repeated sends should begin to be dropped once
	// the circuit breaker trips.
	util.SucceedsSoon(t, func() error {
		if rttc.Send(clientReplica, serverReplica, 1, raftpb.Message{Commit: 1}) {
			return errors.Errorf("expected circuit breaker to trip")
		}
		return nil
	})

	// Now, gossip address of server.
	rttc.GossipNode(serverReplica.NodeID, serverAddr)

	// Keep sending commit=2 until the breaker resets and we receive the
	// first instance. It's possible that an earlier message for commit=1
	// snuck in.
	util.SucceedsSoon(t, func() error {
		if !rttc.Send(clientReplica, serverReplica, 1, raftpb.Message{Commit: 2}) {
			clientTransport.GetCircuitBreaker(serverReplica.NodeID).Reset()
		}
		select {
		case req := <-serverChannel.ch:
			if req.Message.Commit == 2 {
				return nil
			}
		default:
		}
		return errors.Errorf("expected message commit=2")
	})
}
Example No. 6
// TestRangeSplitsWithWritePressure sets the zone config max bytes for
// a range to 256K and writes data until there are five ranges.
func TestRangeSplitsWithWritePressure(t *testing.T) {
	defer leaktest.AfterTest(t)()
	// Override default zone config.
	cfg := config.DefaultZoneConfig()
	cfg.RangeMaxBytes = 1 << 18
	defer config.TestingSetDefaultZoneConfig(cfg)()

	dbCtx := client.DefaultDBContext()
	dbCtx.TxnRetryOptions = retry.Options{
		InitialBackoff: 1 * time.Millisecond,
		MaxBackoff:     10 * time.Millisecond,
		Multiplier:     2,
	}
	s, _ := createTestDBWithContext(t, dbCtx)
	// This is purely to silence log spam.
	config.TestingSetupZoneConfigHook(s.Stopper)
	defer s.Stop()

	// Start a test writer that writes roughly 32K per value so that only a
	// few writes are needed to split the 256K range.
	done := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go startTestWriter(s.DB, int64(0), 1<<15, &wg, nil, nil, done, t)

	// Check that we end up with at least five ranges in the allotted time.
	util.SucceedsSoon(t, func() error {
		// Scan the meta2 records to count the ranges.
		rows, err := s.DB.Scan(keys.Meta2Prefix, keys.MetaMax, 0)
		if err != nil {
			return util.Errorf("failed to scan meta2 keys: %s", err)
		}
		if lr := len(rows); lr < 5 {
			return util.Errorf("expected >= 5 ranges; got %d", lr)
		}
		}
		return nil
	})
	close(done)
	wg.Wait()

	// This write pressure test often causes splits while resolve
	// intents are in flight, causing them to fail with range key
	// mismatch errors. However, LocalSender should retry in these
	// cases. Check here via MVCC scan that there are no dangling write
	// intents. We do this using a SucceedsSoon construct to account
	// for timing of finishing the test writer and a possibly-ongoing
	// asynchronous split.
	util.SucceedsSoon(t, func() error {
		if _, _, err := engine.MVCCScan(context.Background(), s.Eng, keys.LocalMax, roachpb.KeyMax, 0, hlc.MaxTimestamp, true, nil); err != nil {
			return util.Errorf("failed to verify no dangling intents: %s", err)
		}
		return nil
	})
}
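
startTestWriter is not shown and takes more parameters than this sketch; writePressure below is a simplified stand-in for the assumed behavior: keep writing values of roughly the requested size until the done channel is closed, so the range keeps filling and splitting. Key generation and error handling are illustrative only.

// writePressure writes valueBytes-sized values under sequential keys until
// done is closed, then signals the WaitGroup.
func writePressure(db *client.DB, valueBytes int, wg *sync.WaitGroup, done <-chan struct{}, t *testing.T) {
	defer wg.Done()
	src := rand.New(rand.NewSource(0))
	value := make([]byte, valueBytes)
	for i := 0; ; i++ {
		select {
		case <-done:
			return
		default:
		}
		src.Read(value) // fill the value with pseudo-random bytes
		key := fmt.Sprintf("key-%05d", i)
		if err := db.Put(key, value); err != nil {
			t.Errorf("put %s failed: %s", key, err)
			return
		}
	}
}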
Example No. 7
// TestScannerDisabled verifies that disabling a scanner prevents
// replicas from being added to queues.
func TestScannerDisabled(t *testing.T) {
	defer leaktest.AfterTest(t)()
	const count = 3
	ranges := newTestRangeSet(count, t)
	q := &testQueue{}
	s := newReplicaScanner(1*time.Millisecond, 0, ranges)
	s.AddQueues(q)
	mc := hlc.NewManualClock(0)
	clock := hlc.NewClock(mc.UnixNano)
	stopper := stop.NewStopper()
	s.Start(clock, stopper)
	defer stopper.Stop()

	// Verify queue gets all ranges.
	util.SucceedsSoon(t, func() error {
		if q.count() != count {
			return errors.Errorf("expected %d replicas; have %d", count, q.count())
		}
		if s.scanCount() == 0 {
			return errors.Errorf("expected scanner count to increment")
		}
		return nil
	})

	lastWaitEnabledCount := s.waitEnabledCount()

	// Now, disable the scanner.
	s.SetDisabled(true)
	util.SucceedsSoon(t, func() error {
		if s.waitEnabledCount() == lastWaitEnabledCount {
			return errors.Errorf("expected scanner to stop when disabled")
		}
		return nil
	})

	lastScannerCount := s.scanCount()

	// Remove the replicas and verify that, even while disabled, the scanner
	// still removes them from the queue.
	ranges.Visit(func(repl *Replica) bool {
		s.RemoveReplica(repl)
		return true
	})

	util.SucceedsSoon(t, func() error {
		if qc := q.count(); qc != 0 {
			return errors.Errorf("expected queue to be empty after replicas removed from scanner; got %d", qc)
		}
		return nil
	})
	if sc := s.scanCount(); sc != lastScannerCount {
		t.Errorf("expected scanner count to not increment: %d != %d", sc, lastScannerCount)
	}
}
Example No. 8
func TestFailedOffsetMeasurement(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop()

	// Can't be zero because that'd be an empty offset.
	clock := hlc.NewClock(hlc.NewManualClock(1).UnixNano)

	serverCtx := newNodeTestContext(clock, stopper)
	serverCtx.RemoteClocks.monitorInterval = 100 * time.Millisecond
	s, ln := newTestServer(t, serverCtx, true)
	remoteAddr := ln.Addr().String()

	heartbeat := &ManualHeartbeatService{
		clock:              clock,
		remoteClockMonitor: serverCtx.RemoteClocks,
		ready:              make(chan struct{}),
		stopper:            stopper,
	}
	RegisterHeartbeatServer(s, heartbeat)

	// Create a client that never receives a heartbeat after the first.
	clientCtx := newNodeTestContext(clock, stopper)
	// Increase the timeout so that failure arises from exceeding the maximum
	// clock reading delay, not the timeout.
	clientCtx.HeartbeatTimeout = 20 * clientCtx.HeartbeatInterval
	_, err := clientCtx.GRPCDial(remoteAddr)
	if err != nil {
		t.Fatal(err)
	}
	heartbeat.ready <- struct{}{} // Allow one heartbeat for initialization.

	util.SucceedsSoon(t, func() error {
		clientCtx.RemoteClocks.mu.Lock()
		defer clientCtx.RemoteClocks.mu.Unlock()

		if _, ok := clientCtx.RemoteClocks.mu.offsets[remoteAddr]; !ok {
			return util.Errorf("expected offset of %s to be initialized, but it was not", remoteAddr)
		}
		return nil
	})

	util.SucceedsSoon(t, func() error {
		serverCtx.RemoteClocks.mu.Lock()
		defer serverCtx.RemoteClocks.mu.Unlock()

		if o, ok := serverCtx.RemoteClocks.mu.offsets[remoteAddr]; ok {
			return util.Errorf("expected offset of %s to not be initialized, but it was: %v", remoteAddr, o)
		}
		return nil
	})
}
Example No. 9
// TestReplicaGCQueueDropReplicaDirect verifies that a removed replica is
// immediately cleaned up.
func TestReplicaGCQueueDropReplicaDirect(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := &multiTestContext{}
	const numStores = 3
	rangeID := roachpb.RangeID(1)

	// In this test, the Replica on the second Node is removed, and the test
	// verifies that that Node adds this Replica to its RangeGCQueue. However,
	// the queue does a consistent lookup which will usually be read from
	// Node 1. Hence, if Node 1 hasn't processed the removal when Node 2 has,
	// no GC will take place since the consistent RangeLookup hits the first
	// Node. We use the TestingCommandFilter to make sure that the second Node
	// waits for the first.
	ctx := storage.TestStoreContext()
	mtc.storeContext = &ctx
	mtc.storeContext.TestingKnobs.TestingCommandFilter =
		func(filterArgs storageutils.FilterArgs) error {
			et, ok := filterArgs.Req.(*roachpb.EndTransactionRequest)
			if !ok || filterArgs.Sid != 2 {
				return nil
			}
			rct := et.InternalCommitTrigger.GetChangeReplicasTrigger()
			if rct == nil || rct.ChangeType != roachpb.REMOVE_REPLICA {
				return nil
			}
			util.SucceedsSoon(t, func() error {
				r, err := mtc.stores[0].GetReplica(rangeID)
				if err != nil {
					return err
				}
				if i, _ := r.Desc().FindReplica(2); i >= 0 {
					return errors.New("expected second node gone from first node's known replicas")
				}
				return nil
			})
			return nil
		}

	mtc.Start(t, numStores)
	defer mtc.Stop()

	mtc.replicateRange(rangeID, 1, 2)
	mtc.unreplicateRange(rangeID, 1)

	// Make sure the range is removed from the store.
	util.SucceedsSoon(t, func() error {
		if _, err := mtc.stores[1].GetReplica(rangeID); !testutils.IsError(err, "range .* was not found") {
			return util.Errorf("expected range removal")
		}
		return nil
	})
}
Example No. 10
// TestBaseQueueProcess verifies that items from the queue are
// processed according to the timer function.
func TestBaseQueueProcess(t *testing.T) {
	defer leaktest.AfterTest(t)()
	g, stopper := gossipForTest(t)
	defer stopper.Stop()

	r1 := &Replica{RangeID: 1}
	if err := r1.setDesc(&roachpb.RangeDescriptor{RangeID: 1}); err != nil {
		t.Fatal(err)
	}
	r2 := &Replica{RangeID: 2}
	if err := r2.setDesc(&roachpb.RangeDescriptor{RangeID: 2}); err != nil {
		t.Fatal(err)
	}
	testQueue := &testQueueImpl{
		blocker: make(chan struct{}, 1),
		shouldQueueFn: func(now roachpb.Timestamp, r *Replica) (shouldQueue bool, priority float64) {
			shouldQueue = true
			priority = float64(r.RangeID)
			return
		},
	}
	bq := makeBaseQueue("test", testQueue, g, 2)
	mc := hlc.NewManualClock(0)
	clock := hlc.NewClock(mc.UnixNano)
	bq.Start(clock, stopper)

	bq.MaybeAdd(r1, roachpb.ZeroTimestamp)
	bq.MaybeAdd(r2, roachpb.ZeroTimestamp)
	if pc := atomic.LoadInt32(&testQueue.processed); pc != 0 {
		t.Errorf("expected no processed ranges; got %d", pc)
	}

	testQueue.blocker <- struct{}{}
	util.SucceedsSoon(t, func() error {
		if pc := atomic.LoadInt32(&testQueue.processed); pc != int32(1) {
			return util.Errorf("expected %d processed replicas; got %d", 1, pc)
		}
		return nil
	})

	testQueue.blocker <- struct{}{}
	util.SucceedsSoon(t, func() error {
		if pc := atomic.LoadInt32(&testQueue.processed); pc < int32(2) {
			return util.Errorf("expected >= %d processed replicas; got %d", 2, pc)
		}
		return nil
	})

	// Ensure the test queue is not blocked on a stray call to
	// testQueueImpl.timer().
	close(testQueue.blocker)
}
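
The testQueueImpl constructed above is assumed to behave roughly as sketched below: shouldQueue delegates to the injected function, process atomically bumps the processed counter polled by the SucceedsSoon loops, and timer blocks on the blocker channel so the test paces processing (which is why the blocker is closed at the end). The queueImpl method set shown here is an assumption, not the real interface.

// testQueueImpl (sketch): a queue implementation whose pace is controlled by
// the test via the blocker channel.
type testQueueImpl struct {
	blocker       chan struct{}
	processed     int32
	shouldQueueFn func(roachpb.Timestamp, *Replica) (bool, float64)
}

func (tq *testQueueImpl) shouldQueue(now roachpb.Timestamp, r *Replica) (bool, float64) {
	return tq.shouldQueueFn(now, r)
}

func (tq *testQueueImpl) process(now roachpb.Timestamp, r *Replica) error {
	atomic.AddInt32(&tq.processed, 1)
	return nil
}

func (tq *testQueueImpl) timer() time.Duration {
	if tq.blocker != nil {
		<-tq.blocker // each queue iteration waits for the test to release it
	}
	return 0
}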
Example No. 11
// TestStoreZoneUpdateAndRangeSplit verifies that modifying the zone
// configuration changes range max bytes and Range.maybeSplit() takes
// max bytes into account when deciding whether to enqueue a range for
// splitting. It further verifies that the range is in fact split once it
// exceeds the zone's RangeMaxBytes.
func TestStoreZoneUpdateAndRangeSplit(t *testing.T) {
	defer leaktest.AfterTest(t)()
	store, stopper, _ := createTestStore(t)
	config.TestingSetupZoneConfigHook(stopper)
	defer stopper.Stop()

	maxBytes := int64(1 << 16)
	// Set max bytes.
	descID := uint32(keys.MaxReservedDescID + 1)
	config.TestingSetZoneConfig(descID, &config.ZoneConfig{RangeMaxBytes: maxBytes})

	// Trigger gossip callback.
	if err := store.Gossip().AddInfoProto(gossip.KeySystemConfig, &config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	tableBoundary := keys.MakeTablePrefix(descID)

	{
		var rng *storage.Replica

		// Wait for the range to be split along table boundaries.
		expectedRSpan := roachpb.RSpan{Key: roachpb.RKey(tableBoundary), EndKey: roachpb.RKeyMax}
		util.SucceedsSoon(t, func() error {
			rng = store.LookupReplica(tableBoundary, nil)
			if actualRSpan := rng.Desc().RSpan(); !actualRSpan.Equal(expectedRSpan) {
				return util.Errorf("expected range %s to span %s", rng, expectedRSpan)
			}
			return nil
		})

		// Check range's max bytes settings.
		if actualMaxBytes := rng.GetMaxBytes(); actualMaxBytes != maxBytes {
			t.Fatalf("range %s max bytes mismatch, got: %d, expected: %d", rng, actualMaxBytes, maxBytes)
		}

		// Fill the range starting at the table prefix until it exceeds its max bytes.
		fillRange(store, rng.RangeID, tableBoundary, maxBytes, t)
	}

	// Verify that the range is in fact split.
	util.SucceedsSoon(t, func() error {
		rng := store.LookupReplica(keys.MakeTablePrefix(descID+1), nil)
		rngDesc := rng.Desc()
		rngStart, rngEnd := rngDesc.StartKey, rngDesc.EndKey
		if rngStart.Equal(tableBoundary) || !rngEnd.Equal(roachpb.RKeyMax) {
			return util.Errorf("range %s has not yet split", rng)
		}
		return nil
	})
}
Example No. 12
// TestNodeJoin verifies a new node is able to join a bootstrapped
// cluster consisting of one node.
func TestNodeJoin(t *testing.T) {
	defer leaktest.AfterTest(t)()
	engineStopper := stop.NewStopper()
	defer engineStopper.Stop()
	e := engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)
	if _, err := bootstrapCluster([]engine.Engine{e}, kv.NewTxnMetrics(metric.NewRegistry())); err != nil {
		t.Fatal(err)
	}

	// Start the bootstrap node.
	engines1 := []engine.Engine{e}
	addr1 := util.CreateTestAddr("tcp")
	_, server1Addr, node1, stopper1 := createAndStartTestNode(addr1, engines1, addr1, t)
	defer stopper1.Stop()

	// Create a new node.
	engines2 := []engine.Engine{engine.NewInMem(roachpb.Attributes{}, 1<<20, engineStopper)}
	addr2 := util.CreateTestAddr("tcp")
	_, server2Addr, node2, stopper2 := createAndStartTestNode(addr2, engines2, server1Addr, t)
	defer stopper2.Stop()

	// Verify new node is able to bootstrap its store.
	util.SucceedsSoon(t, func() error {
		if sc := node2.stores.GetStoreCount(); sc != 1 {
			return util.Errorf("GetStoreCount() expected 1; got %d", sc)
		}
		return nil
	})

	// Verify node1 sees node2 via gossip and vice versa.
	node1Key := gossip.MakeNodeIDKey(node1.Descriptor.NodeID)
	node2Key := gossip.MakeNodeIDKey(node2.Descriptor.NodeID)
	util.SucceedsSoon(t, func() error {
		var nodeDesc1 roachpb.NodeDescriptor
		if err := node1.ctx.Gossip.GetInfoProto(node2Key, &nodeDesc1); err != nil {
			return err
		}
		if addr2Str, server2AddrStr := nodeDesc1.Address.String(), server2Addr.String(); addr2Str != server2AddrStr {
			return util.Errorf("addr2 gossip %s doesn't match addr2 address %s", addr2Str, server2AddrStr)
		}
		var nodeDesc2 roachpb.NodeDescriptor
		if err := node2.ctx.Gossip.GetInfoProto(node1Key, &nodeDesc2); err != nil {
			return err
		}
		if addr1Str, server1AddrStr := nodeDesc2.Address.String(), server1Addr.String(); addr1Str != server1AddrStr {
			return util.Errorf("addr1 gossip %s doesn't match addr1 address %s", addr1Str, server1AddrStr)
		}
		return nil
	})
}
Example No. 13
// TestScannerTiming verifies that ranges are scanned at a pace that matches
// scanInterval, regardless of how many there are.
func TestScannerTiming(t *testing.T) {
	defer leaktest.AfterTest(t)()
	const count = 3
	const runTime = 100 * time.Millisecond
	const maxError = 7500 * time.Microsecond
	durations := []time.Duration{
		15 * time.Millisecond,
		25 * time.Millisecond,
	}
	for i, duration := range durations {
		util.SucceedsSoon(t, func() error {
			ranges := newTestRangeSet(count, t)
			q := &testQueue{}
			s := newReplicaScanner(duration, 0, ranges)
			s.AddQueues(q)
			mc := hlc.NewManualClock(0)
			clock := hlc.NewClock(mc.UnixNano)
			stopper := stop.NewStopper()
			s.Start(clock, stopper)
			time.Sleep(runTime)
			stopper.Stop()

			avg := s.avgScan()
			log.Infof("%d: average scan: %s", i, avg)
			if avg.Nanoseconds()-duration.Nanoseconds() > maxError.Nanoseconds() ||
				duration.Nanoseconds()-avg.Nanoseconds() > maxError.Nanoseconds() {
				return errors.Errorf("expected %s, got %s: exceeds max error of %s", duration, avg, maxError)
			}
			return nil
		})
	}
}
Example No. 14
// TestMetricsRecording verifies that Node statistics are periodically recorded
// as time series data.
func TestMetricsRecording(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tsrv := TestServer{}
	tsrv.Ctx = NewTestContext()
	tsrv.Ctx.MetricsFrequency = 5 * time.Millisecond
	if err := tsrv.Start(); err != nil {
		t.Fatal(err)
	}
	defer tsrv.Stop()

	checkTimeSeriesKey := func(now int64, keyName string) error {
		key := ts.MakeDataKey(keyName, "", ts.Resolution10s, now)
		data := roachpb.InternalTimeSeriesData{}
		return tsrv.db.GetProto(key, &data).GoError()
	}

	// Verify that metrics for the current timestamp are recorded. This should
	// be true very quickly.
	util.SucceedsSoon(t, func() error {
		now := tsrv.Clock().PhysicalNow()
		if err := checkTimeSeriesKey(now, "cr.store.livebytes.1"); err != nil {
			return err
		}
		if err := checkTimeSeriesKey(now, "cr.node.sys.allocbytes.1"); err != nil {
			return err
		}
		return nil
	})
}
Example No. 15
func gossipForTest(t *testing.T) (*gossip.Gossip, *stop.Stopper) {
	stopper := stop.NewStopper()

	// Setup fake zone config handler.
	config.TestingSetupZoneConfigHook(stopper)

	rpcContext := rpc.NewContext(&base.Context{}, hlc.NewClock(hlc.UnixNano), stopper)
	g := gossip.New(rpcContext, gossip.TestBootstrap, stopper)
	// g.SetNodeID must be called before g.AddInfo.
	g.SetNodeID(roachpb.NodeID(1))
	// Put an empty system config into gossip.
	if err := g.AddInfoProto(gossip.KeySystemConfig,
		&config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	// Wait for SystemConfig.
	util.SucceedsSoon(t, func() error {
		if g.GetSystemConfig() == nil {
			return util.Errorf("expected non-nil system config")
		}
		return nil
	})

	return g, stopper
}
Example No. 16
func checkRangeReplication(t *testing.T, c cluster.Cluster, d time.Duration) {
	// Always talk to node 0.
	client, dbStopper := makeClient(t, c.ConnString(0))
	defer dbStopper.Stop()

	wantedReplicas := 3
	if c.NumNodes() < 3 {
		wantedReplicas = c.NumNodes()
	}

	log.Infof("waiting for first range to have %d replicas", wantedReplicas)

	util.SucceedsSoon(t, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(1 * time.Second):
		}

		foundReplicas, err := countRangeReplicas(client)
		if err != nil {
			return err
		}

		if log.V(1) {
			log.Infof("found %d replicas", foundReplicas)
		}
		if foundReplicas >= wantedReplicas {
			return nil
		}
		return fmt.Errorf("expected %d replicas, only found %d", wantedReplicas, foundReplicas)
	})
}
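
countRangeReplicas is not shown; a plausible sketch, assuming the first range's descriptor can be read by scanning the meta2 index with the KV client (the key constants and proto accessor used here are assumptions):

// countRangeReplicas reports how many replicas the first range currently has
// by scanning meta2 for its descriptor. Sketch only.
func countRangeReplicas(db *client.DB) (int, error) {
	rows, err := db.Scan(keys.Meta2Prefix, keys.MetaMax, 1)
	if err != nil {
		return 0, err
	}
	if len(rows) == 0 {
		return 0, util.Errorf("no meta2 descriptors found")
	}
	var desc roachpb.RangeDescriptor
	if err := rows[0].ValueProto(&desc); err != nil {
		return 0, err
	}
	return len(desc.Replicas), nil
}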
Example No. 17
// TestGossipCullNetwork verifies that a client will be culled from
// the network periodically (at cullInterval duration intervals).
func TestGossipCullNetwork(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// Create the local gossip and minPeers peers.
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t)
	local.SetCullInterval(5 * time.Millisecond)
	peers := []*Gossip{}
	for i := 0; i < minPeers; i++ {
		peers = append(peers, startGossip(roachpb.NodeID(i+2), stopper, t))
	}

	// Start clients to all peers and start the local gossip's manage routine.
	local.mu.Lock()
	for _, p := range peers {
		pAddr := p.is.NodeAddr
		local.startClient(&pAddr, stopper)
	}
	local.mu.Unlock()
	local.manage()

	util.SucceedsSoon(t, func() error {
		// Verify that a client is closed within the cull interval.
		if len(local.Outgoing()) == minPeers-1 {
			return nil
		}
		return errors.New("no network culling occurred")
	})
}
Example No. 18
func TestEagerReplication(t *testing.T) {
	defer leaktest.AfterTest(t)()
	store, stopper, _ := createTestStore(t)
	defer stopper.Stop()

	// Disable the replica scanner so that we rely on the eager replication code
	// path that occurs after splits.
	store.SetReplicaScannerDisabled(true)

	if err := server.WaitForInitialSplits(store.DB()); err != nil {
		t.Fatal(err)
	}

	// WaitForInitialSplits will return as soon as the meta2 span contains the
	// expected number of descriptors. But a split adds its replica to the
	// replicateQueue only after updating the descriptors in meta2, leaving a
	// tiny window in which the newly split replica has not yet been added to
	// purgatory. Thus we loop.
	util.SucceedsSoon(t, func() error {
		// After the initial splits have been performed, all of the resulting ranges
		// should be present in replicate queue purgatory (because we only have a
		// single store in the test and thus replication cannot succeed).
		expected := server.ExpectedInitialRangeCount()
		if n := store.ReplicateQueuePurgatoryLength(); expected != n {
			return errors.Errorf("expected %d replicas in purgatory, but found %d", expected, n)
		}
		return nil
	})
}
Example No. 19
// TestTxnCoordSenderHeartbeat verifies periodic heartbeat of the
// transaction record.
func TestTxnCoordSenderHeartbeat(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s := createTestDB(t)
	defer s.Stop()
	defer teardownHeartbeats(s.Sender)

	// Set heartbeat interval to 1ms for testing.
	s.Sender.heartbeatInterval = 1 * time.Millisecond

	initialTxn := client.NewTxn(*s.DB)
	if err := initialTxn.Put(roachpb.Key("a"), []byte("value")); err != nil {
		t.Fatal(err)
	}

	// Verify 3 heartbeats.
	var heartbeatTS roachpb.Timestamp
	for i := 0; i < 3; i++ {
		util.SucceedsSoon(t, func() error {
			ok, txn, pErr := getTxn(s.Sender, &initialTxn.Proto)
			if !ok || pErr != nil {
				t.Fatalf("got txn: %t: %s", ok, pErr)
			}
			// Advance clock by 1ns.
			// Locking the TxnCoordSender to prevent a data race.
			s.Sender.Lock()
			s.Manual.Increment(1)
			s.Sender.Unlock()
			if heartbeatTS.Less(*txn.LastHeartbeat) {
				heartbeatTS = *txn.LastHeartbeat
				return nil
			}
			return util.Errorf("expected heartbeat")
		})
	}
}
Example No. 20
// TestStoreRangeUpReplicate verifies that the replication queue will notice
// under-replicated ranges and replicate them.
func TestStoreRangeUpReplicate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	mtc := startMultiTestContext(t, 3)
	defer mtc.Stop()

	// Initialize the gossip network.
	var wg sync.WaitGroup
	wg.Add(len(mtc.stores))
	key := gossip.MakePrefixPattern(gossip.KeyStorePrefix)
	mtc.stores[0].Gossip().RegisterCallback(key, func(_ string, _ roachpb.Value) { wg.Done() })
	for _, s := range mtc.stores {
		s.GossipStore()
	}
	wg.Wait()

	// Once we know our peers, trigger a scan.
	mtc.stores[0].ForceReplicationScanAndProcess()

	// The range should become available on every node.
	util.SucceedsSoon(t, func() error {
		for _, s := range mtc.stores {
			r := s.LookupReplica(roachpb.RKey("a"), roachpb.RKey("b"))
			if r == nil {
				return util.Errorf("expected replica for keys \"a\" - \"b\"")
			}
		}
		return nil
	})
}
Example No. 21
func testBuildInfoInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	checkGossip(t, c, 20*time.Second, hasPeers(c.NumNodes()))

	util.SucceedsSoon(t, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		default:
		}
		var r struct {
			BuildInfo map[string]string
		}
		if err := getJSON(c.URL(0), "/_status/details/local", &r); err != nil {
			return err
		}
		for _, key := range []string{"goVersion", "tag", "time", "dependencies"} {
			if val, ok := r.BuildInfo[key]; !ok {
				t.Errorf("build info missing for \"%s\"", key)
			} else if val == "" {
				t.Errorf("build info not set for \"%s\"", key)
			}
		}
		return nil
	})
}
Example No. 22
// TestStoreRangeSplitWithMaxBytesUpdate tests a scenario where a new
// zone config that updates the max bytes is set and triggers a range
// split.
func TestStoreRangeSplitWithMaxBytesUpdate(t *testing.T) {
	defer leaktest.AfterTest(t)()
	store, stopper, _ := createTestStore(t)
	config.TestingSetupZoneConfigHook(stopper)
	defer stopper.Stop()

	origRng := store.LookupReplica(roachpb.RKeyMin, nil)

	// Set max bytes.
	maxBytes := int64(1 << 16)
	descID := uint32(keys.MaxReservedDescID + 1)
	config.TestingSetZoneConfig(descID, &config.ZoneConfig{RangeMaxBytes: maxBytes})

	// Trigger gossip callback.
	if err := store.Gossip().AddInfoProto(gossip.KeySystemConfig, &config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	// Verify that the range is split and the new range has the correct max bytes.
	util.SucceedsSoon(t, func() error {
		newRng := store.LookupReplica(keys.MakeTablePrefix(descID), nil)
		if newRng.RangeID == origRng.RangeID {
			return util.Errorf("expected new range created by split")
		}
		if newRng.GetMaxBytes() != maxBytes {
			return util.Errorf("expected %d max bytes for the new range, but got %d",
				maxBytes, newRng.GetMaxBytes())
		}
		return nil
	})
}
Example No. 23
func testBuildInfoInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	checkGossip(t, c, 20*time.Second, hasPeers(c.NumNodes()))

	var details server.DetailsResponse
	util.SucceedsSoon(t, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
		default:
		}
		return util.GetJSON(cluster.HTTPClient, c.URL(0)+"/_status/details/local", &details)
	})

	bi := details.BuildInfo
	testData := map[string]string{
		"go_version":   bi.GoVersion,
		"tag":          bi.Tag,
		"time":         bi.Time,
		"dependencies": bi.Dependencies,
	}
	for key, val := range testData {
		if val == "" {
			t.Errorf("build info not set for \"%s\"", key)
		}
	}
}
Example No. 24
// TestClientDisconnectRedundant verifies that the gossip server
// will drop an outgoing client connection that is already an
// inbound client connection of another node.
func TestClientDisconnectRedundant(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t, metric.NewRegistry())
	remote := startGossip(2, stopper, t, metric.NewRegistry())
	// startClient requires that the mutexes be held, so acquire them here.
	local.mu.Lock()
	remote.mu.Lock()
	rAddr := remote.mu.is.NodeAddr
	lAddr := local.mu.is.NodeAddr
	local.startClient(&rAddr, remote.mu.is.NodeID)
	remote.startClient(&lAddr, local.mu.is.NodeID)
	local.mu.Unlock()
	remote.mu.Unlock()
	local.manage()
	remote.manage()
	util.SucceedsSoon(t, func() error {
		// Check which of the clients is connected to the other.
		ok1 := local.findClient(func(c *client) bool { return c.addr.String() == rAddr.String() }) != nil
		ok2 := remote.findClient(func(c *client) bool { return c.addr.String() == lAddr.String() }) != nil
		// We expect node 2 to disconnect; if both are still connected,
		// it's possible that node 1 gossiped before node 2 connected, in
		// which case we have to gossip from node 1 to trigger the
		// disconnect redundant client code.
		if ok1 && ok2 {
			if err := local.AddInfo("local-key", nil, time.Second); err != nil {
				t.Fatal(err)
			}
		} else if ok1 && !ok2 && verifyServerMaps(local, 0) && verifyServerMaps(remote, 1) {
			return nil
		}
		return errors.New("local client to remote not yet closed as redundant")
	})
}
Example No. 25
// TestTxnCoordSenderGCTimeout verifies that the coordinator cleans up extant
// transactions and intents after the lastUpdateNanos exceeds the timeout.
func TestTxnCoordSenderGCTimeout(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, sender := createTestDB(t)
	defer s.Stop()

	// Set heartbeat interval to 1ms for testing.
	sender.heartbeatInterval = 1 * time.Millisecond

	txn := client.NewTxn(context.Background(), *s.DB)
	key := roachpb.Key("a")
	if err := txn.Put(key, []byte("value")); err != nil {
		t.Fatal(err)
	}

	// Now, advance clock past the default client timeout.
	// Locking the TxnCoordSender to prevent a data race.
	sender.Lock()
	s.Manual.Set(defaultClientTimeout.Nanoseconds() + 1)
	sender.Unlock()

	txnID := *txn.Proto.ID

	util.SucceedsSoon(t, func() error {
		// Locking the TxnCoordSender to prevent a data race.
		sender.Lock()
		_, ok := sender.txns[txnID]
		sender.Unlock()
		if ok {
			return util.Errorf("expected garbage collection")
		}
		return nil
	})

	verifyCleanup(key, sender, s.Eng, t)
}
Example No. 26
// TestClientDisallowMultipleConns verifies that the server disallows
// multiple connections from the same client node ID.
func TestClientDisallowMultipleConns(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t)
	remote := startGossip(2, stopper, t)
	local.mu.Lock()
	remote.mu.Lock()
	rAddr := remote.is.NodeAddr
	// Start two clients from local to remote. RPC client cache is
	// disabled via the context, so we'll start two different outgoing
	// connections.
	local.startClient(&rAddr, stopper)
	local.startClient(&rAddr, stopper)
	local.mu.Unlock()
	remote.mu.Unlock()
	local.manage()
	remote.manage()
	util.SucceedsSoon(t, func() error {
		// Verify that the remote server has only a single incoming
		// connection and the local server has only a single outgoing
		// connection.
		local.mu.Lock()
		remote.mu.Lock()
		outgoing := local.outgoing.len()
		incoming := remote.incoming.len()
		local.mu.Unlock()
		remote.mu.Unlock()
		if outgoing == 1 && incoming == 1 && verifyServerMaps(local, 0) && verifyServerMaps(remote, 1) {
			return nil
		}
		return util.Errorf("incorrect number of incoming (%d) or outgoing (%d) connections", incoming, outgoing)
	})
}
Example No. 27
func gossipForTest(t *testing.T) (*gossip.Gossip, *stop.Stopper) {
	stopper := stop.NewStopper()

	// Setup fake zone config handler.
	config.TestingSetupZoneConfigHook(stopper)

	rpcContext := rpc.NewContext(nil, nil, stopper)
	g := gossip.New(rpcContext, nil, stopper)
	// g.SetNodeID must be called before g.AddInfo.
	g.SetNodeID(roachpb.NodeID(1))
	// Put an empty system config into gossip.
	if err := g.AddInfoProto(gossip.KeySystemConfig,
		&config.SystemConfig{}, 0); err != nil {
		t.Fatal(err)
	}

	// Wait for SystemConfig.
	util.SucceedsSoon(t, func() error {
		if _, ok := g.GetSystemConfig(); !ok {
			return util.Errorf("expected system config to be set")
		}
		return nil
	})

	return g, stopper
}
Example No. 28
// TestMetricsRecording verifies that Node statistics are periodically recorded
// as time series data.
func TestMetricsRecording(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{
		MetricsSampleInterval: 5 * time.Millisecond})
	defer s.Stopper().Stop()

	checkTimeSeriesKey := func(now int64, keyName string) error {
		key := ts.MakeDataKey(keyName, "", ts.Resolution10s, now)
		data := roachpb.InternalTimeSeriesData{}
		return kvDB.GetProto(key, &data)
	}

	// Verify that metrics for the current timestamp are recorded. This should
	// be true very quickly.
	util.SucceedsSoon(t, func() error {
		now := s.Clock().PhysicalNow()
		if err := checkTimeSeriesKey(now, "cr.store.livebytes.1"); err != nil {
			return err
		}
		if err := checkTimeSeriesKey(now, "cr.node.sys.go.allocbytes.1"); err != nil {
			return err
		}
		return nil
	})
}
Example No. 29
// TestClientRetryBootstrap verifies that an initial failure to connect
// to a bootstrap host doesn't stall the bootstrapping process in the
// absence of any additional activity. This can happen during acceptance
// tests if the DNS can't look up hostnames when gossip is started.
func TestClientRetryBootstrap(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop()
	local := startGossip(1, stopper, t)
	remote := startGossip(2, stopper, t)
	remote.mu.Lock()
	rAddr := remote.is.NodeAddr
	remote.mu.Unlock()

	if err := local.AddInfo("local-key", []byte("hello"), 0*time.Second); err != nil {
		t.Fatal(err)
	}

	local.SetBootstrapInterval(10 * time.Millisecond)
	local.SetResolvers([]resolver.Resolver{
		&testResolver{addr: rAddr.String(), numFails: 3, numSuccesses: 1},
	})
	local.bootstrap()
	local.manage()

	util.SucceedsSoon(t, func() error {
		if _, err := remote.GetInfo("local-key"); err != nil {
			return err
		}
		return nil
	})
}
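
testResolver is assumed to be a Resolver stub that fails its first numFails GetAddress calls and then succeeds, which is what exercises the bootstrap retry path above. A sketch under that assumption; the Resolver method set and the util.NewUnresolvedAddr helper are themselves assumptions.

// testResolver (sketch): returns errors for the first numFails lookups, then
// resolves to addr for the next numSuccesses lookups.
type testResolver struct {
	addr                   string
	numTries               int
	numFails, numSuccesses int
}

func (tr *testResolver) Type() string { return "tcp" }
func (tr *testResolver) Addr() string { return tr.addr }

func (tr *testResolver) GetAddress() (net.Addr, error) {
	defer func() { tr.numTries++ }()
	if tr.numTries < tr.numFails {
		return nil, errors.New("injected resolver failure")
	}
	return util.NewUnresolvedAddr("tcp", tr.addr), nil
}

func (tr *testResolver) IsExhausted() bool {
	return tr.numTries >= tr.numFails+tr.numSuccesses
}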
Example No. 30
// TestRemoveRangeWithoutGC ensures that we do not panic when a
// replica has been removed but not yet GC'd (and therefore
// does not have an active raft group).
func TestRemoveRangeWithoutGC(t *testing.T) {
	defer leaktest.AfterTest(t)()

	mtc := startMultiTestContext(t, 2)
	defer mtc.Stop()
	// Disable the GC queue and move the range from store 0 to 1.
	mtc.stores[0].DisableReplicaGCQueue(true)
	const rangeID roachpb.RangeID = 1
	mtc.replicateRange(rangeID, 1)
	mtc.unreplicateRange(rangeID, 0)

	// Wait for store 0 to process the removal.
	util.SucceedsSoon(t, func() error {
		rep, err := mtc.stores[0].GetReplica(rangeID)
		if err != nil {
			return err
		}
		desc := rep.Desc()
		if len(desc.Replicas) != 1 {
			return util.Errorf("range has %d replicas", len(desc.Replicas))
		}
		return nil
	})

	// The replica's data is still on disk even though the Replica
	// object is removed.
	var desc roachpb.RangeDescriptor
	descKey := keys.RangeDescriptorKey(roachpb.RKeyMin)
	if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), descKey,
		mtc.stores[0].Clock().Now(), true, nil, &desc); err != nil {
		t.Fatal(err)
	} else if !ok {
		t.Fatal("expected range descriptor to be present")
	}

	// Stop and restart the store to reset the replica's raftGroup
	// pointer to nil. As long as the store has not been restarted it
	// can continue to use its last known replica ID.
	mtc.stopStore(0)
	mtc.restartStore(0)
	// Turn off the GC queue to ensure that the replica is deleted at
	// startup instead of by the scanner. This is not 100% guaranteed
	// since the scanner could have already run at this point, but it
	// should be enough to prevent us from accidentally relying on the
	// scanner.
	mtc.stores[0].DisableReplicaGCQueue(true)

	// The Replica object is not recreated.
	if _, err := mtc.stores[0].GetReplica(rangeID); err == nil {
		t.Fatalf("expected replica to be missing")
	}

	// And the data is no longer on disk.
	if ok, err := engine.MVCCGetProto(mtc.stores[0].Engine(), descKey,
		mtc.stores[0].Clock().Now(), true, nil, &desc); err != nil {
		t.Fatal(err)
	} else if ok {
		t.Fatal("expected range descriptor to be absent")
	}
}