Example #1
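// maybeUnmarshalInline decodes v as inline MVCC metadata and unmarshals the
// protobuf stored in its RawBytes into dest.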
func maybeUnmarshalInline(v []byte, dest proto.Message) error {
	var meta enginepb.MVCCMetadata
	if err := meta.Unmarshal(v); err != nil {
		return err
	}
	value := roachpb.Value{
		RawBytes: meta.RawBytes,
	}
	return value.GetProto(dest)
}
Example #2
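// tryMeta attempts to decode the value of a meta1/meta2 key as a
// RangeDescriptor and returns its string representation.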
func tryMeta(kv engine.MVCCKeyValue) (string, error) {
	if !bytes.HasPrefix(kv.Key.Key, keys.Meta1Prefix) && !bytes.HasPrefix(kv.Key.Key, keys.Meta2Prefix) {
		return "", errors.New("not a meta key")
	}
	value := roachpb.Value{
		Timestamp: kv.Key.Timestamp,
		RawBytes:  kv.Value,
	}
	var desc roachpb.RangeDescriptor
	if err := value.GetProto(&desc); err != nil {
		return "", err
	}
	return descStr(desc), nil
}
Example #3
// storeGossipUpdate is the gossip callback used to keep the StorePool up to date.
func (sp *StorePool) storeGossipUpdate(_ string, content roachpb.Value) {
	var storeDesc roachpb.StoreDescriptor
	if err := content.GetProto(&storeDesc); err != nil {
		ctx := sp.AnnotateCtx(context.TODO())
		log.Error(ctx, err)
		return
	}

	sp.mu.Lock()
	defer sp.mu.Unlock()
	// Does this storeDetail exist yet?
	detail := sp.getStoreDetailLocked(storeDesc.StoreID)
	detail.markAlive(sp.clock.Now(), &storeDesc)
	sp.mu.queue.enqueue(detail)
}
Example #4
// storeGossipUpdate is the gossip callback used to keep the StorePool up to date.
func (sp *StorePool) storeGossipUpdate(_ string, content roachpb.Value) {
	var storeDesc roachpb.StoreDescriptor
	if err := content.GetProto(&storeDesc); err != nil {
		ctx := sp.AnnotateCtx(context.TODO())
		log.Error(ctx, err)
		return
	}

	sp.mu.Lock()
	defer sp.mu.Unlock()
	detail := sp.getStoreDetailLocked(storeDesc.StoreID)
	detail.desc = &storeDesc
	detail.lastUpdatedTime = sp.clock.PhysicalTime()
	sp.mu.nodeLocalities[storeDesc.Node.NodeID] = storeDesc.Node.Locality
}
Example #5
// deadReplicasGossipUpdate is the gossip callback used to keep the StorePool up to date.
func (sp *StorePool) deadReplicasGossipUpdate(_ string, content roachpb.Value) {
	var replicas roachpb.StoreDeadReplicas
	if err := content.GetProto(&replicas); err != nil {
		ctx := sp.AnnotateCtx(context.TODO())
		log.Error(ctx, err)
		return
	}

	sp.mu.Lock()
	defer sp.mu.Unlock()
	detail := sp.getStoreDetailLocked(replicas.StoreID)
	deadReplicas := make(map[roachpb.RangeID][]roachpb.ReplicaDescriptor)
	for _, r := range replicas.Replicas {
		deadReplicas[r.RangeID] = append(deadReplicas[r.RangeID], r.Replica)
	}
	detail.deadReplicas = deadReplicas
}
Example #6
// livenessGossipUpdate is the gossip callback used to keep the
// in-memory liveness info up to date.
func (nl *NodeLiveness) livenessGossipUpdate(key string, content roachpb.Value) {
	var liveness Liveness
	if err := content.GetProto(&liveness); err != nil {
		log.Error(context.TODO(), err)
		return
	}

	// If there's an existing liveness record, only update the received
	// timestamp if this is our first receipt of this node's liveness
	// or if the expiration or epoch was advanced.
	nl.mu.Lock()
	defer nl.mu.Unlock()
	exLiveness, ok := nl.mu.nodes[liveness.NodeID]
	if !ok || exLiveness.Expiration.Less(liveness.Expiration) || exLiveness.Epoch < liveness.Epoch {
		nl.mu.nodes[liveness.NodeID] = liveness
	}
}
Example #7
// updateNodeAddress is a gossip callback which fires with each
// update to the node address. This allows us to compute the
// total size of the gossip network (for determining max peers
// each gossip node is allowed to have), as well as to create
// new resolvers for each encountered host and to write the
// set of gossip node addresses to persistent storage when it
// changes.
func (g *Gossip) updateNodeAddress(_ string, content roachpb.Value) {
	ctx := g.AnnotateCtx(context.TODO())
	var desc roachpb.NodeDescriptor
	if err := content.GetProto(&desc); err != nil {
		log.Error(ctx, err)
		return
	}

	g.mu.Lock()
	defer g.mu.Unlock()

	// Skip if the node has already been seen.
	if _, ok := g.nodeDescs[desc.NodeID]; ok {
		return
	}

	g.nodeDescs[desc.NodeID] = &desc

	// Recompute max peers based on size of network and set the max
	// sizes for incoming and outgoing node sets.
	maxPeers := g.maxPeers(len(g.nodeDescs))
	g.mu.incoming.setMaxSize(maxPeers)
	g.outgoing.setMaxSize(maxPeers)

	// Skip if it's our own address.
	if desc.Address == g.mu.is.NodeAddr {
		return
	}

	// Add this new node address (if it's not already there) to our list
	// of resolvers so we can keep connecting to gossip if the original
	// resolvers go offline.
	g.maybeAddResolver(desc.Address)

	// Add new address (if it's not already there) to bootstrap info and
	// persist if possible.
	if g.storage != nil && g.maybeAddBootstrapAddress(desc.Address) {
		if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
			log.Error(ctx, err)
		}
	}
}
Example #8
// MigrateZoneConfig migrates the legacy ZoneConfig format into the new one.
func MigrateZoneConfig(value *roachpb.Value) (ZoneConfig, error) {
	var zone ZoneConfig
	if err := value.GetProto(&zone); err != nil {
		return ZoneConfig{}, err
	}
	if len(zone.ReplicaAttrs) > 0 {
		if zone.NumReplicas > 0 || len(zone.Constraints.Constraints) > 0 {
			return ZoneConfig{}, errors.New("migration to new ZoneConfig failed due to previous partial upgrade")
		}
		zone.NumReplicas = int32(len(zone.ReplicaAttrs))
		if zone.NumReplicas > 0 {
			attrs := zone.ReplicaAttrs[0].Attrs
			zone.Constraints.Constraints = make([]Constraint, len(attrs))
			for i, attr := range attrs {
				zone.Constraints.Constraints[i].Value = attr
			}
		}
		zone.ReplicaAttrs = nil
	}
	return zone, nil
}
Example #9
// updateSystemConfig is the raw gossip info callback.
// Unmarshal the system config and, if successful, update our
// copy and run the callbacks.
func (g *Gossip) updateSystemConfig(key string, content roachpb.Value) {
	ctx := g.AnnotateCtx(context.TODO())
	if key != KeySystemConfig {
		log.Fatalf(ctx, "wrong key received on SystemConfig callback: %s", key)
	}
	cfg := config.SystemConfig{}
	if err := content.GetProto(&cfg); err != nil {
		log.Errorf(ctx, "could not unmarshal system config on callback: %s", err)
		return
	}

	g.systemConfigMu.Lock()
	defer g.systemConfigMu.Unlock()
	g.systemConfig = cfg
	g.systemConfigSet = true
	for _, c := range g.systemConfigChannels {
		select {
		case c <- struct{}{}:
		default:
		}
	}
}
Example #10
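// getProtoValue wraps raw bytes in a roachpb.Value and unmarshals the
// protobuf payload into msg.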
func getProtoValue(data []byte, msg proto.Message) error {
	value := roachpb.Value{
		RawBytes: data,
	}
	return value.GetProto(msg)
}
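For orientation, the sketch below shows how a helper like getProtoValue above might be called. The decodeRangeDescriptor wrapper is a hypothetical assumption made only for illustration; just roachpb.Value.GetProto and roachpb.RangeDescriptor are taken from the examples in this listing.
// decodeRangeDescriptor is a hypothetical example caller, not CockroachDB code.
// It feeds protobuf-encoded raw bytes through getProtoValue to obtain a
// RangeDescriptor, mirroring the pattern used throughout the examples above.
func decodeRangeDescriptor(data []byte) (roachpb.RangeDescriptor, error) {
	var desc roachpb.RangeDescriptor
	if err := getProtoValue(data, &desc); err != nil {
		return roachpb.RangeDescriptor{}, err
	}
	return desc, nil
}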
Example #11
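// tryRangeIDKey decodes the inline value of a range-ID local key and renders
// it as a string, choosing the decoding (int, bool, or protobuf message)
// based on the key suffix.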
func tryRangeIDKey(kv engine.MVCCKeyValue) (string, error) {
	if kv.Key.Timestamp != hlc.ZeroTimestamp {
		return "", fmt.Errorf("range ID keys shouldn't have timestamps: %s", kv.Key)
	}
	_, _, suffix, _, err := keys.DecodeRangeIDKey(kv.Key.Key)
	if err != nil {
		return "", err
	}

	// All range ID keys are stored inline on the metadata.
	var meta enginepb.MVCCMetadata
	if err := meta.Unmarshal(kv.Value); err != nil {
		return "", err
	}
	value := roachpb.Value{RawBytes: meta.RawBytes}

	// Values encoded as protobufs set msg and continue outside the
	// switch. Other types are handled inside the switch and return.
	var msg proto.Message
	switch {
	case bytes.Equal(suffix, keys.LocalLeaseAppliedIndexSuffix):
		fallthrough
	case bytes.Equal(suffix, keys.LocalRaftAppliedIndexSuffix):
		i, err := value.GetInt()
		if err != nil {
			return "", err
		}
		return strconv.FormatInt(i, 10), nil

	case bytes.Equal(suffix, keys.LocalRangeFrozenStatusSuffix):
		b, err := value.GetBool()
		if err != nil {
			return "", err
		}
		return strconv.FormatBool(b), nil

	case bytes.Equal(suffix, keys.LocalAbortCacheSuffix):
		msg = &roachpb.AbortCacheEntry{}

	case bytes.Equal(suffix, keys.LocalRangeLastGCSuffix):
		msg = &hlc.Timestamp{}

	case bytes.Equal(suffix, keys.LocalRaftTombstoneSuffix):
		msg = &roachpb.RaftTombstone{}

	case bytes.Equal(suffix, keys.LocalRaftTruncatedStateSuffix):
		msg = &roachpb.RaftTruncatedState{}

	case bytes.Equal(suffix, keys.LocalRangeLeaseSuffix):
		msg = &roachpb.Lease{}

	case bytes.Equal(suffix, keys.LocalRangeStatsSuffix):
		msg = &enginepb.MVCCStats{}

	case bytes.Equal(suffix, keys.LocalRaftHardStateSuffix):
		msg = &raftpb.HardState{}

	case bytes.Equal(suffix, keys.LocalRaftLastIndexSuffix):
		i, err := value.GetInt()
		if err != nil {
			return "", err
		}
		return strconv.FormatInt(i, 10), nil

	case bytes.Equal(suffix, keys.LocalRangeLastVerificationTimestampSuffixDeprecated):
		msg = &hlc.Timestamp{}

	case bytes.Equal(suffix, keys.LocalRangeLastReplicaGCTimestampSuffix):
		msg = &hlc.Timestamp{}

	default:
		return "", fmt.Errorf("unknown raft id key %s", suffix)
	}

	if err := value.GetProto(msg); err != nil {
		return "", err
	}
	return msg.String(), nil
}
Example #12
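// TestSystemConfigGossip verifies that a write to a system key is gossiped
// only once a transaction commits with the system config trigger set.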
func TestSystemConfigGossip(t *testing.T) {
	defer leaktest.AfterTest(t)()
	t.Skip("#12351")

	s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop()
	ts := s.(*TestServer)
	ctx := context.TODO()

	key := sqlbase.MakeDescMetadataKey(keys.MaxReservedDescID)
	valAt := func(i int) *sqlbase.DatabaseDescriptor {
		return &sqlbase.DatabaseDescriptor{Name: "foo", ID: sqlbase.ID(i)}
	}

	// Register a callback for gossip updates.
	resultChan := ts.Gossip().RegisterSystemConfigChannel()

	// The span gets gossiped when it first shows up.
	select {
	case <-resultChan:

	case <-time.After(500 * time.Millisecond):
		t.Fatal("did not receive gossip message")
	}

	// Try a plain KV write first.
	if err := kvDB.Put(ctx, key, valAt(0)); err != nil {
		t.Fatal(err)
	}

	// Now do it as part of a transaction, but without the trigger set.
	if err := kvDB.Txn(ctx, func(txn *client.Txn) error {
		return txn.Put(key, valAt(1))
	}); err != nil {
		t.Fatal(err)
	}

	// Gossip channel should be dormant.
	// TODO(tschottdorf): This test is likely flaky. Why can't some other
	// process trigger gossip? It seems that a new range lease being
	// acquired will gossip a new system config since the hash changed and fail
	// the test (seen in practice during some buggy WIP).
	var systemConfig config.SystemConfig
	select {
	case <-resultChan:
		systemConfig, _ = ts.gossip.GetSystemConfig()
		t.Fatalf("unexpected message received on gossip channel: %v", systemConfig)

	case <-time.After(50 * time.Millisecond):
	}

	// This time mark the transaction as having a Gossip trigger.
	if err := kvDB.Txn(ctx, func(txn *client.Txn) error {
		txn.SetSystemConfigTrigger()
		return txn.Put(key, valAt(2))
	}); err != nil {
		t.Fatal(err)
	}

	// New system config received.
	select {
	case <-resultChan:
		systemConfig, _ = ts.gossip.GetSystemConfig()

	case <-time.After(500 * time.Millisecond):
		t.Fatal("did not receive gossip message")
	}

	// Now check the new config.
	var val *roachpb.Value
	for _, kv := range systemConfig.Values {
		if bytes.Equal(key, kv.Key) {
			val = &kv.Value
			break
		}
	}
	if val == nil {
		t.Fatal("key not found in gossiped info")
	}

	// Make sure the returned value is valAt(2).
	got := new(sqlbase.DatabaseDescriptor)
	if err := val.GetProto(got); err != nil {
		t.Fatal(err)
	}
	if expected := valAt(2); !reflect.DeepEqual(got, expected) {
		t.Fatalf("mismatch: expected %+v, got %+v", *expected, *got)
	}
}
Example #13
// updateNodeAddress is a gossip callback which fires with each
// update to the node address. This allows us to compute the
// total size of the gossip network (for determining max peers
// each gossip node is allowed to have), as well as to create
// new resolvers for each encountered host and to write the
// set of gossip node addresses to persistent storage when it
// changes.
func (g *Gossip) updateNodeAddress(key string, content roachpb.Value) {
	ctx := g.AnnotateCtx(context.TODO())
	var desc roachpb.NodeDescriptor
	if err := content.GetProto(&desc); err != nil {
		log.Error(ctx, err)
		return
	}

	g.mu.Lock()
	defer g.mu.Unlock()

	// If desc is the empty descriptor, that indicates that the node has been
	// removed from the cluster. If that's the case, remove it from our map of
	// nodes to prevent other parts of the system from trying to talk to it.
	// We can't directly compare the node against the empty descriptor because
	// the proto has a repeated field and thus isn't comparable.
	if desc.NodeID == 0 && desc.Address.IsEmpty() {
		nodeID, err := NodeIDFromKey(key)
		if err != nil {
			log.Errorf(ctx, "unable to update node address for removed node: %s", err)
			return
		}
		log.Infof(ctx, "removed node %d from gossip", nodeID)
		delete(g.nodeDescs, nodeID)
		return
	}

	// Skip if the node has already been seen.
	if _, ok := g.nodeDescs[desc.NodeID]; ok {
		return
	}
	g.nodeDescs[desc.NodeID] = &desc

	// Recompute max peers based on size of network and set the max
	// sizes for incoming and outgoing node sets.
	maxPeers := g.maxPeers(len(g.nodeDescs))
	g.mu.incoming.setMaxSize(maxPeers)
	g.outgoing.setMaxSize(maxPeers)

	// Skip if it's our own address.
	if desc.Address == g.mu.is.NodeAddr {
		return
	}

	// Add this new node address (if it's not already there) to our list
	// of resolvers so we can keep connecting to gossip if the original
	// resolvers go offline.
	g.maybeAddResolver(desc.Address)

	// We ignore empty addresses for the sake of not breaking the many tests
	// that don't bother specifying addresses.
	if desc.Address.IsEmpty() {
		return
	}

	// If the new node's address conflicts with another node's address, then it
	// must be the case that the new node has replaced the previous one. Remove
	// it from our set of tracked descriptors to ensure we don't attempt to
	// connect to its previous identity (as came up in issue #10266).
	oldNodeID, ok := g.bootstrapAddrs[desc.Address]
	if ok && oldNodeID != unknownNodeID && oldNodeID != desc.NodeID {
		log.Infof(ctx, "removing node %d which was at same address (%s) as new node %v",
			oldNodeID, desc.Address, desc)
		delete(g.nodeDescs, oldNodeID)

		// Deleting the local copy isn't enough to remove the node from the gossip
		// network. We also have to clear it out in the infoStore by overwriting
		// it with an empty descriptor, which can be represented as just an empty
		// byte array due to how protocol buffers are serialized.
		// Calling addInfoLocked here is somewhat recursive since
		// updateNodeAddress is typically called in response to the infoStore
		// being updated but won't lead to deadlock because it's called
		// asynchronously.
		key := MakeNodeIDKey(oldNodeID)
		var emptyProto []byte
		if err := g.addInfoLocked(key, emptyProto, ttlNodeDescriptorGossip); err != nil {
			log.Errorf(ctx, "failed to empty node descriptor for node %d: %s", oldNodeID, err)
		}
	}
	// Add new address (if it's not already there) to bootstrap info and
	// persist if possible.
	added := g.maybeAddBootstrapAddress(desc.Address, desc.NodeID)
	if added && g.storage != nil {
		if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
			log.Error(ctx, err)
		}
	}
}
Example #14
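// TestSystemConfigGossip verifies that a system key written in a transaction
// with the system config trigger set shows up in the gossiped system config.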
func TestSystemConfigGossip(t *testing.T) {
	defer leaktest.AfterTest(t)()
	t.Skip("#12351")

	s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{})
	defer s.Stopper().Stop()
	ts := s.(*TestServer)
	ctx := context.TODO()

	key := sqlbase.MakeDescMetadataKey(keys.MaxReservedDescID)
	valAt := func(i int) *sqlbase.DatabaseDescriptor {
		return &sqlbase.DatabaseDescriptor{Name: "foo", ID: sqlbase.ID(i)}
	}

	// Register a callback for gossip updates.
	resultChan := ts.Gossip().RegisterSystemConfigChannel()

	// The span gets gossiped when it first shows up.
	select {
	case <-resultChan:

	case <-time.After(500 * time.Millisecond):
		t.Fatal("did not receive gossip message")
	}

	// Write a system key with the transaction marked as having a Gossip trigger.
	if err := kvDB.Txn(ctx, func(txn *client.Txn) error {
		txn.SetSystemConfigTrigger()
		return txn.Put(key, valAt(2))
	}); err != nil {
		t.Fatal(err)
	}

	// This has to be wrapped in a SucceedSoon because system migrations on the
	// testserver's startup can trigger system config updates without the key we
	// wrote.
	testutils.SucceedsSoon(t, func() error {
		// New system config received.
		var systemConfig config.SystemConfig
		select {
		case <-resultChan:
			systemConfig, _ = ts.gossip.GetSystemConfig()

		case <-time.After(500 * time.Millisecond):
			return errors.Errorf("did not receive gossip message")
		}

		// Now check the new config.
		var val *roachpb.Value
		for _, kv := range systemConfig.Values {
			if bytes.Equal(key, kv.Key) {
				val = &kv.Value
				break
			}
		}
		if val == nil {
			return errors.Errorf("key not found in gossiped info")
		}

		// Make sure the returned value is valAt(2).
		got := new(sqlbase.DatabaseDescriptor)
		if err := val.GetProto(got); err != nil {
			return err
		}
		if expected := valAt(2); !reflect.DeepEqual(got, expected) {
			return errors.Errorf("mismatch: expected %+v, got %+v", *expected, *got)
		}
		return nil
	})
}