Example #1
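// setAppliedIndex persists the Raft applied index and the lease applied index
// for the given range.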
func setAppliedIndex(
	ctx context.Context,
	eng engine.ReadWriter,
	ms *enginepb.MVCCStats,
	rangeID roachpb.RangeID,
	appliedIndex,
	leaseAppliedIndex uint64,
) error {
	var value roachpb.Value
	value.SetInt(int64(appliedIndex))

	if err := engine.MVCCPut(ctx, eng, ms,
		keys.RaftAppliedIndexKey(rangeID),
		hlc.ZeroTimestamp,
		value,
		nil /* txn */); err != nil {
		return err
	}
	value.SetInt(int64(leaseAppliedIndex))
	return engine.MVCCPut(ctx, eng, ms,
		keys.LeaseAppliedIndexKey(rangeID),
		hlc.ZeroTimestamp,
		value,
		nil /* txn */)
}
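For contrast, a read-side sketch of the same key. Imports are omitted to match the surrounding snippets, and the exact API is an assumption: it presumes that in this era engine.MVCCGet takes a context and an engine.Reader and returns (*roachpb.Value, []roachpb.Intent, error), and that roachpb.Value exposes GetInt.

// loadAppliedIndex is a hypothetical counterpart to setAppliedIndex above: it
// reads the Raft applied index back from the same unversioned key.
func loadAppliedIndex(
	ctx context.Context, reader engine.Reader, rangeID roachpb.RangeID,
) (uint64, error) {
	// The value was written at hlc.ZeroTimestamp, i.e. inline and outside MVCC
	// versioning, so it is read back at the same timestamp and without a txn.
	v, _, err := engine.MVCCGet(ctx, reader, keys.RaftAppliedIndexKey(rangeID),
		hlc.ZeroTimestamp, true /* consistent */, nil /* txn */)
	if err != nil || v == nil {
		return 0, err
	}
	i, err := v.GetInt()
	if err != nil {
		return 0, err
	}
	return uint64(i), nil
}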
Example #2
// setAppliedIndex persists a new applied index.
func setAppliedIndex(eng engine.Engine, raftID proto.RaftID, appliedIndex uint64) error {
	return engine.MVCCPut(eng, nil, /* stats */
		keys.RaftAppliedIndexKey(raftID),
		proto.ZeroTimestamp,
		proto.Value{Bytes: encoding.EncodeUint64(nil, appliedIndex)},
		nil /* txn */)
}
Example #3
// setAppliedIndex persists a new applied index.
func setAppliedIndex(eng engine.Engine, rangeID roachpb.RangeID, appliedIndex uint64) error {
	return engine.MVCCPut(eng, nil, /* stats */
		keys.RaftAppliedIndexKey(rangeID),
		roachpb.ZeroTimestamp,
		roachpb.MakeValueFromBytes(encoding.EncodeUint64(nil, appliedIndex)),
		nil /* txn */)
}
Example #4
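// setFrozenStatus persists the range's frozen status.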
func setFrozenStatus(
	eng engine.ReadWriter, ms *enginepb.MVCCStats, rangeID roachpb.RangeID, frozen bool,
) error {
	var val roachpb.Value
	val.SetBool(frozen)
	return engine.MVCCPut(context.Background(), eng, ms,
		keys.RangeFrozenStatusKey(rangeID), hlc.ZeroTimestamp, val, nil)
}
Example #5
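// setLastIndex persists a new last index.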
func setLastIndex(eng engine.ReadWriter, rangeID roachpb.RangeID, lastIndex uint64) error {
	var value roachpb.Value
	value.SetInt(int64(lastIndex))

	return engine.MVCCPut(context.Background(), eng, nil, keys.RaftLastIndexKey(rangeID),
		hlc.ZeroTimestamp,
		value,
		nil /* txn */)
}
Example #6
// setLastIndex persists a new last index.
func setLastIndex(eng engine.Engine, rangeID roachpb.RangeID, lastIndex uint64) error {
	var value roachpb.Value
	value.SetInt(int64(lastIndex))

	return engine.MVCCPut(eng, nil, keys.RaftLastIndexKey(rangeID),
		roachpb.ZeroTimestamp,
		value,
		nil /* txn */)
}
Example #7
// setAppliedIndex persists a new applied index.
func setAppliedIndex(eng engine.Engine, ms *engine.MVCCStats, rangeID roachpb.RangeID, appliedIndex uint64) error {
	var value roachpb.Value
	value.SetInt(int64(appliedIndex))

	return engine.MVCCPut(eng, ms,
		keys.RaftAppliedIndexKey(rangeID),
		roachpb.ZeroTimestamp,
		value,
		nil /* txn */)
}
Example #8
// Indirectly this tests that the transaction correctly remembers the NodeID
// of the node being read from, at least in this simple case. Not remembering
// the node would lead to thousands of transaction restarts and almost
// certainly a test timeout.
func TestUncertaintyRestarts(t *testing.T) {
	{
		db, eng, clock, mClock, _, transport, err := createTestDB()
		if err != nil {
			t.Fatal(err)
		}
		defer transport.Close()
		// Set a large offset so that a busy restart-loop
		// really shows. Also makes sure that the values
		// we write in the future below don't actually
		// wind up in the past.
		offset := 4000 * time.Millisecond
		clock.SetMaxOffset(offset)
		key := proto.Key("key")
		value := proto.Value{
			Bytes: nil, // Set for each Put
		}
		// With the correct restart behaviour, we see only one restart
		// and the value read is the very first one (as nothing else
		// has been written)
		wantedBytes := []byte("value-0")

		txnOpts := &client.TransactionOptions{
			Name: "uncertainty",
		}
		gr := &proto.GetResponse{}
		i := -1
		tErr := db.RunTransaction(txnOpts, func(txn *client.KV) error {
			i++
			mClock.Increment(1)
			futureTS := clock.Now()
			futureTS.WallTime++
			value.Bytes = []byte(fmt.Sprintf("value-%d", i))
			err = engine.MVCCPut(eng, nil, key, futureTS, value, nil)
			if err != nil {
				t.Fatal(err)
			}
			gr.Reset()
			if err := txn.Call(proto.Get, proto.GetArgs(key), gr); err != nil {
				return err
			}
			if gr.Value == nil || !bytes.Equal(gr.Value.Bytes, wantedBytes) {
				t.Fatalf("%d: read wrong value: %v, wanted %q", i,
					gr.Value, wantedBytes)
			}
			return nil
		})
		if i != 1 {
			t.Errorf("txn restarted %d times, expected only one restart", i)
		}
		if tErr != nil {
			t.Fatal(tErr)
		}
	}
}
Example #9
// PutSequence writes a sequence number for the specified family.
func (rc *ResponseCache) PutSequence(e engine.Engine, family []byte, sequence int64, err error) error {
	if sequence <= 0 || len(family) == 0 {
		return errEmptyID
	}
	if !rc.shouldCacheError(err) {
		return nil
	}

	// Write the response value to the engine.
	key := keys.ResponseCacheKey(rc.rangeID, family)
	var v roachpb.Value
	v.SetInt(sequence)
	return engine.MVCCPut(e, nil /* ms */, key, roachpb.ZeroTimestamp, v, nil /* txn */)
}
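A read-side counterpart sketch for PutSequence. The context-free MVCCGet call and its three return values mirror the other snippets of this era in this section; the GetInt accessor on roachpb.Value and the getSequence name are assumptions for illustration, and imports are omitted as in the surrounding examples.

// getSequence is a hypothetical lookup mirroring PutSequence: it returns the
// cached sequence number for the given family, or 0 if none has been written.
func (rc *ResponseCache) getSequence(e engine.Engine, family []byte) (int64, error) {
	if len(family) == 0 {
		return 0, errEmptyID
	}
	key := keys.ResponseCacheKey(rc.rangeID, family)
	v, _, err := engine.MVCCGet(e, key, roachpb.ZeroTimestamp, true /* consistent */, nil /* txn */)
	if err != nil || v == nil {
		return 0, err
	}
	return v.GetInt()
}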
Example #10
// append the given entries to the raft log. Takes the previous values of
// r.mu.lastIndex and r.mu.raftLogSize, and returns new values. We do this
// rather than modifying them directly because these modifications need to be
// atomic with the commit of the batch.
func (r *Replica) append(
	ctx context.Context,
	batch engine.ReadWriter,
	prevLastIndex uint64,
	prevRaftLogSize int64,
	entries []raftpb.Entry,
) (uint64, int64, error) {
	if len(entries) == 0 {
		return prevLastIndex, prevRaftLogSize, nil
	}
	var diff enginepb.MVCCStats
	var value roachpb.Value
	for i := range entries {
		ent := &entries[i]
		key := keys.RaftLogKey(r.RangeID, ent.Index)
		if err := value.SetProto(ent); err != nil {
			return 0, 0, err
		}
		value.InitChecksum(key)
		var err error
		if ent.Index > prevLastIndex {
			err = engine.MVCCBlindPut(ctx, batch, &diff, key, hlc.ZeroTimestamp, value, nil /* txn */)
		} else {
			err = engine.MVCCPut(ctx, batch, &diff, key, hlc.ZeroTimestamp, value, nil /* txn */)
		}
		if err != nil {
			return 0, 0, err
		}
	}

	// Delete any previously appended log entries which never committed.
	lastIndex := entries[len(entries)-1].Index
	for i := lastIndex + 1; i <= prevLastIndex; i++ {
		err := engine.MVCCDelete(ctx, batch, &diff, keys.RaftLogKey(r.RangeID, i),
			hlc.ZeroTimestamp, nil /* txn */)
		if err != nil {
			return 0, 0, err
		}
	}

	if err := setLastIndex(ctx, batch, r.RangeID, lastIndex); err != nil {
		return 0, 0, err
	}

	raftLogSize := prevRaftLogSize + diff.SysBytes

	return lastIndex, raftLogSize, nil
}
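The comment above explains why append returns the new values instead of writing r.mu.lastIndex and r.mu.raftLogSize directly. A minimal sketch of that calling pattern follows; it assumes an engine.Batch that satisfies engine.ReadWriter with a no-argument Commit (as in the older snippets in this section) and that r.mu embeds a lockable mutex holding those two fields. The wrapper itself is hypothetical, not the actual call site.

// appendAndCommit is a hypothetical caller of append: it stages the entries in
// a batch and publishes the new in-memory values only after the batch commits,
// keeping the in-memory state consistent with what is on disk.
func (r *Replica) appendAndCommit(
	ctx context.Context, batch engine.Batch, entries []raftpb.Entry,
) error {
	r.mu.Lock()
	prevLastIndex, prevRaftLogSize := r.mu.lastIndex, r.mu.raftLogSize
	r.mu.Unlock()

	lastIndex, raftLogSize, err := r.append(ctx, batch, prevLastIndex, prevRaftLogSize, entries)
	if err != nil {
		return err
	}
	if err := batch.Commit(); err != nil {
		return err
	}
	// Only now, after the write is durable, update the in-memory copies.
	r.mu.Lock()
	r.mu.lastIndex = lastIndex
	r.mu.raftLogSize = raftLogSize
	r.mu.Unlock()
	return nil
}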
Example #11
// createRangeData creates sample range data in all possible areas of
// the key space. Returns a slice of the encoded keys of all created
// data.
func createRangeData(t *testing.T, r *Replica) []engine.MVCCKey {
	ts0 := hlc.ZeroTimestamp
	ts := hlc.Timestamp{WallTime: 1}
	desc := r.Desc()
	keyTSs := []struct {
		key roachpb.Key
		ts  hlc.Timestamp
	}{
		{keys.AbortCacheKey(r.RangeID, testTxnID), ts0},
		{keys.AbortCacheKey(r.RangeID, testTxnID2), ts0},
		{keys.RangeFrozenStatusKey(r.RangeID), ts0},
		{keys.RangeLastGCKey(r.RangeID), ts0},
		{keys.RaftAppliedIndexKey(r.RangeID), ts0},
		{keys.RaftTruncatedStateKey(r.RangeID), ts0},
		{keys.LeaseAppliedIndexKey(r.RangeID), ts0},
		{keys.RangeStatsKey(r.RangeID), ts0},
		{keys.RaftHardStateKey(r.RangeID), ts0},
		{keys.RaftLastIndexKey(r.RangeID), ts0},
		{keys.RaftLogKey(r.RangeID, 1), ts0},
		{keys.RaftLogKey(r.RangeID, 2), ts0},
		{keys.RangeLastReplicaGCTimestampKey(r.RangeID), ts0},
		{keys.RangeLastVerificationTimestampKey(r.RangeID), ts0},
		{keys.RangeDescriptorKey(desc.StartKey), ts},
		{keys.TransactionKey(roachpb.Key(desc.StartKey), uuid.NewV4()), ts0},
		{keys.TransactionKey(roachpb.Key(desc.StartKey.Next()), uuid.NewV4()), ts0},
		{keys.TransactionKey(fakePrevKey(desc.EndKey), uuid.NewV4()), ts0},
		// TODO(bdarnell): KeyMin.Next() results in a key in the reserved system-local space.
		// Once we have resolved https://github.com/cockroachdb/cockroach/issues/437,
		// replace this with something that reliably generates the first valid key in the range.
		//{r.Desc().StartKey.Next(), ts},
		// The following line is similar to StartKey.Next() but adds more to the key to
		// avoid falling into the system-local space.
		{append(append([]byte{}, desc.StartKey...), '\x02'), ts},
		{fakePrevKey(r.Desc().EndKey), ts},
	}

	keys := []engine.MVCCKey{}
	for _, keyTS := range keyTSs {
		if err := engine.MVCCPut(context.Background(), r.store.Engine(), nil, keyTS.key, keyTS.ts, roachpb.MakeValueFromString("value"), nil); err != nil {
			t.Fatal(err)
		}
		keys = append(keys, engine.MVCCKey{Key: keyTS.key, Timestamp: keyTS.ts})
	}
	return keys
}
Example #12
// Indirectly this tests that the transaction correctly remembers the NodeID
// of the node being read from, at least in this simple case. Not remembering
// the node would lead to thousands of transaction restarts and almost
// certainly a test timeout.
func TestUncertaintyRestarts(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()
	// Set a large offset so that a busy restart-loop
	// really shows. Also makes sure that the values
	// we write in the future below don't actually
	// wind up in the past.
	offset := 4000 * time.Millisecond
	s.Clock.SetMaxOffset(offset)
	key := proto.Key("key")
	value := proto.Value{
		Bytes: nil, // Set for each Put
	}
	// With the correct restart behaviour, we see only one restart
	// and the value read is the very first one (as nothing else
	// has been written)
	wantedBytes := []byte("value-0")

	i := -1
	tErr := s.DB.Txn(func(txn *client.Txn) error {
		i++
		s.Manual.Increment(1)
		futureTS := s.Clock.Now()
		futureTS.WallTime++
		value.Bytes = []byte(fmt.Sprintf("value-%d", i))
		if err := engine.MVCCPut(s.Eng, nil, key, futureTS, value, nil); err != nil {
			t.Fatal(err)
		}
		gr, err := txn.Get(key)
		if err != nil {
			return err
		}
		if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), wantedBytes) {
			t.Fatalf("%d: read wrong value: %v, wanted %q", i, gr.Value, wantedBytes)
		}
		return nil
	})
	if i != 1 {
		t.Errorf("txn restarted %d times, expected only one restart", i)
	}
	if tErr != nil {
		t.Fatal(tErr)
	}
}
Example #13
// createRangeData creates sample range data in all possible areas of
// the key space. Returns a slice of the encoded keys of all created
// data.
func createRangeData(r *Replica, t *testing.T) []roachpb.EncodedKey {
	ts0 := roachpb.ZeroTimestamp
	ts := roachpb.Timestamp{WallTime: 1}
	keyTSs := []struct {
		key roachpb.Key
		ts  roachpb.Timestamp
	}{
		{keys.ResponseCacheKey(r.Desc().RangeID, &roachpb.ClientCmdID{WallTime: 1, Random: 1}), ts0},
		{keys.ResponseCacheKey(r.Desc().RangeID, &roachpb.ClientCmdID{WallTime: 2, Random: 2}), ts0},
		{keys.RaftHardStateKey(r.Desc().RangeID), ts0},
		{keys.RaftLogKey(r.Desc().RangeID, 1), ts0},
		{keys.RaftLogKey(r.Desc().RangeID, 2), ts0},
		{keys.RangeGCMetadataKey(r.Desc().RangeID), ts0},
		{keys.RangeLastVerificationTimestampKey(r.Desc().RangeID), ts0},
		{keys.RangeStatsKey(r.Desc().RangeID), ts0},
		{keys.RangeDescriptorKey(r.Desc().StartKey), ts},
		{keys.TransactionKey(roachpb.Key(r.Desc().StartKey), []byte("1234")), ts0},
		{keys.TransactionKey(roachpb.Key(r.Desc().StartKey.Next()), []byte("5678")), ts0},
		{keys.TransactionKey(fakePrevKey(r.Desc().EndKey), []byte("2468")), ts0},
		// TODO(bdarnell): KeyMin.Next() results in a key in the reserved system-local space.
		// Once we have resolved https://github.com/cockroachdb/cockroach/issues/437,
		// replace this with something that reliably generates the first valid key in the range.
		//{r.Desc().StartKey.Next(), ts},
		// The following line is similar to StartKey.Next() but adds more to the key to
		// avoid falling into the system-local space.
		{append(append([]byte{}, r.Desc().StartKey...), '\x01'), ts},
		{fakePrevKey(r.Desc().EndKey), ts},
	}

	keys := []roachpb.EncodedKey{}
	for _, keyTS := range keyTSs {
		if err := engine.MVCCPut(r.store.Engine(), nil, keyTS.key, keyTS.ts, roachpb.MakeValueFromString("value"), nil); err != nil {
			t.Fatal(err)
		}
		keys = append(keys, engine.MVCCEncodeKey(keyTS.key))
		if !keyTS.ts.Equal(ts0) {
			keys = append(keys, engine.MVCCEncodeVersionKey(keyTS.key, keyTS.ts))
		}
	}
	return keys
}
Example #14
// ApplySnapshot implements the multiraft.WriteableGroupStorage interface.
func (r *Range) ApplySnapshot(snap raftpb.Snapshot) error {
	snapData := proto.RaftSnapshotData{}
	err := gogoproto.Unmarshal(snap.Data, &snapData)
	if err != nil {
		return err
	}

	// First, save the HardState.  The HardState must not be changed
	// because it may record a previous vote cast by this node.
	hardStateKey := keys.RaftHardStateKey(r.Desc().RaftID)
	hardState, _, err := engine.MVCCGet(r.rm.Engine(), hardStateKey, proto.ZeroTimestamp, true /* consistent */, nil)
	if err != nil {
		return err
	}

	// Extract the updated range descriptor.
	desc := snapData.RangeDescriptor

	batch := r.rm.Engine().NewBatch()
	defer batch.Close()

	// Delete everything in the range and recreate it from the snapshot.
	for iter := newRangeDataIterator(&desc, r.rm.Engine()); iter.Valid(); iter.Next() {
		if err := batch.Clear(iter.Key()); err != nil {
			return err
		}
	}

	// Write the snapshot into the range.
	for _, kv := range snapData.KV {
		if err := batch.Put(kv.Key, kv.Value); err != nil {
			return err
		}
	}

	// Restore the saved HardState.
	if hardState == nil {
		err := engine.MVCCDelete(batch, nil, hardStateKey, proto.ZeroTimestamp, nil)
		if err != nil {
			return err
		}
	} else {
		err := engine.MVCCPut(batch, nil, hardStateKey, proto.ZeroTimestamp, *hardState, nil)
		if err != nil {
			return err
		}
	}

	// Read the leader lease.
	lease, err := loadLeaderLease(batch, desc.RaftID)
	if err != nil {
		return err
	}

	// Copy range stats to new range.
	oldStats := r.stats
	r.stats, err = newRangeStats(desc.RaftID, batch)
	if err != nil {
		r.stats = oldStats
		return err
	}

	// The next line sets the persisted last index to the last applied index.
	// This is not a correctness issue, but means that we may have just
	// transferred some entries we're about to re-request from the leader and
	// overwrite.
	// However, raft.MultiNode currently expects this behaviour, and the
	// performance implications are not likely to be drastic. If our feelings
	// about this ever change, we can add a LastIndex field to
	// raftpb.SnapshotMetadata.
	if err := setLastIndex(batch, r.Desc().RaftID, snap.Metadata.Index); err != nil {
		return err
	}

	if err := batch.Commit(); err != nil {
		return err
	}

	// As outlined above, last and applied index are the same after applying
	// the snapshot.
	atomic.StoreUint64(&r.lastIndex, snap.Metadata.Index)
	atomic.StoreUint64(&r.appliedIndex, snap.Metadata.Index)

	// Atomically update the descriptor and lease.
	if err := r.setDesc(&desc); err != nil {
		return err
	}
	atomic.StorePointer(&r.lease, unsafe.Pointer(lease))
	return nil
}
Example #15
// setLastIndex persists a new last index.
func setLastIndex(eng engine.Engine, raftID proto.RaftID, lastIndex uint64) error {
	return engine.MVCCPut(eng, nil, keys.RaftLastIndexKey(raftID),
		proto.ZeroTimestamp, proto.Value{
			Bytes: encoding.EncodeUint64(nil, lastIndex),
		}, nil)
}
Example #16
// applySnapshot updates the replica based on the given snapshot.
func (r *Replica) applySnapshot(snap raftpb.Snapshot) error {
	snapData := roachpb.RaftSnapshotData{}
	err := proto.Unmarshal(snap.Data, &snapData)
	if err != nil {
		return err
	}

	rangeID := r.Desc().RangeID

	// First, save the HardState.  The HardState must not be changed
	// because it may record a previous vote cast by this node.
	hardStateKey := keys.RaftHardStateKey(rangeID)
	hardState, _, err := engine.MVCCGet(r.store.Engine(), hardStateKey, roachpb.ZeroTimestamp, true /* consistent */, nil)
	if err != nil {
		return err
	}

	// Extract the updated range descriptor.
	desc := snapData.RangeDescriptor

	batch := r.store.Engine().NewBatch()
	defer batch.Close()

	// Delete everything in the range and recreate it from the snapshot.
	iter := newReplicaDataIterator(&desc, r.store.Engine())
	defer iter.Close()
	for ; iter.Valid(); iter.Next() {
		if err := batch.Clear(iter.Key()); err != nil {
			return err
		}
	}

	// Write the snapshot into the range.
	for _, kv := range snapData.KV {
		mvccKey := engine.MVCCKey{
			Key:       kv.Key,
			Timestamp: kv.Timestamp,
		}
		if err := batch.Put(mvccKey, kv.Value); err != nil {
			return err
		}
	}

	// Restore the saved HardState.
	if hardState == nil {
		err := engine.MVCCDelete(batch, nil, hardStateKey, roachpb.ZeroTimestamp, nil)
		if err != nil {
			return err
		}
	} else {
		err := engine.MVCCPut(batch, nil, hardStateKey, roachpb.ZeroTimestamp, *hardState, nil)
		if err != nil {
			return err
		}
	}

	// Read the leader lease.
	lease, err := loadLeaderLease(batch, desc.RangeID)
	if err != nil {
		return err
	}

	// Load updated range stats. The local newStats variable will be assigned
	// to r.stats after the batch commits.
	newStats, err := newRangeStats(desc.RangeID, batch)
	if err != nil {
		return err
	}

	// The next line sets the persisted last index to the last applied index.
	// This is not a correctness issue, but means that we may have just
	// transferred some entries we're about to re-request from the leader and
	// overwrite.
	// However, raft.MultiNode currently expects this behaviour, and the
	// performance implications are not likely to be drastic. If our feelings
	// about this ever change, we can add a LastIndex field to
	// raftpb.SnapshotMetadata.
	if err := setLastIndex(batch, rangeID, snap.Metadata.Index); err != nil {
		return err
	}

	if err := batch.Commit(); err != nil {
		return err
	}

	// Update the range stats.
	r.stats.Replace(newStats)

	// As outlined above, last and applied index are the same after applying
	// the snapshot.
	atomic.StoreUint64(&r.lastIndex, snap.Metadata.Index)
	atomic.StoreUint64(&r.appliedIndex, snap.Metadata.Index)

	// Atomically update the descriptor and lease.
	if err := r.setDesc(&desc); err != nil {
		return err
	}
	// Update other fields which are uninitialized or need updating.
	// This may not happen if the system config has not yet been loaded.
	// While config update will correctly set the fields, there is no order
	// guarantee in ApplySnapshot.
	// TODO: should go through the standard store lock when adding a replica.
	if err := r.updateRangeInfo(); err != nil {
		return err
	}

	atomic.StorePointer(&r.lease, unsafe.Pointer(lease))
	return nil
}
Example #17
// applySnapshot updates the replica based on the given snapshot.
// Returns the new last index.
func (r *Replica) applySnapshot(batch engine.Engine, snap raftpb.Snapshot) (uint64, error) {
	snapData := roachpb.RaftSnapshotData{}
	err := proto.Unmarshal(snap.Data, &snapData)
	if err != nil {
		return 0, err
	}

	rangeID := r.RangeID

	// First, save the HardState. The HardState must not be changed
	// because it may record a previous vote cast by this node. This is
	// usually unnecessary because a snapshot is nearly always
	// accompanied by a new HardState which incorporates both our former
	// state and new information from the leader, but in the event that
	// the HardState has not changed, we want to use our own previous
	// HardState and not one that was transmitted via the snapshot.
	hardStateKey := keys.RaftHardStateKey(rangeID)
	hardState, _, err := engine.MVCCGet(batch, hardStateKey, roachpb.ZeroTimestamp, true /* consistent */, nil)
	if err != nil {
		return 0, err
	}

	// Extract the updated range descriptor.
	desc := snapData.RangeDescriptor

	// Delete everything in the range and recreate it from the snapshot.
	// We need to delete any old Raft log entries here because any log entries
	// that predate the snapshot will be orphaned and never truncated or GC'd.
	iter := newReplicaDataIterator(&desc, batch, false /* !replicatedOnly */)
	defer iter.Close()
	for ; iter.Valid(); iter.Next() {
		if err := batch.Clear(iter.Key()); err != nil {
			return 0, err
		}
	}

	// Determine the unreplicated key prefix so we can drop any
	// unreplicated keys from the snapshot.
	unreplicatedPrefix := keys.MakeRangeIDUnreplicatedPrefix(desc.RangeID)

	// Write the snapshot into the range.
	for _, kv := range snapData.KV {
		if bytes.HasPrefix(kv.Key, unreplicatedPrefix) {
			continue
		}
		mvccKey := engine.MVCCKey{
			Key:       kv.Key,
			Timestamp: kv.Timestamp,
		}
		if err := batch.Put(mvccKey, kv.Value); err != nil {
			return 0, err
		}
	}

	// Write the snapshot's Raft log into the range.
	if _, err := r.append(batch, 0, snapData.LogEntries); err != nil {
		return 0, err
	}

	// Restore the saved HardState.
	if hardState == nil {
		err := engine.MVCCDelete(batch, nil, hardStateKey, roachpb.ZeroTimestamp, nil)
		if err != nil {
			return 0, err
		}
	} else {
		err := engine.MVCCPut(batch, nil, hardStateKey, roachpb.ZeroTimestamp, *hardState, nil)
		if err != nil {
			return 0, err
		}
	}

	// Read the leader lease.
	lease, err := loadLeaderLease(batch, desc.RangeID)
	if err != nil {
		return 0, err
	}

	// Load updated range stats. The local newStats variable will be assigned
	// to r.stats after the batch commits.
	newStats, err := newRangeStats(desc.RangeID, batch)
	if err != nil {
		return 0, err
	}

	// The next line sets the persisted last index to the last applied index.
	// This is not a correctness issue, but means that we may have just
	// transferred some entries we're about to re-request from the leader and
	// overwrite.
	// However, raft.MultiNode currently expects this behaviour, and the
	// performance implications are not likely to be drastic. If our feelings
	// about this ever change, we can add a LastIndex field to
	// raftpb.SnapshotMetadata.
	if err := setLastIndex(batch, rangeID, snap.Metadata.Index); err != nil {
		return 0, err
	}

	batch.Defer(func() {
		// Update the range stats.
		r.stats.Replace(newStats)

		r.mu.Lock()
		// As outlined above, last and applied index are the same after applying
		// the snapshot.
		r.mu.appliedIndex = snap.Metadata.Index
		r.mu.leaderLease = lease
		r.mu.Unlock()

		// Update other fields which are uninitialized or need updating.
		// This may not happen if the system config has not yet been loaded.
		// While config update will correctly set the fields, there is no order
		// guarantee in ApplySnapshot.
		// TODO: should go through the standard store lock when adding a replica.
		if err := r.updateRangeInfo(&desc); err != nil {
			panic(err)
		}

		// Update the range descriptor. This is done last as this is the step that
		// makes the Replica visible in the Store.
		if err := r.setDesc(&desc); err != nil {
			panic(err)
		}
	})
	return snap.Metadata.Index, nil
}
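This version registers its in-memory updates via batch.Defer, so they take effect only when the caller commits the batch. A sketch of such a driver follows, reusing the NewBatch/Close/Commit calls shown in the older applySnapshot snippets above; the wrapper function and the assumption that Defer callbacks fire on Commit are illustrative, not confirmed API.

// applySnapshotAndCommit is a hypothetical driver for the version above: it
// stages the snapshot in a batch and commits it, at which point the deferred
// in-memory updates (stats, applied index, lease, descriptor) are applied.
func (r *Replica) applySnapshotAndCommit(snap raftpb.Snapshot) (uint64, error) {
	batch := r.store.Engine().NewBatch()
	defer batch.Close()

	lastIndex, err := r.applySnapshot(batch, snap)
	if err != nil {
		return 0, err
	}
	if err := batch.Commit(); err != nil {
		return 0, err
	}
	return lastIndex, nil
}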
Example #18
// Put sets the value for a specified key.
func (r *Range) Put(batch engine.Engine, ms *engine.MVCCStats, args proto.PutRequest) (proto.PutResponse, error) {
	var reply proto.PutResponse

	return reply, engine.MVCCPut(batch, ms, args.Key, args.Timestamp, args.Value, args.Txn)
}
Example #19
// TestUncertaintyMaxTimestampForwarding checks that when receiving an
// uncertainty restart on a node, the next attempt to read (at the increased
// timestamp) is free from uncertainty. See roachpb.Transaction for details.
func TestUncertaintyMaxTimestampForwarding(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s := createTestDB(t)
	disableOwnNodeCertain(s)
	defer s.Stop()
	// Large offset so that any value in the future is an uncertain read.
	// Also makes sure that the values we write in the future below don't
	// actually wind up in the past.
	s.Clock.SetMaxOffset(50 * time.Second)

	offsetNS := int64(100)
	keySlow := roachpb.Key("slow")
	keyFast := roachpb.Key("fast")
	valSlow := []byte("wols")
	valFast := []byte("tsaf")

	// Write keySlow at now+offset, keyFast at now+2*offset
	futureTS := s.Clock.Now()
	futureTS.WallTime += offsetNS
	val := roachpb.MakeValueFromBytes(valSlow)
	if err := engine.MVCCPut(s.Eng, nil, keySlow, futureTS, val, nil); err != nil {
		t.Fatal(err)
	}
	futureTS.WallTime += offsetNS
	val.SetBytes(valFast)
	if err := engine.MVCCPut(s.Eng, nil, keyFast, futureTS, val, nil); err != nil {
		t.Fatal(err)
	}

	i := 0
	if tErr := s.DB.Txn(func(txn *client.Txn) *roachpb.Error {
		i++
		// The first command serves to start a Txn, fixing the timestamps.
		// There will be a restart, but this is idempotent.
		if _, pErr := txn.Scan("t", roachpb.Key("t").Next(), 0); pErr != nil {
			t.Fatal(pErr)
		}
		// This is a bit of a hack for the sake of this test: By visiting the
		// node above, we've made a note of its clock, which allows us to
		// prevent the restart. But we want to catch the restart, so reset the
		// observed timestamps.
		txn.Proto.ResetObservedTimestamps()

		// The server's clock suddenly jumps ahead of keyFast's timestamp.
		s.Manual.Set(2*offsetNS + 1)

		// Now read slowKey first. It should read at 0, catch an uncertainty error,
		// and get keySlow's timestamp in that error, but upgrade it to the larger
		// node clock (which is ahead of keyFast as well). If the last part does
		// not happen, the read of keyFast should fail (i.e. read nothing).
		// There will be exactly one restart here.
		if gr, pErr := txn.Get(keySlow); pErr != nil {
			if i != 1 {
				t.Fatalf("unexpected transaction error: %s", pErr)
			}
			return pErr
		} else if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), valSlow) {
			t.Fatalf("read of %q returned %v, wanted value %q", keySlow, gr.Value, valSlow)
		}

		// The node should already be certain, so we expect no restart here
		// and to read the correct key.
		if gr, pErr := txn.Get(keyFast); pErr != nil {
			t.Fatalf("second Get failed with %s", pErr)
		} else if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), valFast) {
			t.Fatalf("read of %q returned %v, wanted value %q", keyFast, gr.Value, valFast)
		}
		return nil
	}); tErr != nil {
		t.Fatal(tErr)
	}
}
Example #20
// TestUncertaintyMaxTimestampForwarding checks that we correctly read from
// hosts for which we control the uncertainty by checking that when a
// transaction restarts after an uncertain read, it will also take into account
// the target node's clock at the time of the failed read when forwarding the
// read timestamp.
// This is a prerequisite for being able to prevent further uncertainty
// restarts for that node and transaction without sacrificing correctness.
// See proto.Transaction.CertainNodes for details.
func TestUncertaintyMaxTimestampForwarding(t *testing.T) {
	db, eng, clock, mClock, _, transport, err := createTestDB()
	defer transport.Close()
	// Large offset so that any value in the future is an uncertain read.
	// Also makes sure that the values we write in the future below don't
	// actually wind up in the past.
	clock.SetMaxOffset(50000 * time.Millisecond)

	txnOpts := &client.TransactionOptions{
		Name: "uncertainty",
	}

	offsetNS := int64(100)
	keySlow := proto.Key("slow")
	keyFast := proto.Key("fast")
	valSlow := []byte("wols")
	valFast := []byte("tsaf")

	// Write keySlow at now+offset, keyFast at now+2*offset
	futureTS := clock.Now()
	futureTS.WallTime += offsetNS
	err = engine.MVCCPut(eng, nil, keySlow, futureTS,
		proto.Value{Bytes: valSlow}, nil)
	if err != nil {
		t.Fatal(err)
	}
	futureTS.WallTime += offsetNS
	err = engine.MVCCPut(eng, nil, keyFast, futureTS,
		proto.Value{Bytes: valFast}, nil)
	if err != nil {
		t.Fatal(err)
	}

	i := 0
	if tErr := db.RunTransaction(txnOpts, func(txn *client.KV) error {
		i++
		// The first command serves to start a Txn, fixing the timestamps.
		// There will be a restart, but this is idempotent.
		sr := &proto.ScanResponse{}
		if err = txn.Call(proto.Scan, proto.ScanArgs(proto.Key("t"), proto.Key("t"),
			0), sr); err != nil {
			t.Fatal(err)
		}

		// The server's clock suddenly jumps ahead of keyFast's timestamp.
		// There will be a restart, but this is idempotent.
		mClock.Set(2*offsetNS + 1)

		// Now read slowKey first. It should read at 0, catch an uncertainty error,
		// and get keySlow's timestamp in that error, but upgrade it to the larger
		// node clock (which is ahead of keyFast as well). If the last part does
		// not happen, the read of keyFast should fail (i.e. read nothing).
		// There will be exactly one restart here.
		gr := &proto.GetResponse{}
		if err = txn.Call(proto.Get, proto.GetArgs(keySlow), gr); err != nil {
			if i != 1 {
				t.Errorf("unexpected transaction error: %v", err)
			}
			return err
		}
		if gr.Value == nil || !bytes.Equal(gr.Value.Bytes, valSlow) {
			t.Errorf("read of %q returned %v, wanted value %q", keySlow, gr.Value,
				valSlow)
		}

		gr.Reset()
		// The node should already be certain, so we expect no restart here
		// and to read the correct key.
		if err = txn.Call(proto.Get, proto.GetArgs(keyFast), gr); err != nil {
			t.Errorf("second Get failed with %v", err)
		}
		if gr.Value == nil || !bytes.Equal(gr.Value.Bytes, valFast) {
			t.Errorf("read of %q returned %v, wanted value %q", keyFast, gr.Value,
				valFast)
		}
		return nil
	}); tErr != nil {
		t.Fatal(tErr)
	}
}
Example #21
// setLastIndex persists a new last index.
func setLastIndex(eng engine.Engine, rangeID roachpb.RangeID, lastIndex uint64) error {
	return engine.MVCCPut(eng, nil, keys.RaftLastIndexKey(rangeID),
		roachpb.ZeroTimestamp,
		roachpb.MakeValueFromBytes(encoding.EncodeUint64(nil, lastIndex)), nil)
}
Example #22
// Put sets the value for a specified key.
func (r *Range) Put(batch engine.Engine, ms *engine.MVCCStats, args *proto.PutRequest, reply *proto.PutResponse) {
	err := engine.MVCCPut(batch, ms, args.Key, args.Timestamp, args.Value, args.Txn)
	reply.SetGoError(err)
}