func setAppliedIndex( ctx context.Context, eng engine.ReadWriter, ms *enginepb.MVCCStats, rangeID roachpb.RangeID, appliedIndex, leaseAppliedIndex uint64, ) error { var value roachpb.Value value.SetInt(int64(appliedIndex)) if err := engine.MVCCPut(ctx, eng, ms, keys.RaftAppliedIndexKey(rangeID), hlc.ZeroTimestamp, value, nil /* txn */); err != nil { return err } value.SetInt(int64(leaseAppliedIndex)) return engine.MVCCPut(ctx, eng, ms, keys.LeaseAppliedIndexKey(rangeID), hlc.ZeroTimestamp, value, nil /* txn */) }
func setLastIndex( ctx context.Context, eng engine.ReadWriter, rangeID roachpb.RangeID, lastIndex uint64, ) error { var value roachpb.Value value.SetInt(int64(lastIndex)) return engine.MVCCPut(ctx, eng, nil, keys.RaftLastIndexKey(rangeID), hlc.ZeroTimestamp, value, nil /* txn */) }
func setFrozenStatus( ctx context.Context, eng engine.ReadWriter, ms *enginepb.MVCCStats, rangeID roachpb.RangeID, frozen storagebase.ReplicaState_FrozenEnum, ) error { if frozen == storagebase.ReplicaState_FROZEN_UNSPECIFIED { return errors.New("cannot persist unspecified FrozenStatus") } var val roachpb.Value val.SetBool(frozen == storagebase.ReplicaState_FROZEN) return engine.MVCCPut(ctx, eng, ms, keys.RangeFrozenStatusKey(rangeID), hlc.ZeroTimestamp, val, nil) }
// append the given entries to the raft log. Takes the previous values of // r.mu.lastIndex and r.mu.raftLogSize, and returns new values. We do this // rather than modifying them directly because these modifications need to be // atomic with the commit of the batch. func (r *Replica) append( ctx context.Context, batch engine.ReadWriter, prevLastIndex uint64, prevRaftLogSize int64, entries []raftpb.Entry, ) (uint64, int64, error) { if len(entries) == 0 { return prevLastIndex, prevRaftLogSize, nil } var diff enginepb.MVCCStats var value roachpb.Value for i := range entries { ent := &entries[i] key := keys.RaftLogKey(r.RangeID, ent.Index) if err := value.SetProto(ent); err != nil { return 0, 0, err } value.InitChecksum(key) var err error if ent.Index > prevLastIndex { err = engine.MVCCBlindPut(ctx, batch, &diff, key, hlc.ZeroTimestamp, value, nil /* txn */) } else { err = engine.MVCCPut(ctx, batch, &diff, key, hlc.ZeroTimestamp, value, nil /* txn */) } if err != nil { return 0, 0, err } } // Delete any previously appended log entries which never committed. lastIndex := entries[len(entries)-1].Index for i := lastIndex + 1; i <= prevLastIndex; i++ { err := engine.MVCCDelete(ctx, batch, &diff, keys.RaftLogKey(r.RangeID, i), hlc.ZeroTimestamp, nil /* txn */) if err != nil { return 0, 0, err } } if err := setLastIndex(ctx, batch, r.RangeID, lastIndex); err != nil { return 0, 0, err } raftLogSize := prevRaftLogSize + diff.SysBytes return lastIndex, raftLogSize, nil }
// createRangeData creates sample range data in all possible areas of // the key space. Returns a slice of the encoded keys of all created // data. func createRangeData(t *testing.T, r *Replica) []engine.MVCCKey { ts0 := hlc.ZeroTimestamp ts := hlc.Timestamp{WallTime: 1} desc := r.Desc() keyTSs := []struct { key roachpb.Key ts hlc.Timestamp }{ {keys.AbortCacheKey(r.RangeID, testTxnID), ts0}, {keys.AbortCacheKey(r.RangeID, testTxnID2), ts0}, {keys.RangeFrozenStatusKey(r.RangeID), ts0}, {keys.RangeLastGCKey(r.RangeID), ts0}, {keys.RaftAppliedIndexKey(r.RangeID), ts0}, {keys.RaftTruncatedStateKey(r.RangeID), ts0}, {keys.RangeLeaseKey(r.RangeID), ts0}, {keys.LeaseAppliedIndexKey(r.RangeID), ts0}, {keys.RangeStatsKey(r.RangeID), ts0}, {keys.RangeTxnSpanGCThresholdKey(r.RangeID), ts0}, {keys.RaftHardStateKey(r.RangeID), ts0}, {keys.RaftLastIndexKey(r.RangeID), ts0}, {keys.RaftLogKey(r.RangeID, 1), ts0}, {keys.RaftLogKey(r.RangeID, 2), ts0}, {keys.RangeLastReplicaGCTimestampKey(r.RangeID), ts0}, {keys.RangeLastVerificationTimestampKeyDeprecated(r.RangeID), ts0}, {keys.RangeDescriptorKey(desc.StartKey), ts}, {keys.TransactionKey(roachpb.Key(desc.StartKey), uuid.MakeV4()), ts0}, {keys.TransactionKey(roachpb.Key(desc.StartKey.Next()), uuid.MakeV4()), ts0}, {keys.TransactionKey(fakePrevKey(desc.EndKey), uuid.MakeV4()), ts0}, // TODO(bdarnell): KeyMin.Next() results in a key in the reserved system-local space. // Once we have resolved https://github.com/cockroachdb/cockroach/issues/437, // replace this with something that reliably generates the first valid key in the range. //{r.Desc().StartKey.Next(), ts}, // The following line is similar to StartKey.Next() but adds more to the key to // avoid falling into the system-local space. 
{append(append([]byte{}, desc.StartKey...), '\x02'), ts}, {fakePrevKey(r.Desc().EndKey), ts}, } keys := []engine.MVCCKey{} for _, keyTS := range keyTSs { if err := engine.MVCCPut(context.Background(), r.store.Engine(), nil, keyTS.key, keyTS.ts, roachpb.MakeValueFromString("value"), nil); err != nil { t.Fatal(err) } keys = append(keys, engine.MVCCKey{Key: keyTS.key, Timestamp: keyTS.ts}) } return keys }
// TestUncertaintyMaxTimestampForwarding checks that when receiving an
// uncertainty restart on a node, the next attempt to read (at the increased
// timestamp) is free from uncertainty. See roachpb.Transaction for details.
//
// The test writes two values at future timestamps (keySlow at now+offset,
// keyFast at now+2*offset), then runs a transaction that reads keySlow
// followed by keyFast. Only the first attempt of the transaction is allowed
// to fail on the keySlow read; the keyFast read must always succeed and
// return the written value.
func TestUncertaintyMaxTimestampForwarding(t *testing.T) {
	defer leaktest.AfterTest(t)()
	dbCtx := client.DefaultDBContext()
	s := &localtestcluster.LocalTestCluster{
		// Large offset so that any value in the future is an uncertain read. Also
		// makes sure that the values we write in the future below don't actually
		// wind up in the past.
		Clock:     hlc.NewClock(hlc.UnixNano, 50*time.Second),
		DBContext: &dbCtx,
	}
	s.Start(t, testutils.NewNodeTestBaseContext(), InitSenderForLocalTestCluster)
	defer s.Stop()

	disableOwnNodeCertain(t, s)

	offsetNS := int64(100)
	keySlow := roachpb.Key("slow")
	keyFast := roachpb.Key("fast")
	valSlow := []byte("wols")
	valFast := []byte("tsaf")

	// Write keySlow at now+offset, keyFast at now+2*offset
	futureTS := s.Clock.Now()
	futureTS.WallTime += offsetNS
	val := roachpb.MakeValueFromBytes(valSlow)
	if err := engine.MVCCPut(context.Background(), s.Eng, nil, keySlow, futureTS, val, nil); err != nil {
		t.Fatal(err)
	}
	futureTS.WallTime += offsetNS
	val.SetBytes(valFast)
	if err := engine.MVCCPut(context.Background(), s.Eng, nil, keyFast, futureTS, val, nil); err != nil {
		t.Fatal(err)
	}

	// i counts how many times the transaction closure has been invoked.
	i := 0
	if tErr := s.DB.Txn(context.TODO(), func(txn *client.Txn) error {
		i++
		// The first command serves to start a Txn, fixing the timestamps.
		// There will be a restart, but this is idempotent.
		if _, err := txn.Scan("t", roachpb.Key("t").Next(), 0); err != nil {
			t.Fatal(err)
		}
		// This is a bit of a hack for the sake of this test: By visiting the
		// node above, we've made a note of its clock, which allows us to
		// prevent the restart. But we want to catch the restart, so reset the
		// observed timestamps.
		txn.Proto.ResetObservedTimestamps()

		// The server's clock suddenly jumps ahead of keyFast's timestamp.
		s.Manual.Increment(2*offsetNS + 1)

		// Now read keySlow first. It should read at 0, catch an uncertainty error,
		// and get keySlow's timestamp in that error, but upgrade it to the larger
		// node clock (which is ahead of keyFast as well). If the last part does
		// not happen, the read of keyFast should fail (i.e. read nothing).
		// There will be exactly one restart here.
		if gr, err := txn.Get(keySlow); err != nil {
			// An error is only tolerated on the first attempt; it is returned
			// to the Txn wrapper. Any later error fails the test.
			if i != 1 {
				t.Fatalf("unexpected transaction error: %s", err)
			}
			return err
		} else if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), valSlow) {
			t.Fatalf("read of %q returned %v, wanted value %q", keySlow, gr.Value, valSlow)
		}

		// The node should already be certain, so we expect no restart here
		// and to read the correct key.
		if gr, err := txn.Get(keyFast); err != nil {
			t.Fatalf("second Get failed with %s", err)
		} else if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), valFast) {
			t.Fatalf("read of %q returned %v, wanted value %q", keyFast, gr.Value, valFast)
		}
		return nil
	}); tErr != nil {
		t.Fatal(tErr)
	}
}