// getInternal implements the actual logic of get function. // The values of multiple versions for the given key should // be organized as follows: // ... // keyA : MVCCMetatata of keyA // keyA_Timestamp_n : value of version_n // keyA_Timestamp_n-1 : value of version_n-1 // ... // keyA_Timestamp_0 : value of version_0 // keyB : MVCCMetadata of keyB // ... func (mvcc *MVCC) getInternal(key Key, timestamp proto.Timestamp, txnID []byte) ([]byte, proto.Timestamp, []byte, error) { meta := &proto.MVCCMetadata{} ok, err := GetProto(mvcc.engine, key, meta) if err != nil || !ok { return nil, proto.Timestamp{}, nil, err } // If the read timestamp is greater than the latest one, we can just // fetch the value without a scan. if !timestamp.Less(meta.Timestamp) { if len(meta.TxnID) > 0 && (len(txnID) == 0 || !bytes.Equal(meta.TxnID, txnID)) { return nil, proto.Timestamp{}, nil, &writeIntentError{TxnID: meta.TxnID} } latestKey := mvccEncodeKey(key, meta.Timestamp) val, err := mvcc.engine.Get(latestKey) return val, meta.Timestamp, meta.TxnID, err } nextKey := mvccEncodeKey(key, timestamp) // We use the PrefixEndKey(key) as the upper bound for scan. // If there is no other version after nextKey, it won't return // the value of the next key. kvs, err := mvcc.engine.Scan(nextKey, PrefixEndKey(key), 1) if len(kvs) > 0 { _, ts, _ := mvccDecodeKey(kvs[0].Key) return kvs[0].Value, ts, nil, err } return nil, proto.Timestamp{}, nil, err }
// TestCoordinatorHeartbeat verifies periodic heartbeat of the // transaction record. func TestCoordinatorHeartbeat(t *testing.T) { db, _, manual := createTestDB(t) defer db.Close() // Set heartbeat interval to 1ms for testing. db.coordinator.heartbeatInterval = 1 * time.Millisecond txnID := engine.Key("txn") <-db.Put(createPutRequest(engine.Key("a"), []byte("value"), txnID)) // Verify 3 heartbeats. var heartbeatTS proto.Timestamp for i := 0; i < 3; i++ { if err := util.IsTrueWithin(func() bool { ok, txn, err := getTxn(db, engine.MakeKey(engine.KeyLocalTransactionPrefix, txnID)) if !ok || err != nil { return false } // Advance clock by 1ns. // Locking the coordinator to prevent a data race. db.coordinator.Lock() *manual = hlc.ManualClock(*manual + 1) db.coordinator.Unlock() if heartbeatTS.Less(*txn.LastHeartbeat) { heartbeatTS = *txn.LastHeartbeat return true } return false }, 50*time.Millisecond); err != nil { t.Error("expected initial heartbeat within 50ms") } } }
// ExampleNewClock shows how to create a new // hybrid logical clock based on the local machine's // physical clock. The sanity checks in this example // will, of course, not fail and the output will be // the age of the Unix epoch in nanoseconds. func ExampleNewClock() { // Initialize a new clock, using the local // physical clock. c := NewClock(UnixNano) // Update the state of the hybrid clock. s := c.Now() time.Sleep(50 * time.Nanosecond) t := proto.Timestamp{WallTime: UnixNano()} // The sanity checks below will usually never be triggered. // Timestamp implements the util.Ordered interface. if s.Less(t) || !t.Less(s) { log.Fatalf("The later timestamp is smaller than the earlier one") } if t.WallTime-s.WallTime > 0 { log.Fatalf("HLC timestamp %d deviates from physical clock %d", s, t) } if s.Logical > 0 { log.Fatalf("Trivial timestamp has logical component") } fmt.Printf("The Unix Epoch is now approximately %dns old.\n", t.WallTime) }
// Add the specified timestamp to the cache as covering the range of // keys from start to end. If end is nil, the range covers the start // key only. txnID is nil for no transaction. readOnly specifies // whether the command adding this timestamp was read-only or not. func (tc *TimestampCache) Add(start, end proto.Key, timestamp proto.Timestamp, txnID []byte, readOnly bool) { // This gives us a memory-efficient end key if end is empty. if len(end) == 0 { end = start.Next() start = end[:len(start)] } if tc.latest.Less(timestamp) { tc.latest = timestamp } // Only add to the cache if the timestamp is more recent than the // low water mark. if tc.lowWater.Less(timestamp) { // Check existing, overlapping entries. Remove superseded // entries or return without adding this entry if necessary. key := tc.cache.NewKey(start, end) for _, o := range tc.cache.GetOverlaps(start, end) { ce := o.Value.(cacheEntry) if ce.readOnly != readOnly { continue } if o.Key.Contains(key) && !ce.timestamp.Less(timestamp) { return // don't add this key; there's already a cache entry with >= timestamp. } else if key.Contains(o.Key) && !timestamp.Less(ce.timestamp) { tc.cache.Del(o.Key) // delete existing key; this cache entry supersedes. } } ce := cacheEntry{timestamp: timestamp, txnID: txnID, readOnly: readOnly} tc.cache.Add(key, ce) } }
// Get returns the value for the key specified in the request, while // satisfying the given timestamp condition. The key may be // arbitrarily encoded; it will be binary-encoded to remove any // internal null characters. If no value for the key exists, or has // been deleted, returns nil for value. // // The values of multiple versions for the given key should // be organized as follows: // ... // keyA : MVCCMetatata of keyA // keyA_Timestamp_n : value of version_n // keyA_Timestamp_n-1 : value of version_n-1 // ... // keyA_Timestamp_0 : value of version_0 // keyB : MVCCMetadata of keyB // ... func (mvcc *MVCC) Get(key Key, timestamp proto.Timestamp, txn *proto.Transaction) (*proto.Value, error) { binKey := encoding.EncodeBinary(nil, key) meta := &proto.MVCCMetadata{} ok, err := GetProto(mvcc.engine, binKey, meta) if err != nil || !ok { return nil, err } // If the read timestamp is greater than the latest one, we can just // fetch the value without a scan. ts := proto.Timestamp{} var valBytes []byte if !timestamp.Less(meta.Timestamp) { if meta.Txn != nil && (txn == nil || !bytes.Equal(meta.Txn.ID, txn.ID)) { return nil, &writeIntentError{Txn: meta.Txn} } latestKey := mvccEncodeKey(binKey, meta.Timestamp) valBytes, err = mvcc.engine.Get(latestKey) ts = meta.Timestamp } else { nextKey := mvccEncodeKey(binKey, timestamp) // We use the PrefixEndKey(key) as the upper bound for scan. // If there is no other version after nextKey, it won't return // the value of the next key. kvs, err := mvcc.engine.Scan(nextKey, PrefixEndKey(binKey), 1) if len(kvs) == 0 { return nil, err } _, ts, _ = mvccDecodeKey(kvs[0].Key) valBytes = kvs[0].Value } if valBytes == nil { return nil, nil } // Unmarshal the mvcc value. value := &proto.MVCCValue{} if err := gogoproto.Unmarshal(valBytes, value); err != nil { return nil, err } // Set the timestamp if the value is not nil (i.e. not a deletion tombstone). if value.Value != nil { value.Value.Timestamp = &ts } else if !value.Deleted { log.Warningf("encountered MVCC value at key %q with a nil proto.Value but with !Deleted: %+v", key, value) } return value.Value, nil }
// TestTxnCoordSenderHeartbeat verifies periodic heartbeat of the // transaction record. func TestTxnCoordSenderHeartbeat(t *testing.T) { defer leaktest.AfterTest(t) s := createTestDB(t) defer s.Stop() defer teardownHeartbeats(s.Sender) // Set heartbeat interval to 1ms for testing. s.Sender.heartbeatInterval = 1 * time.Millisecond initialTxn := newTxn(s.Clock, proto.Key("a")) call := proto.Call{ Args: createPutRequest(proto.Key("a"), []byte("value"), initialTxn), Reply: &proto.PutResponse{}} if err := sendCall(s.Sender, call); err != nil { t.Fatal(err) } *initialTxn = *call.Reply.Header().Txn // Verify 3 heartbeats. var heartbeatTS proto.Timestamp for i := 0; i < 3; i++ { if err := util.IsTrueWithin(func() bool { ok, txn, err := getTxn(s.Sender, initialTxn) if !ok || err != nil { return false } // Advance clock by 1ns. // Locking the TxnCoordSender to prevent a data race. s.Sender.Lock() s.Manual.Increment(1) s.Sender.Unlock() if heartbeatTS.Less(*txn.LastHeartbeat) { heartbeatTS = *txn.LastHeartbeat return true } return false }, 50*time.Millisecond); err != nil { t.Error("expected initial heartbeat within 50ms") } } }
// putInternal adds a new timestamped value to the specified key. // If value is nil, creates a deletion tombstone value. func (mvcc *MVCC) putInternal(key Key, timestamp proto.Timestamp, value proto.MVCCValue, txn *proto.Transaction) error { if value.Value != nil && value.Value.Bytes != nil && value.Value.Integer != nil { return util.Errorf("key %q value contains both a byte slice and an integer value: %+v", key, value) } meta := &proto.MVCCMetadata{} ok, err := GetProto(mvcc.engine, key, meta) if err != nil { return err } // Use a batch because a put involves multiple writes. var batch []interface{} // In case the key metadata exists. if ok { // There is an uncommitted write intent and the current Put // operation does not come from the same transaction. // This should not happen since range should check the existing // write intent before executing any Put action at MVCC level. if meta.Txn != nil && (txn == nil || !bytes.Equal(meta.Txn.ID, txn.ID)) { return &writeIntentError{Txn: meta.Txn} } // We can update the current metadata only if both the timestamp // and epoch of the new intent are greater than or equal to // existing. If either of these conditions doesn't hold, it's // likely the case that an older RPC is arriving out of order. if !timestamp.Less(meta.Timestamp) && (meta.Txn == nil || txn.Epoch >= meta.Txn.Epoch) { // If this is an intent and timestamps have changed, need to remove old version. if meta.Txn != nil && !timestamp.Equal(meta.Timestamp) { batch = append(batch, BatchDelete(mvccEncodeKey(key, meta.Timestamp))) } meta = &proto.MVCCMetadata{Txn: txn, Timestamp: timestamp} batchPut, err := MakeBatchPutProto(key, meta) if err != nil { return err } batch = append(batch, batchPut) } else { // In case we receive a Put request to update an old version, // it must be an error since raft should handle any client // retry from timeout. return &writeTooOldError{Timestamp: meta.Timestamp, Txn: meta.Txn} } } else { // In case the key metadata does not exist yet. // Create key metadata. meta = &proto.MVCCMetadata{Txn: txn, Timestamp: timestamp} batchPut, err := MakeBatchPutProto(key, meta) if err != nil { return err } batch = append(batch, batchPut) } // Make sure to zero the redundant timestamp (timestamp is encoded // into the key, so don't need it in both places). if value.Value != nil { value.Value.Timestamp = nil } batchPut, err := MakeBatchPutProto(mvccEncodeKey(key, timestamp), &value) if err != nil { return err } batch = append(batch, batchPut) return mvcc.engine.WriteBatch(batch) }