Esempio n. 1
0
// getInternal implements the actual logic of the get function.
//
// The values of multiple versions for the given key should
// be organized as follows:
// ...
// keyA : MVCCMetadata of keyA
// keyA_Timestamp_n : value of version_n
// keyA_Timestamp_n-1 : value of version_n-1
// ...
// keyA_Timestamp_0 : value of version_0
// keyB : MVCCMetadata of keyB
// ...
//
// It returns the raw value bytes, the timestamp of the version that was
// read and, when the latest version is returned, the transaction ID
// recorded in the key's metadata. If the key has no metadata, all
// results are zero values and the error is nil. A *writeIntentError is
// returned when the latest version carries a transaction ID which does
// not match txnID. On any error the other results are zero values.
func (mvcc *MVCC) getInternal(key Key, timestamp proto.Timestamp, txnID []byte) ([]byte, proto.Timestamp, []byte, error) {
	meta := &proto.MVCCMetadata{}
	ok, err := GetProto(mvcc.engine, key, meta)
	if err != nil || !ok {
		return nil, proto.Timestamp{}, nil, err
	}

	// If the read timestamp is at or past the latest version's timestamp,
	// we can fetch that version directly without a scan.
	if !timestamp.Less(meta.Timestamp) {
		// The latest version belongs to a transaction other than the
		// reader's, so it must not be exposed.
		if len(meta.TxnID) > 0 && (len(txnID) == 0 || !bytes.Equal(meta.TxnID, txnID)) {
			return nil, proto.Timestamp{}, nil, &writeIntentError{TxnID: meta.TxnID}
		}

		latestKey := mvccEncodeKey(key, meta.Timestamp)
		val, err := mvcc.engine.Get(latestKey)
		if err != nil {
			return nil, proto.Timestamp{}, nil, err
		}
		return val, meta.Timestamp, meta.TxnID, nil
	}

	nextKey := mvccEncodeKey(key, timestamp)
	// We use PrefixEndKey(key) as the upper bound for the scan.
	// If there is no other version after nextKey, it won't return
	// the value of the next key.
	kvs, err := mvcc.engine.Scan(nextKey, PrefixEndKey(key), 1)
	if err != nil {
		return nil, proto.Timestamp{}, nil, err
	}
	if len(kvs) == 0 {
		return nil, proto.Timestamp{}, nil, nil
	}
	// Only the timestamp component of the decoded key is needed here.
	_, ts, _ := mvccDecodeKey(kvs[0].Key)
	return kvs[0].Value, ts, nil, nil
}
Esempio n. 2
0
// TestCoordinatorHeartbeat verifies periodic heartbeat of the
// transaction record: after a transactional put, the record's
// LastHeartbeat must be observed to advance three times, each within
// 50ms.
func TestCoordinatorHeartbeat(t *testing.T) {
	db, _, manual := createTestDB(t)
	defer db.Close()

	// Set heartbeat interval to 1ms for testing.
	db.coordinator.heartbeatInterval = 1 * time.Millisecond

	txnID := engine.Key("txn")
	<-db.Put(createPutRequest(engine.Key("a"), []byte("value"), txnID))

	// Verify 3 heartbeats.
	var heartbeatTS proto.Timestamp
	for i := 0; i < 3; i++ {
		if err := util.IsTrueWithin(func() bool {
			ok, txn, err := getTxn(db, engine.MakeKey(engine.KeyLocalTransactionPrefix, txnID))
			if !ok || err != nil {
				return false
			}
			// Advance clock by 1ns.
			// Locking the coordinator to prevent a data race.
			db.coordinator.Lock()
			*manual = hlc.ManualClock(*manual + 1)
			db.coordinator.Unlock()
			// The record may exist before it has been heartbeat for the
			// first time; guard against dereferencing a nil timestamp.
			if txn.LastHeartbeat != nil && heartbeatTS.Less(*txn.LastHeartbeat) {
				heartbeatTS = *txn.LastHeartbeat
				return true
			}
			return false
		}, 50*time.Millisecond); err != nil {
			t.Errorf("expected heartbeat %d within 50ms: %v", i+1, err)
		}
	}
}
Esempio n. 3
0
// ExampleNewClock shows how to create a new
// hybrid logical clock based on the local machine's
// physical clock. The sanity checks in this example
// will, of course, not fail and the output will be
// the age of the Unix epoch in nanoseconds.
func ExampleNewClock() {
	// Initialize a new clock, using the local
	// physical clock.
	c := NewClock(UnixNano)
	// Update the state of the hybrid clock; s is the earlier reading.
	s := c.Now()
	time.Sleep(50 * time.Nanosecond)
	// t is read from the physical clock after s was taken.
	t := proto.Timestamp{WallTime: UnixNano()}
	// The sanity checks below will usually never be triggered.

	// Timestamp implements the util.Ordered interface. The later
	// reading t must not order before the earlier reading s. Equality
	// is tolerated in case the physical clock did not tick in between.
	if t.Less(s) {
		log.Fatalf("The later timestamp is smaller than the earlier one")
	}

	// The hybrid clock's wall time must not be ahead of a physical
	// reading taken afterwards.
	if s.WallTime-t.WallTime > 0 {
		log.Fatalf("HLC timestamp %d deviates from physical clock %d", s.WallTime, t.WallTime)
	}

	if s.Logical > 0 {
		log.Fatalf("Trivial timestamp has logical component")
	}

	fmt.Printf("The Unix Epoch is now approximately %dns old.\n", t.WallTime)
}
Esempio n. 4
0
// Add records timestamp in the cache for the key range [start, end).
// An empty end means the range covers only the start key. txnID is nil
// when the command was not part of a transaction, and readOnly reports
// whether the command adding this timestamp was read-only.
//
// The cache's latest timestamp is always ratcheted forward. An entry is
// only inserted when timestamp is above the low water mark and no
// existing entry of the same readOnly kind already covers the range
// with an equal or newer timestamp.
func (tc *TimestampCache) Add(start, end proto.Key, timestamp proto.Timestamp, txnID []byte, readOnly bool) {
	if len(end) == 0 {
		// Memory-efficient end key: start is re-sliced from end so that
		// both keys share end's backing array.
		end = start.Next()
		start = end[:len(start)]
	}

	if tc.latest.Less(timestamp) {
		tc.latest = timestamp
	}

	// Timestamps at or below the low water mark are not cached.
	if !tc.lowWater.Less(timestamp) {
		return
	}

	newKey := tc.cache.NewKey(start, end)
	for _, overlap := range tc.cache.GetOverlaps(start, end) {
		existing := overlap.Value.(cacheEntry)
		// Only entries of the same read/write kind are compared.
		if existing.readOnly != readOnly {
			continue
		}
		switch {
		case overlap.Key.Contains(newKey) && !existing.timestamp.Less(timestamp):
			// An existing entry covers this range with a >= timestamp;
			// there is nothing to add.
			return
		case newKey.Contains(overlap.Key) && !timestamp.Less(existing.timestamp):
			// The new entry supersedes the existing one.
			tc.cache.Del(overlap.Key)
		}
	}
	tc.cache.Add(newKey, cacheEntry{timestamp: timestamp, txnID: txnID, readOnly: readOnly})
}
Esempio n. 5
0
// Get returns the value for the key specified in the request, while
// satisfying the given timestamp condition. The key may be
// arbitrarily encoded; it will be binary-encoded to remove any
// internal null characters. If no value for the key exists, or has
// been deleted, returns nil for value.
//
// The values of multiple versions for the given key should
// be organized as follows:
// ...
// keyA : MVCCMetadata of keyA
// keyA_Timestamp_n : value of version_n
// keyA_Timestamp_n-1 : value of version_n-1
// ...
// keyA_Timestamp_0 : value of version_0
// keyB : MVCCMetadata of keyB
// ...
//
// A *writeIntentError is returned when the latest version is readable
// at timestamp but belongs to a transaction other than txn. Errors
// from the underlying engine reads and from unmarshalling the stored
// value are returned as is, with a nil value.
func (mvcc *MVCC) Get(key Key, timestamp proto.Timestamp, txn *proto.Transaction) (*proto.Value, error) {
	binKey := encoding.EncodeBinary(nil, key)
	meta := &proto.MVCCMetadata{}
	ok, err := GetProto(mvcc.engine, binKey, meta)
	if err != nil || !ok {
		return nil, err
	}

	var ts proto.Timestamp
	var valBytes []byte
	if !timestamp.Less(meta.Timestamp) {
		// The read timestamp is at or past the latest version, so we
		// can fetch that version directly without a scan.
		if meta.Txn != nil && (txn == nil || !bytes.Equal(meta.Txn.ID, txn.ID)) {
			return nil, &writeIntentError{Txn: meta.Txn}
		}

		latestKey := mvccEncodeKey(binKey, meta.Timestamp)
		// Check the engine error here; otherwise a failed read would be
		// reported to the caller as a missing value.
		if valBytes, err = mvcc.engine.Get(latestKey); err != nil {
			return nil, err
		}
		ts = meta.Timestamp
	} else {
		nextKey := mvccEncodeKey(binKey, timestamp)
		// We use PrefixEndKey(binKey) as the upper bound for the scan.
		// If there is no other version after nextKey, it won't return
		// the value of the next key.
		kvs, err := mvcc.engine.Scan(nextKey, PrefixEndKey(binKey), 1)
		if err != nil {
			return nil, err
		}
		if len(kvs) == 0 {
			return nil, nil
		}
		// Only the timestamp component of the decoded key is needed here.
		_, ts, _ = mvccDecodeKey(kvs[0].Key)
		valBytes = kvs[0].Value
	}
	if valBytes == nil {
		return nil, nil
	}

	// Unmarshal the mvcc value.
	value := &proto.MVCCValue{}
	if err := gogoproto.Unmarshal(valBytes, value); err != nil {
		return nil, err
	}
	// Set the timestamp if the value is not nil (i.e. not a deletion tombstone).
	if value.Value != nil {
		value.Value.Timestamp = &ts
	} else if !value.Deleted {
		log.Warningf("encountered MVCC value at key %q with a nil proto.Value but with !Deleted: %+v", key, value)
	}
	return value.Value, nil
}
// TestTxnCoordSenderHeartbeat verifies periodic heartbeat of the
// transaction record: after a transactional put, the record's
// LastHeartbeat must be observed to advance three times, each within
// 50ms.
func TestTxnCoordSenderHeartbeat(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()
	defer teardownHeartbeats(s.Sender)

	// Set heartbeat interval to 1ms for testing.
	s.Sender.heartbeatInterval = 1 * time.Millisecond

	initialTxn := newTxn(s.Clock, proto.Key("a"))
	call := proto.Call{
		Args:  createPutRequest(proto.Key("a"), []byte("value"), initialTxn),
		Reply: &proto.PutResponse{}}
	if err := sendCall(s.Sender, call); err != nil {
		t.Fatal(err)
	}
	// Continue with the transaction as returned in the reply header.
	*initialTxn = *call.Reply.Header().Txn

	// Verify 3 heartbeats.
	var heartbeatTS proto.Timestamp
	for i := 0; i < 3; i++ {
		if err := util.IsTrueWithin(func() bool {
			ok, txn, err := getTxn(s.Sender, initialTxn)
			if !ok || err != nil {
				return false
			}
			// Advance clock by 1ns.
			// Locking the TxnCoordSender to prevent a data race.
			s.Sender.Lock()
			s.Manual.Increment(1)
			s.Sender.Unlock()
			// The record may exist before it has been heartbeat for the
			// first time; guard against dereferencing a nil timestamp.
			if txn.LastHeartbeat != nil && heartbeatTS.Less(*txn.LastHeartbeat) {
				heartbeatTS = *txn.LastHeartbeat
				return true
			}
			return false
		}, 50*time.Millisecond); err != nil {
			t.Errorf("expected heartbeat %d within 50ms: %v", i+1, err)
		}
	}
}
Esempio n. 7
0
// putInternal writes a new timestamped version of value at key and
// updates the key's metadata, all in a single engine batch. A value
// whose Value field is nil is written as is (a deletion tombstone).
//
// It returns a *writeIntentError if the key carries an intent from a
// different transaction, and a *writeTooOldError if the existing
// metadata is newer than this write (by timestamp or by epoch).
func (mvcc *MVCC) putInternal(key Key, timestamp proto.Timestamp, value proto.MVCCValue, txn *proto.Transaction) error {
	if v := value.Value; v != nil && v.Bytes != nil && v.Integer != nil {
		return util.Errorf("key %q value contains both a byte slice and an integer value: %+v", key, value)
	}

	existing := &proto.MVCCMetadata{}
	exists, err := GetProto(mvcc.engine, key, existing)
	if err != nil {
		return err
	}

	// A put involves multiple writes, so collect them into one batch.
	var batch []interface{}

	if exists {
		// An uncommitted write intent from another transaction is in the
		// way. This should not happen since the range should check for
		// existing write intents before executing a Put at MVCC level.
		if existing.Txn != nil && (txn == nil || !bytes.Equal(existing.Txn.ID, txn.ID)) {
			return &writeIntentError{Txn: existing.Txn}
		}

		// The metadata may only be replaced if both the timestamp and
		// the epoch of the new write are greater than or equal to the
		// existing ones. Otherwise an older RPC is likely arriving out
		// of order, which must be an error since raft should handle any
		// client retry from timeout.
		current := !timestamp.Less(existing.Timestamp) &&
			(existing.Txn == nil || txn.Epoch >= existing.Txn.Epoch)
		if !current {
			return &writeTooOldError{Timestamp: existing.Timestamp, Txn: existing.Txn}
		}

		// Rewriting an intent at a different timestamp: drop the old
		// version first.
		if existing.Txn != nil && !timestamp.Equal(existing.Timestamp) {
			batch = append(batch, BatchDelete(mvccEncodeKey(key, existing.Timestamp)))
		}
	}

	// Create or replace the key metadata.
	metaPut, err := MakeBatchPutProto(key, &proto.MVCCMetadata{Txn: txn, Timestamp: timestamp})
	if err != nil {
		return err
	}
	batch = append(batch, metaPut)

	// The timestamp is encoded into the versioned key, so zero the
	// redundant copy carried by the value.
	if value.Value != nil {
		value.Value.Timestamp = nil
	}
	valuePut, err := MakeBatchPutProto(mvccEncodeKey(key, timestamp), &value)
	if err != nil {
		return err
	}
	batch = append(batch, valuePut)

	return mvcc.engine.WriteBatch(batch)
}