Exemplo n.º 1
0
// process iterates through all keys in a replica's range, calling the garbage
// collector for each key and associated set of values. GC'd keys are batched
// into GC calls. Extant intents are resolved if intents are older than
// intentAgeThreshold.
func (gcq *gcQueue) process(now proto.Timestamp, repl *Replica) error {
	snap := repl.rm.Engine().NewSnapshot()
	iter := newRangeDataIterator(repl.Desc(), snap)
	defer iter.Close()
	defer snap.Close()

	// Lookup the GC policy for the zone containing this key range.
	policy, err := gcq.lookupGCPolicy(repl)
	if err != nil {
		return err
	}

	gcMeta := proto.NewGCMetadata(now.WallTime)
	gc := engine.NewGarbageCollector(now, policy)

	// Compute intent expiration (intent age at which we attempt to resolve).
	intentExp := now
	intentExp.WallTime -= intentAgeThreshold.Nanoseconds()

	gcArgs := &proto.GCRequest{
		RequestHeader: proto.RequestHeader{
			Timestamp: now,
			RangeID:   repl.Desc().RangeID,
		},
	}
	var mu sync.Mutex
	var oldestIntentNanos int64 = math.MaxInt64
	var expBaseKey proto.Key
	var keys []proto.EncodedKey
	var vals [][]byte

	// Maps from txn ID to txn and intent key slice.
	txnMap := map[string]*proto.Transaction{}
	intentMap := map[string][]proto.Key{}

	// updateOldestIntent atomically updates the oldest intent.
	updateOldestIntent := func(intentNanos int64) {
		mu.Lock()
		defer mu.Unlock()
		if intentNanos < oldestIntentNanos {
			oldestIntentNanos = intentNanos
		}
	}

	// processKeysAndValues is invoked with each key and its set of
	// values. Intents older than the intent age threshold are sent for
	// resolution and values after the MVCC metadata, and possible
	// intent, are sent for garbage collection.
	processKeysAndValues := func() {
		// If there's more than a single value for the key, possibly send for GC.
		if len(keys) > 1 {
			meta := &engine.MVCCMetadata{}
			if err := gogoproto.Unmarshal(vals[0], meta); err != nil {
				log.Errorf("unable to unmarshal MVCC metadata for key %q: %s", keys[0], err)
			} else {
				// In the event that there's an active intent, send for
				// intent resolution if older than the threshold.
				startIdx := 1
				if meta.Txn != nil {
					// Keep track of intent to resolve if older than the intent
					// expiration threshold.
					if meta.Timestamp.Less(intentExp) {
						id := string(meta.Txn.ID)
						txnMap[id] = meta.Txn
						intentMap[id] = append(intentMap[id], expBaseKey)
					} else {
						updateOldestIntent(meta.Txn.OrigTimestamp.WallTime)
					}
					// With an active intent, GC ignores MVCC metadata & intent value.
					startIdx = 2
				}
				// See if any values may be GC'd.
				if gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); !gcTS.Equal(proto.ZeroTimestamp) {
					// TODO(spencer): need to split the requests up into
					// multiple requests in the event that more than X keys
					// are added to the request.
					gcArgs.Keys = append(gcArgs.Keys, proto.GCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS})
				}
			}
		}
	}

	// Iterate through the keys and values of this replica's range.
	for ; iter.Valid(); iter.Next() {
		baseKey, ts, isValue := engine.MVCCDecodeKey(iter.Key())
		if !isValue {
			// Moving to the next key (& values).
			processKeysAndValues()
			expBaseKey = baseKey
			keys = []proto.EncodedKey{iter.Key()}
			vals = [][]byte{iter.Value()}
		} else {
			if !baseKey.Equal(expBaseKey) {
				log.Errorf("unexpectedly found a value for %q with ts=%s; expected key %q", baseKey, ts, expBaseKey)
				continue
			}
			keys = append(keys, iter.Key())
			vals = append(vals, iter.Value())
		}
	}
	if iter.Error() != nil {
		return iter.Error()
	}
	// Handle last collected set of keys/vals.
	processKeysAndValues()

	// Set start and end keys.
	switch len(gcArgs.Keys) {
	case 0:
		return nil
	case 1:
		gcArgs.Key = gcArgs.Keys[0].Key
		gcArgs.EndKey = gcArgs.Key.Next()
	default:
		gcArgs.Key = gcArgs.Keys[0].Key
		gcArgs.EndKey = gcArgs.Keys[len(gcArgs.Keys)-1].Key
	}

	// Process push transactions in parallel.
	var wg sync.WaitGroup
	for _, txn := range txnMap {
		wg.Add(1)
		go gcq.pushTxn(repl, now, txn, updateOldestIntent, &wg)
	}
	wg.Wait()

	// Resolve all intents.
	// TODO(spencer): use a batch here when available.
	for id, txn := range txnMap {
		if txn.Status != proto.PENDING {
			// The transaction was successfully pushed, so resolve the intents.
			for _, key := range intentMap[id] {
				resolveArgs := &proto.ResolveIntentRequest{
					RequestHeader: proto.RequestHeader{
						Timestamp: now,
						Key:       key,
						User:      security.RootUser,
						Txn:       txn,
					},
				}
				if _, err := repl.AddCmd(repl.context(), resolveArgs); err != nil {
					log.Warningf("resolve of key %q failed: %s", key, err)
					updateOldestIntent(txn.OrigTimestamp.WallTime)
				}
			}
		}
	}

	// Send GC request through range.
	gcMeta.OldestIntentNanos = gogoproto.Int64(oldestIntentNanos)
	gcArgs.GCMeta = *gcMeta
	if _, err := repl.AddCmd(repl.context(), gcArgs); err != nil {
		return err
	}

	// Store current timestamp as last verification for this replica, as
	// we've just successfully scanned.
	if err := repl.SetLastVerificationTimestamp(now); err != nil {
		log.Errorf("failed to set last verification timestamp for replica %s: %s", repl, err)
	}

	return nil
}
Exemplo n.º 2
0
// process iterates through all keys in a range, calling the garbage
// collector for each key and associated set of values. GC'd keys are
// batched into InternalGC calls. Extant intents are resolved if
// intents are older than intentAgeThreshold.
func (gcq *gcQueue) process(now proto.Timestamp, rng *Range) error {
	snap := rng.rm.Engine().NewSnapshot()
	iter := newRangeDataIterator(rng.Desc(), snap)
	defer iter.Close()
	defer snap.Close()

	// Lookup the GC policy for the zone containing this key range.
	policy, err := gcq.lookupGCPolicy(rng)
	if err != nil {
		return err
	}

	gcMeta := proto.NewGCMetadata(now.WallTime)
	gc := engine.NewGarbageCollector(now, policy)

	// Compute intent expiration (intent age at which we attempt to resolve).
	intentExp := now
	intentExp.WallTime -= intentAgeThreshold.Nanoseconds()

	gcArgs := &proto.InternalGCRequest{
		RequestHeader: proto.RequestHeader{
			Timestamp: now,
			RaftID:    rng.Desc().RaftID,
		},
	}
	var mu sync.Mutex
	var oldestIntentNanos int64 = math.MaxInt64
	var wg sync.WaitGroup
	var expBaseKey proto.Key
	var keys []proto.EncodedKey
	var vals [][]byte

	// updateOldestIntent atomically updates the oldest intent.
	updateOldestIntent := func(intentNanos int64) {
		mu.Lock()
		defer mu.Unlock()
		if intentNanos < oldestIntentNanos {
			oldestIntentNanos = intentNanos
		}
	}

	// processKeysAndValues is invoked with each key and its set of
	// values. Intents older than the intent age threshold are sent for
	// resolution and values after the MVCC metadata, and possible
	// intent, are sent for garbage collection.
	processKeysAndValues := func() {
		// If there's more than a single value for the key, possibly send for GC.
		if len(keys) > 1 {
			meta := &engine.MVCCMetadata{}
			if err := gogoproto.Unmarshal(vals[0], meta); err != nil {
				log.Errorf("unable to unmarshal MVCC metadata for key %q: %s", keys[0], err)
			} else {
				// In the event that there's an active intent, send for
				// intent resolution if older than the threshold.
				startIdx := 1
				if meta.Txn != nil {
					// Resolve intent asynchronously in a goroutine if the intent
					// is older than the intent expiration threshold.
					if meta.Timestamp.Less(intentExp) {
						wg.Add(1)
						go gcq.resolveIntent(rng, expBaseKey, meta, updateOldestIntent, &wg)
					} else {
						updateOldestIntent(meta.Timestamp.WallTime)
					}
					// With an active intent, GC ignores MVCC metadata & intent value.
					startIdx = 2
				}
				// See if any values may be GC'd.
				if gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); !gcTS.Equal(proto.ZeroTimestamp) {
					// TODO(spencer): need to split the requests up into
					// multiple requests in the event that more than X keys
					// are added to the request.
					gcArgs.Keys = append(gcArgs.Keys, proto.InternalGCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS})
				}
			}
		}
	}

	// Iterate through this range's keys and values.
	for ; iter.Valid(); iter.Next() {
		baseKey, ts, isValue := engine.MVCCDecodeKey(iter.Key())
		if !isValue {
			// Moving to the next key (& values).
			processKeysAndValues()
			expBaseKey = baseKey
			keys = []proto.EncodedKey{iter.Key()}
			vals = [][]byte{iter.Value()}
		} else {
			if !baseKey.Equal(expBaseKey) {
				log.Errorf("unexpectedly found a value for %q with ts=%s; expected key %q", baseKey, ts, expBaseKey)
				continue
			}
			keys = append(keys, iter.Key())
			vals = append(vals, iter.Value())
		}
	}
	if iter.Error() != nil {
		return iter.Error()
	}
	// Handle last collected set of keys/vals.
	processKeysAndValues()

	// Set start and end keys.
	switch len(gcArgs.Keys) {
	case 0:
		return nil
	case 1:
		gcArgs.Key = gcArgs.Keys[0].Key
		gcArgs.EndKey = gcArgs.Key.Next()
	default:
		gcArgs.Key = gcArgs.Keys[0].Key
		gcArgs.EndKey = gcArgs.Keys[len(gcArgs.Keys)-1].Key
	}

	// Wait for any outstanding intent resolves and set oldest extant intent.
	wg.Wait()
	gcMeta.OldestIntentNanos = gogoproto.Int64(oldestIntentNanos)

	// Send GC request through range.
	gcArgs.GCMeta = *gcMeta
	if err := rng.AddCmd(rng.context(), proto.Call{Args: gcArgs, Reply: &proto.InternalGCResponse{}}); err != nil {
		return err
	}

	// Store current timestamp as last verification for this range, as
	// we've just successfully scanned.
	if err := rng.SetLastVerificationTimestamp(now); err != nil {
		log.Errorf("failed to set last verification timestamp for range %s: %s", rng, err)
	}

	return nil
}