// process iterates through all keys in a replica's range, calling the garbage // collector for each key and associated set of values. GC'd keys are batched // into GC calls. Extant intents are resolved if intents are older than // intentAgeThreshold. func (gcq *gcQueue) process(now proto.Timestamp, repl *Replica) error { snap := repl.rm.Engine().NewSnapshot() iter := newRangeDataIterator(repl.Desc(), snap) defer iter.Close() defer snap.Close() // Lookup the GC policy for the zone containing this key range. policy, err := gcq.lookupGCPolicy(repl) if err != nil { return err } gcMeta := proto.NewGCMetadata(now.WallTime) gc := engine.NewGarbageCollector(now, policy) // Compute intent expiration (intent age at which we attempt to resolve). intentExp := now intentExp.WallTime -= intentAgeThreshold.Nanoseconds() gcArgs := &proto.GCRequest{ RequestHeader: proto.RequestHeader{ Timestamp: now, RangeID: repl.Desc().RangeID, }, } var mu sync.Mutex var oldestIntentNanos int64 = math.MaxInt64 var expBaseKey proto.Key var keys []proto.EncodedKey var vals [][]byte // Maps from txn ID to txn and intent key slice. txnMap := map[string]*proto.Transaction{} intentMap := map[string][]proto.Key{} // updateOldestIntent atomically updates the oldest intent. updateOldestIntent := func(intentNanos int64) { mu.Lock() defer mu.Unlock() if intentNanos < oldestIntentNanos { oldestIntentNanos = intentNanos } } // processKeysAndValues is invoked with each key and its set of // values. Intents older than the intent age threshold are sent for // resolution and values after the MVCC metadata, and possible // intent, are sent for garbage collection. processKeysAndValues := func() { // If there's more than a single value for the key, possibly send for GC. if len(keys) > 1 { meta := &engine.MVCCMetadata{} if err := gogoproto.Unmarshal(vals[0], meta); err != nil { log.Errorf("unable to unmarshal MVCC metadata for key %q: %s", keys[0], err) } else { // In the event that there's an active intent, send for // intent resolution if older than the threshold. startIdx := 1 if meta.Txn != nil { // Keep track of intent to resolve if older than the intent // expiration threshold. if meta.Timestamp.Less(intentExp) { id := string(meta.Txn.ID) txnMap[id] = meta.Txn intentMap[id] = append(intentMap[id], expBaseKey) } else { updateOldestIntent(meta.Txn.OrigTimestamp.WallTime) } // With an active intent, GC ignores MVCC metadata & intent value. startIdx = 2 } // See if any values may be GC'd. if gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); !gcTS.Equal(proto.ZeroTimestamp) { // TODO(spencer): need to split the requests up into // multiple requests in the event that more than X keys // are added to the request. gcArgs.Keys = append(gcArgs.Keys, proto.GCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS}) } } } } // Iterate through the keys and values of this replica's range. for ; iter.Valid(); iter.Next() { baseKey, ts, isValue := engine.MVCCDecodeKey(iter.Key()) if !isValue { // Moving to the next key (& values). processKeysAndValues() expBaseKey = baseKey keys = []proto.EncodedKey{iter.Key()} vals = [][]byte{iter.Value()} } else { if !baseKey.Equal(expBaseKey) { log.Errorf("unexpectedly found a value for %q with ts=%s; expected key %q", baseKey, ts, expBaseKey) continue } keys = append(keys, iter.Key()) vals = append(vals, iter.Value()) } } if iter.Error() != nil { return iter.Error() } // Handle last collected set of keys/vals. processKeysAndValues() // Set start and end keys. switch len(gcArgs.Keys) { case 0: return nil case 1: gcArgs.Key = gcArgs.Keys[0].Key gcArgs.EndKey = gcArgs.Key.Next() default: gcArgs.Key = gcArgs.Keys[0].Key gcArgs.EndKey = gcArgs.Keys[len(gcArgs.Keys)-1].Key } // Process push transactions in parallel. var wg sync.WaitGroup for _, txn := range txnMap { wg.Add(1) go gcq.pushTxn(repl, now, txn, updateOldestIntent, &wg) } wg.Wait() // Resolve all intents. // TODO(spencer): use a batch here when available. for id, txn := range txnMap { if txn.Status != proto.PENDING { // The transaction was successfully pushed, so resolve the intents. for _, key := range intentMap[id] { resolveArgs := &proto.ResolveIntentRequest{ RequestHeader: proto.RequestHeader{ Timestamp: now, Key: key, User: security.RootUser, Txn: txn, }, } if _, err := repl.AddCmd(repl.context(), resolveArgs); err != nil { log.Warningf("resolve of key %q failed: %s", key, err) updateOldestIntent(txn.OrigTimestamp.WallTime) } } } } // Send GC request through range. gcMeta.OldestIntentNanos = gogoproto.Int64(oldestIntentNanos) gcArgs.GCMeta = *gcMeta if _, err := repl.AddCmd(repl.context(), gcArgs); err != nil { return err } // Store current timestamp as last verification for this replica, as // we've just successfully scanned. if err := repl.SetLastVerificationTimestamp(now); err != nil { log.Errorf("failed to set last verification timestamp for replica %s: %s", repl, err) } return nil }
// process iterates through all keys in a range, calling the garbage // collector for each key and associated set of values. GC'd keys are // batched into InternalGC calls. Extant intents are resolved if // intents are older than intentAgeThreshold. func (gcq *gcQueue) process(now proto.Timestamp, rng *Range) error { snap := rng.rm.Engine().NewSnapshot() iter := newRangeDataIterator(rng.Desc(), snap) defer iter.Close() defer snap.Close() // Lookup the GC policy for the zone containing this key range. policy, err := gcq.lookupGCPolicy(rng) if err != nil { return err } gcMeta := proto.NewGCMetadata(now.WallTime) gc := engine.NewGarbageCollector(now, policy) // Compute intent expiration (intent age at which we attempt to resolve). intentExp := now intentExp.WallTime -= intentAgeThreshold.Nanoseconds() gcArgs := &proto.InternalGCRequest{ RequestHeader: proto.RequestHeader{ Timestamp: now, RaftID: rng.Desc().RaftID, }, } var mu sync.Mutex var oldestIntentNanos int64 = math.MaxInt64 var wg sync.WaitGroup var expBaseKey proto.Key var keys []proto.EncodedKey var vals [][]byte // updateOldestIntent atomically updates the oldest intent. updateOldestIntent := func(intentNanos int64) { mu.Lock() defer mu.Unlock() if intentNanos < oldestIntentNanos { oldestIntentNanos = intentNanos } } // processKeysAndValues is invoked with each key and its set of // values. Intents older than the intent age threshold are sent for // resolution and values after the MVCC metadata, and possible // intent, are sent for garbage collection. processKeysAndValues := func() { // If there's more than a single value for the key, possibly send for GC. if len(keys) > 1 { meta := &engine.MVCCMetadata{} if err := gogoproto.Unmarshal(vals[0], meta); err != nil { log.Errorf("unable to unmarshal MVCC metadata for key %q: %s", keys[0], err) } else { // In the event that there's an active intent, send for // intent resolution if older than the threshold. startIdx := 1 if meta.Txn != nil { // Resolve intent asynchronously in a goroutine if the intent // is older than the intent expiration threshold. if meta.Timestamp.Less(intentExp) { wg.Add(1) go gcq.resolveIntent(rng, expBaseKey, meta, updateOldestIntent, &wg) } else { updateOldestIntent(meta.Timestamp.WallTime) } // With an active intent, GC ignores MVCC metadata & intent value. startIdx = 2 } // See if any values may be GC'd. if gcTS := gc.Filter(keys[startIdx:], vals[startIdx:]); !gcTS.Equal(proto.ZeroTimestamp) { // TODO(spencer): need to split the requests up into // multiple requests in the event that more than X keys // are added to the request. gcArgs.Keys = append(gcArgs.Keys, proto.InternalGCRequest_GCKey{Key: expBaseKey, Timestamp: gcTS}) } } } } // Iterate through this range's keys and values. for ; iter.Valid(); iter.Next() { baseKey, ts, isValue := engine.MVCCDecodeKey(iter.Key()) if !isValue { // Moving to the next key (& values). processKeysAndValues() expBaseKey = baseKey keys = []proto.EncodedKey{iter.Key()} vals = [][]byte{iter.Value()} } else { if !baseKey.Equal(expBaseKey) { log.Errorf("unexpectedly found a value for %q with ts=%s; expected key %q", baseKey, ts, expBaseKey) continue } keys = append(keys, iter.Key()) vals = append(vals, iter.Value()) } } if iter.Error() != nil { return iter.Error() } // Handle last collected set of keys/vals. processKeysAndValues() // Set start and end keys. switch len(gcArgs.Keys) { case 0: return nil case 1: gcArgs.Key = gcArgs.Keys[0].Key gcArgs.EndKey = gcArgs.Key.Next() default: gcArgs.Key = gcArgs.Keys[0].Key gcArgs.EndKey = gcArgs.Keys[len(gcArgs.Keys)-1].Key } // Wait for any outstanding intent resolves and set oldest extant intent. wg.Wait() gcMeta.OldestIntentNanos = gogoproto.Int64(oldestIntentNanos) // Send GC request through range. gcArgs.GCMeta = *gcMeta if err := rng.AddCmd(rng.context(), proto.Call{Args: gcArgs, Reply: &proto.InternalGCResponse{}}); err != nil { return err } // Store current timestamp as last verification for this range, as // we've just successfully scanned. if err := rng.SetLastVerificationTimestamp(now); err != nil { log.Errorf("failed to set last verification timestamp for range %s: %s", rng, err) } return nil }