// InternalRangeLookup is used to look up RangeDescriptors - a RangeDescriptor // is a metadata structure which describes the key range and replica locations // of a distinct range in the cluster. // // RangeDescriptors are stored as values in the cockroach cluster's key-value // store. However, they are always stored using special "Range Metadata keys", // which are "ordinary" keys with a special prefix appended. The Range Metadata // Key for an ordinary key can be generated with the `engine.RangeMetaKey(key)` // function. The RangeDescriptor for the range which contains a given key can be // retrieved by generating its Range Metadata Key and dispatching it to // InternalRangeLookup. // // Note that the Range Metadata Key sent to InternalRangeLookup is NOT the key // at which the desired RangeDescriptor is stored. Instead, this method returns // the RangeDescriptor stored at the _lowest_ existing key which is _greater_ // than the given key. The returned RangeDescriptor will thus contain the // ordinary key which was originally used to generate the Range Metadata Key // sent to InternalRangeLookup. // // This method has an important optimization: instead of just returning the // request RangeDescriptor, it also returns a slice of additional range // descriptors immediately consecutive to the desired RangeDescriptor. This is // intended to serve as a sort of caching pre-fetch, so that the requesting // nodes can aggressively cache RangeDescriptors which are likely to be desired // by their current workload. func (r *Range) InternalRangeLookup(args *proto.InternalRangeLookupRequest, reply *proto.InternalRangeLookupResponse) { if err := engine.ValidateRangeMetaKey(args.Key); err != nil { reply.SetGoError(err) return } rangeCount := int64(args.MaxRanges) if rangeCount < 1 { reply.SetGoError(util.Errorf( "Range lookup specified invalid maximum range count %d: must be > 0", rangeCount)) return } // We want to search for the metadata key just greater than args.Key. Scan // for both the requested key and the keys immediately afterwards, up to // MaxRanges. metaPrefix := args.Key[:len(engine.KeyMeta1Prefix)] nextKey := engine.NextKey(args.Key) kvs, err := r.engine.Scan(nextKey, engine.PrefixEndKey(metaPrefix), rangeCount) if err != nil { reply.SetGoError(err) return } // The initial key must have the same metadata level prefix as we queried. if len(kvs) == 0 { // At this point the range has been verified to contain the requested // key, but no matching results were returned from the scan. This could // indicate a very bad system error, but for now we will just treat it // as a retryable Key Mismatch error. err := proto.NewRangeKeyMismatchError(args.Key, args.Key, r.Meta) reply.SetGoError(err) log.Errorf("InternalRangeLookup dispatched to correct range, but no matching RangeDescriptor was found. %s", err) return } // Decode all scanned range descriptors, stopping if a range is encountered // which does not have the same metadata prefix as the queried key. rds := make([]proto.RangeDescriptor, len(kvs)) for i := range kvs { if err = gogoproto.Unmarshal(kvs[i].Value, &rds[i]); err != nil { reply.SetGoError(err) return } } reply.Ranges = rds return }
// InternalRangeLookup is used to look up RangeDescriptors - a RangeDescriptor // is a metadata structure which describes the key range and replica locations // of a distinct range in the cluster. // // RangeDescriptors are stored as values in the cockroach cluster's key-value // store. However, they are always stored using special "Range Metadata keys", // which are "ordinary" keys with a special prefix prepended. The Range Metadata // Key for an ordinary key can be generated with the `keys.RangeMetaKey(key)` // function. The RangeDescriptor for the range which contains a given key can be // retrieved by generating its Range Metadata Key and dispatching it to // InternalRangeLookup. // // Note that the Range Metadata Key sent to InternalRangeLookup is NOT the key // at which the desired RangeDescriptor is stored. Instead, this method returns // the RangeDescriptor stored at the _lowest_ existing key which is _greater_ // than the given key. The returned RangeDescriptor will thus contain the // ordinary key which was originally used to generate the Range Metadata Key // sent to InternalRangeLookup. // // The "Range Metadata Key" for a range is built by appending the end key of // the range to the meta[12] prefix because the RocksDB iterator only supports // a Seek() interface which acts as a Ceil(). Using the start key of the range // would cause Seek() to find the key after the meta indexing record we're // looking for, which would result in having to back the iterator up, an option // which is both less efficient and not available in all cases. // // This method has an important optimization: instead of just returning the // request RangeDescriptor, it also returns a slice of additional range // descriptors immediately consecutive to the desired RangeDescriptor. This is // intended to serve as a sort of caching pre-fetch, so that the requesting // nodes can aggressively cache RangeDescriptors which are likely to be desired // by their current workload. func (r *Range) InternalRangeLookup(batch engine.Engine, args *proto.InternalRangeLookupRequest, reply *proto.InternalRangeLookupResponse) []proto.Intent { if err := keys.ValidateRangeMetaKey(args.Key); err != nil { reply.SetGoError(err) return nil } rangeCount := int64(args.MaxRanges) if rangeCount < 1 { reply.SetGoError(util.Errorf( "Range lookup specified invalid maximum range count %d: must be > 0", rangeCount)) return nil } if args.IgnoreIntents { rangeCount = 1 // simplify lookup because we may have to retry to read new } // We want to search for the metadata key just greater than args.Key. Scan // for both the requested key and the keys immediately afterwards, up to // MaxRanges. startKey, endKey := keys.MetaScanBounds(args.Key) // Scan inconsistently. Any intents encountered are bundled up, but other- // wise ignored. kvs, intents, err := engine.MVCCScan(batch, startKey, endKey, rangeCount, args.Timestamp, false /* !consistent */, args.Txn) if err != nil { // An error here would likely amount to something seriously going // wrong. reply.SetGoError(err) return nil } if args.IgnoreIntents && len(intents) > 0 { // NOTE (subtle): in general, we want to try to clean up dangling // intents on meta records. However, if we're in the process of // cleaning up a dangling intent on a meta record by pushing the // transaction, we don't want to create an infinite loop: // // intent! -> push-txn -> range-lookup -> intent! -> etc... // // Instead we want: // // intent! -> push-txn -> range-lookup -> ignore intent, return old/new ranges // // On the range-lookup from a push transaction, we therefore // want to suppress WriteIntentErrors and return a value // anyway. But which value? We don't know whether the range // update succeeded or failed, but if we don't return the // correct range descriptor we may not be able to find the // transaction to push. Since we cannot know the correct answer, // we choose randomly between the pre- and post- transaction // values. If we guess wrong, the client will try again and get // the other value (within a few tries). if rand.Intn(2) == 0 { key, txn := intents[0].Key, &intents[0].Txn val, _, err := engine.MVCCGet(batch, key, txn.Timestamp, true, txn) if err != nil { reply.SetGoError(err) return nil } kvs = []proto.KeyValue{{Key: key, Value: *val}} } } if len(kvs) == 0 { // No matching results were returned from the scan. This could // indicate a very bad system error, but for now we will just // treat it as a retryable Key Mismatch error. err := proto.NewRangeKeyMismatchError(args.Key, args.EndKey, r.Desc()) reply.SetGoError(err) log.Errorf("InternalRangeLookup dispatched to correct range, but no matching RangeDescriptor was found. %s", err) return nil } // Decode all scanned range descriptors, stopping if a range is encountered // which does not have the same metadata prefix as the queried key. rds := make([]proto.RangeDescriptor, len(kvs)) for i := range kvs { // TODO(tschottdorf) Candidate for a ReplicaCorruptionError, once we // introduce that. if err = gogoproto.Unmarshal(kvs[i].Value.Bytes, &rds[i]); err != nil { reply.SetGoError(err) return nil } } reply.Ranges = rds return intents }