// Send implements the client.Sender interface.
func (rls *retryableLocalSender) Send(_ context.Context, call proto.Call) {
	// Instant retry to handle the case of a range split, which is
	// exposed here as a RangeKeyMismatchError.
	retryOpts := retry.Options{
		Tag: fmt.Sprintf("routing %s locally", call.Method()),
	}
	// In local tests, the RPCs are not actually sent over the wire. We
	// need to clone the Txn in order to avoid unexpected sharing
	// between TxnCoordSender and client.Txn.
	if header := call.Args.Header(); header.Txn != nil {
		header.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
	}
	err := retry.WithBackoff(retryOpts, func() (retry.Status, error) {
		call.Reply.Header().Error = nil
		rls.LocalSender.Send(context.TODO(), call)
		// Check for range key mismatch error (this could happen if
		// range was split between lookup and execution). In this case,
		// reset header.Replica and engage retry loop.
		if err := call.Reply.Header().GoError(); err != nil {
			if _, ok := err.(*proto.RangeKeyMismatchError); ok {
				// Clear request replica.
				call.Args.Header().Replica = proto.Replica{}
				return retry.Continue, err
			}
		}
		return retry.Break, nil
	})
	if err != nil {
		panic(fmt.Sprintf("local sender did not succeed: %s", err))
	}
}
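All of these examples drive retry.WithBackoff with a closure that returns a retry.Status (retry.Break, retry.Continue, and, in a later example, retry.Reset). As a rough mental model, here is a minimal standalone sketch of how such a loop could interpret those statuses; the withBackoff helper and status type below are illustrative stand-ins, not the actual util/retry implementation.

package main

import (
	"errors"
	"fmt"
	"time"
)

// status mirrors the three outcomes the closures above return.
type status int

const (
	statusBreak    status = iota // stop retrying; return the closure's error (nil on success)
	statusContinue               // back off and try again
	statusReset                  // start over as if this were the first attempt
)

// withBackoff is an illustrative stand-in for retry.WithBackoff: it runs fn
// until it returns statusBreak, sleeping between attempts and doubling the
// delay up to a cap.
func withBackoff(fn func() (status, error)) error {
	const initial, maxBackoff = 50 * time.Millisecond, time.Second
	backoff := initial
	for {
		st, err := fn()
		switch st {
		case statusBreak:
			return err
		case statusReset:
			backoff = initial
		case statusContinue:
			time.Sleep(backoff)
			if backoff *= 2; backoff > maxBackoff {
				backoff = maxBackoff
			}
		}
	}
}

func main() {
	attempts := 0
	err := withBackoff(func() (status, error) {
		attempts++
		if attempts < 3 {
			return statusContinue, errors.New("transient failure")
		}
		return statusBreak, nil
	})
	fmt.Printf("succeeded after %d attempts, err=%v\n", attempts, err)
}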
Example #2
// maybeWarnAboutInit looks for signs indicating a cluster which
// hasn't been initialized and warns. There's no absolutely sure way
// to determine whether the current node is simply waiting to be
// bootstrapped to an existing cluster vs. the operator having failed
// to initialize the cluster via the "cockroach init" command, so
// we can only warn.
//
// This method checks whether all gossip bootstrap hosts are
// connected and whether the node itself is a bootstrap host, and
// warns if, despite that, there is still no sentinel gossip.
func (g *Gossip) maybeWarnAboutInit(stopper *util.Stopper) {
	stopper.RunWorker(func() {
		// Wait 5s before first check.
		select {
		case <-stopper.ShouldStop():
			return
		case <-time.After(5 * time.Second):
		}
		retryOptions := retry.Options{
			Tag:         "check cluster initialization",
			Backoff:     5 * time.Second,  // first backoff at 5s
			MaxBackoff:  60 * time.Second, // max backoff is 60s
			Constant:    2,                // doubles
			MaxAttempts: 0,                // indefinite retries
			Stopper:     stopper,          // stop no matter what on stopper
		}
		// This will never return an error because retries continue indefinitely.
		_ = retry.WithBackoff(retryOptions, func() (retry.Status, error) {
			g.mu.Lock()
			hasSentinel := g.is.getInfo(KeySentinel) != nil
			g.mu.Unlock()
			// If we have the sentinel, exit the retry loop.
			if hasSentinel {
				return retry.Break, nil
			}
			// Otherwise, if all bootstrap hosts are connected, warn.
			if g.triedAll {
				log.Warningf("connected to gossip but missing sentinel. Has the cluster been initialized? " +
					"Use \"cockroach init\" to initialize.")
			}
			return retry.Continue, nil
		})
	})
}
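The Options above describe a schedule that starts at 5s and doubles up to a 60s ceiling (taking Constant as the multiplier, as the inline comments indicate). A quick standalone sketch of that progression:

package main

import (
	"fmt"
	"time"
)

func main() {
	backoff := 5 * time.Second          // Backoff: first wait is 5s
	const maxBackoff = 60 * time.Second // MaxBackoff: capped at 60s
	for attempt := 1; attempt <= 7; attempt++ {
		fmt.Printf("attempt %d: wait %s\n", attempt, backoff)
		backoff *= 2 // Constant: 2, i.e. the wait doubles
		if backoff > maxBackoff {
			backoff = maxBackoff
		}
	}
	// Prints waits of 5s, 10s, 20s, 40s, 60s, 60s, 60s.
}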
Example #3
// post posts the call using the HTTP client. The call's method is
// appended to KVDBEndpoint and set as the URL path. The call's arguments
// are protobuf-serialized and written as the POST body. The content
// type is set to application/x-protobuf.
//
// On success, the response body is unmarshalled into call.Reply.
func (s *httpSender) post(call proto.Call) error {
	retryOpts := s.retryOpts
	retryOpts.Tag = fmt.Sprintf("%s %s", s.context.RequestScheme(), call.Method())

	// Marshal the args into a request body.
	body, err := gogoproto.Marshal(call.Args)
	if err != nil {
		return err
	}

	url := s.context.RequestScheme() + "://" + s.server + KVDBEndpoint + call.Method().String()

	return retry.WithBackoff(retryOpts, func() (retry.Status, error) {
		req, err := http.NewRequest("POST", url, bytes.NewReader(body))
		if err != nil {
			return retry.Break, err
		}
		req.Header.Add(util.ContentTypeHeader, util.ProtoContentType)
		req.Header.Add(util.AcceptHeader, util.ProtoContentType)
		req.Header.Add(util.AcceptEncodingHeader, util.SnappyEncoding)

		resp, err := s.client.Do(req)
		if err != nil {
			return retry.Continue, err
		}

		defer resp.Body.Close()

		switch resp.StatusCode {
		case http.StatusOK:
			// We're cool.
		case http.StatusServiceUnavailable, http.StatusGatewayTimeout, StatusTooManyRequests:
			// Retry on service unavailable, gateway timeout, and too many requests.
			// TODO(spencer): consider respecting the Retry-After header for
			// backoff / retry duration.
			return retry.Continue, errors.New(resp.Status)
		default:
			// Other errors are not recoverable; don't retry.
			return retry.Break, errors.New(resp.Status)
		}

		if resp.Header.Get(util.ContentEncodingHeader) == util.SnappyEncoding {
			resp.Body = &snappyReader{body: resp.Body}
		}

		b, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			return retry.Continue, err
		}

		if err := gogoproto.Unmarshal(b, call.Reply); err != nil {
			return retry.Continue, err
		}

		return retry.Break, nil
	})
}
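The TODO above mentions honoring the Retry-After header when backing off. A hedged sketch of how that wait could be derived using only the standard library; retryAfterDuration is a hypothetical helper, not part of httpSender:

package main

import (
	"fmt"
	"net/http"
	"strconv"
	"time"
)

// retryAfterDuration is a hypothetical helper that converts a Retry-After
// header value into a wait duration. The header may carry either an integer
// number of seconds or an HTTP-date.
func retryAfterDuration(h http.Header) (time.Duration, bool) {
	val := h.Get("Retry-After")
	if val == "" {
		return 0, false
	}
	if secs, err := strconv.Atoi(val); err == nil && secs >= 0 {
		return time.Duration(secs) * time.Second, true
	}
	if t, err := http.ParseTime(val); err == nil {
		if d := time.Until(t); d > 0 {
			return d, true
		}
		return 0, true
	}
	return 0, false
}

func main() {
	h := http.Header{}
	h.Set("Retry-After", "3")
	if d, ok := retryAfterDuration(h); ok {
		fmt.Println("server asked us to wait", d) // 3s
	}
}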
Example #4
// allocateNodeID increments the node id generator key to allocate
// a new, unique node id. It will retry indefinitely on retryable
// errors.
func allocateNodeID(db *client.DB) (proto.NodeID, error) {
	var id proto.NodeID
	err := retry.WithBackoff(allocRetryOptions, func() (retry.Status, error) {
		r, err := db.Inc(keys.NodeIDGenerator, 1)
		if err != nil {
			status := retry.Break
			if _, ok := err.(util.Retryable); ok {
				status = retry.Continue
			}
			return status, util.Errorf("unable to allocate node ID: %s", err)
		}
		id = proto.NodeID(r.ValueInt())
		return retry.Break, nil
	})
	return id, err
}
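The status selection here hinges on whether the error implements util.Retryable. A standalone sketch of that classification pattern, with a local retryable interface standing in for util.Retryable:

package main

import (
	"errors"
	"fmt"
)

// retryable stands in for util.Retryable: an error that reports whether
// retrying could succeed.
type retryable interface {
	CanRetry() bool
}

type transientErr struct{ msg string }

func (e *transientErr) Error() string  { return e.msg }
func (e *transientErr) CanRetry() bool { return true }

// shouldRetry classifies an error the same way the allocators above do:
// only errors advertising themselves as retryable keep the loop going.
func shouldRetry(err error) bool {
	if r, ok := err.(retryable); ok {
		return r.CanRetry()
	}
	return false
}

func main() {
	fmt.Println(shouldRetry(&transientErr{"generator busy"})) // true
	fmt.Println(shouldRetry(errors.New("permission denied"))) // false
}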
Example #5
// allocateStoreIDs increments the store id generator key for the
// specified node to allocate "inc" new, unique store ids. The
// first ID in a contiguous range is returned on success. The call
// will retry indefinitely on retryable errors.
func allocateStoreIDs(nodeID proto.NodeID, inc int64, db *client.DB) (proto.StoreID, error) {
	var id proto.StoreID
	err := retry.WithBackoff(allocRetryOptions, func() (retry.Status, error) {
		r, err := db.Inc(keys.StoreIDGenerator, inc)
		if err != nil {
			status := retry.Break
			if _, ok := err.(util.Retryable); ok {
				status = retry.Continue
			}
			return status, util.Errorf("unable to allocate %d store IDs for node %d: %s", inc, nodeID, err)
		}
		id = proto.StoreID(r.ValueInt() - inc + 1)
		return retry.Break, nil
	})
	return id, err
}
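Because Inc returns the post-increment value, the first ID of the freshly allocated contiguous block is ValueInt() - inc + 1. A tiny worked example:

package main

import "fmt"

func main() {
	// If the store ID generator previously stood at 10 and we increment it
	// by inc = 3, Inc returns 13 and the newly reserved IDs are 11, 12, 13.
	const prev, inc = int64(10), int64(3)
	newValue := prev + inc
	firstID := newValue - inc + 1
	fmt.Println(firstID, newValue) // 11 13
}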
Example #6
func (ia *idAllocator) start() {
	ia.stopper.RunWorker(func() {
		defer close(ia.ids)

		for {
			var newValue int64
			for newValue <= int64(ia.minID) {
				if ia.stopper.StartTask() {
					if err := retry.WithBackoff(idAllocationRetryOpts, func() (retry.Status, error) {
						idKey := ia.idKey.Load().(proto.Key)
						r, err := ia.db.Inc(idKey, int64(ia.blockSize))
						if err != nil {
							log.Warningf("unable to allocate %d ids from %s: %s", ia.blockSize, idKey, err)
							return retry.Continue, err
						}
						newValue = r.ValueInt()
						return retry.Break, nil
					}); err != nil {
						panic(fmt.Sprintf("unexpectedly exited id allocation retry loop: %s", err))
					}
					ia.stopper.FinishTask()
				} else {
					return
				}
			}

			end := newValue + 1
			start := end - int64(ia.blockSize)

			if start < int64(ia.minID) {
				start = int64(ia.minID)
			}

			// Add all new ids to the channel for consumption.
			for i := start; i < end; i++ {
				select {
				case ia.ids <- uint32(i):
				case <-ia.stopper.ShouldStop():
					return
				}
			}
		}
	})
}
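The other side of ia.ids is a consumer that must also respect shutdown. A minimal sketch of that consumer pattern, with illustrative channel names rather than the actual idAllocator fields:

package main

import "fmt"

// consume drains IDs from an allocator-style channel until either the
// channel is closed (allocator shut down) or stop is closed, mirroring the
// select in start() from the consumer's side.
func consume(ids <-chan uint32, stop <-chan struct{}) {
	for {
		select {
		case id, ok := <-ids:
			if !ok {
				return // allocator closed the channel on shutdown
			}
			fmt.Println("got id", id)
		case <-stop:
			return
		}
	}
}

func main() {
	ids := make(chan uint32, 2)
	stop := make(chan struct{})
	ids <- 2
	ids <- 3
	close(ids)
	consume(ids, stop)
}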
Example #7
// connect dials the connection in a backoff/retry loop.
func (c *Client) connect(opts *retry.Options, context *Context) error {
	// Attempt to dial connection.
	retryOpts := clientRetryOptions
	if opts != nil {
		retryOpts = *opts
	}
	retryOpts.Tag = fmt.Sprintf("client %s connection", c.addr)
	retryOpts.Stopper = context.Stopper

	if err := retry.WithBackoff(retryOpts, func() (retry.Status, error) {
		tlsConfig, err := context.GetClientTLSConfig()
		if err != nil {
			// Problem loading the TLS config. Retrying will not help.
			return retry.Break, err
		}

		conn, err := tlsDialHTTP(c.addr.Network(), c.addr.String(), tlsConfig)
		if err != nil {
			// Retry if the error is temporary, otherwise fail fast.
			if t, ok := err.(net.Error); ok && t.Temporary() {
				return retry.Continue, err
			}
			return retry.Break, err
		}

		c.mu.Lock()
		c.Client = rpc.NewClientWithCodec(codec.NewClientCodec(conn))
		c.lAddr = conn.LocalAddr()
		c.mu.Unlock()

		// Ensure at least one heartbeat succeeds before exiting the
		// retry loop. If it fails, don't retry: The node is probably
		// dead.
		if err = c.heartbeat(); err != nil {
			return retry.Break, err
		}

		return retry.Break, nil
	}); err != nil {
		return err
	}

	return nil
}
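The dial path above retries only errors that the net package marks as temporary. A standalone sketch of that classification, assuming the same net.Error type assertion:

package main

import (
	"errors"
	"fmt"
	"net"
)

// isTemporary mirrors the check in connect above: only errors the net
// package flags as temporary are worth another dial attempt.
func isTemporary(err error) bool {
	if nerr, ok := err.(net.Error); ok {
		return nerr.Temporary()
	}
	return false
}

func main() {
	dnsErr := &net.DNSError{Err: "lookup timed out", IsTemporary: true}
	fmt.Println(isTemporary(dnsErr))                 // true
	fmt.Println(isTemporary(errors.New("bad cert"))) // false
}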
Example #8
func (txn *Txn) exec(retryable func(txn *Txn) error) error {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	retryOpts := txn.db.txnRetryOptions
	retryOpts.Tag = txn.txn.Name
	err := retry.WithBackoff(retryOpts, func() (retry.Status, error) {
		txn.haveTxnWrite, txn.haveEndTxn = false, false // always reset before [re]starting txn
		err := retryable(txn)
		if err == nil {
			if !txn.haveEndTxn && txn.haveTxnWrite {
				// If there were no errors running retryable, commit the txn. This
				// may block waiting for outstanding writes to complete in case
				// retryable didn't -- we need the most recent of all response
				// timestamps in order to commit.
				etArgs := &proto.EndTransactionRequest{Commit: true}
				etReply := &proto.EndTransactionResponse{}
				err = txn.send(proto.Call{Args: etArgs, Reply: etReply})
			}
		}
		if restartErr, ok := err.(proto.TransactionRestartError); ok {
			if restartErr.CanRestartTransaction() == proto.TransactionRestart_IMMEDIATE {
				return retry.Reset, err
			} else if restartErr.CanRestartTransaction() == proto.TransactionRestart_BACKOFF {
				return retry.Continue, err
			}
			// By default, fall through and return Break.
		}
		return retry.Break, err
	})
	if err != nil && txn.haveTxnWrite {
		if replyErr := txn.send(proto.Call{
			Args:  &proto.EndTransactionRequest{Commit: false},
			Reply: &proto.EndTransactionResponse{},
		}); replyErr != nil {
			log.Errorf("failure aborting transaction: %s; abort caused by: %s", replyErr, err)
		}
		return err
	}
	return err
}
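exec maps a transaction restart signal onto the retry loop: IMMEDIATE resets the backoff, BACKOFF continues it, and anything else breaks out. A standalone sketch of that mapping, with illustrative local types in place of the proto constants:

package main

import "fmt"

// restartMode is an illustrative stand-in for proto.TransactionRestart.
type restartMode int

const (
	restartImmediate restartMode = iota // retry right away, resetting backoff
	restartBackoff                      // retry after backing off
	restartAbort                        // give up
)

type retryStatus string

// statusFor reproduces the classification in exec: IMMEDIATE restarts reset
// the loop, BACKOFF restarts continue it, anything else breaks out.
func statusFor(mode restartMode) retryStatus {
	switch mode {
	case restartImmediate:
		return "Reset"
	case restartBackoff:
		return "Continue"
	default:
		return "Break"
	}
}

func main() {
	fmt.Println(statusFor(restartImmediate), statusFor(restartBackoff), statusFor(restartAbort))
	// Prints: Reset Continue Break
}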
Example #9
// allocateBlock allocates a block of IDs using db.Increment and
// sends all IDs on the ids channel. Midway through the block, a
// special allocationTrigger ID is inserted, which kicks off allocation
// of the next block before IDs run out, hiding Increment latency.
func (ia *idAllocator) allocateBlock(incr int64) {
	var newValue int64
	retryOpts := idAllocationRetryOpts
	err := retry.WithBackoff(retryOpts, func() (retry.Status, error) {
		idKey := ia.idKey.Load().(proto.Key)
		r, err := ia.db.Inc(idKey, incr)
		if err != nil {
			log.Warningf("unable to allocate %d ids from %s: %s", incr, idKey, err)
			return retry.Continue, err
		}
		newValue = r.ValueInt()
		return retry.Break, nil
	})
	if err != nil {
		panic(fmt.Sprintf("unexpectedly exited id allocation retry loop: %s", err))
	}

	if newValue <= ia.minID {
		log.Warningf("allocator key is currently set at %d; minID is %d; allocating again to skip %d IDs",
			newValue, ia.minID, ia.minID-newValue)
		ia.allocateBlock(ia.minID - newValue + ia.blockSize - 1)
		return
	}

	// Add all new ids to the channel for consumption.
	start := newValue - ia.blockSize + 1
	end := newValue + 1
	if start < ia.minID {
		start = ia.minID
	}

	for i := start; i < end; i++ {
		ia.ids <- i
		if i == (start+end)/2 {
			ia.ids <- allocationTrigger
		}
	}
}
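With a block size of 10 and a post-increment value of 20, the IDs handed out are 11 through 20, and the allocationTrigger is emitted just past the midpoint. A small standalone illustration of that arithmetic (the trigger is printed as a marker here rather than sent on a channel):

package main

import "fmt"

func main() {
	const blockSize, newValue = int64(10), int64(20)
	start, end := newValue-blockSize+1, newValue+1
	for i := start; i < end; i++ {
		fmt.Print(i, " ")
		if i == (start+end)/2 {
			fmt.Print("[trigger] ") // allocation of the next block would begin here
		}
	}
	fmt.Println()
	// Prints: 11 12 13 14 15 16 [trigger] 17 18 19 20
}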
Example #10
// Send implements the client.Sender interface. It verifies
// permissions and looks up the appropriate range based on the
// supplied key and sends the RPC according to the specified options.
//
// If the request spans multiple ranges (which is possible for Scan or
// DeleteRange requests), Send sends requests to the individual ranges
// sequentially and combines the results transparently.
//
// This may temporarily adjust the request headers, so the proto.Call
// must not be used concurrently until Send has returned.
func (ds *DistSender) Send(_ context.Context, call proto.Call) {
	args := call.Args
	finalReply := call.Reply
	endKey := args.Header().EndKey

	// Verify permissions.
	if err := ds.verifyPermissions(call.Args); err != nil {
		call.Reply.Header().SetGoError(err)
		return
	}

	// In the event that timestamp isn't set and read consistency isn't
	// required, set the timestamp using the local clock.
	if args.Header().ReadConsistency == proto.INCONSISTENT && args.Header().Timestamp.Equal(proto.ZeroTimestamp) {
		// Make sure that after the call, args hasn't changed.
		defer func(timestamp proto.Timestamp) {
			args.Header().Timestamp = timestamp
		}(args.Header().Timestamp)
		args.Header().Timestamp = ds.clock.Now()
	}

	// If this is a bounded request, we will change its bound as we receive
	// replies. This undoes that when we return.
	boundedArgs, _ := args.(proto.Bounded)

	if boundedArgs != nil {
		defer func(n int64) {
			boundedArgs.SetBound(n)
		}(boundedArgs.GetBound())
	}

	// Retry logic for lookup of range by key and RPCs to range replicas.
	retryOpts := ds.rpcRetryOptions
	retryOpts.Tag = "routing " + call.Method().String() + " rpc"

	curReply := finalReply
	for {
		call.Reply = curReply
		curReply.Header().Reset()

		var desc, descNext *proto.RangeDescriptor
		err := retry.WithBackoff(retryOpts, func() (retry.Status, error) {
			var err error
			// Get range descriptor (or, when spanning range, descriptors).
			// sendAttempt below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			desc, descNext, err = ds.getDescriptors(call)
			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if err != nil {
				if rErr, ok := err.(util.Retryable); ok && rErr.CanRetry() {
					return retry.Continue, err
				}
				return retry.Break, err
			}
			// Truncate the request to our current range, making sure not to
			// touch it unless we have to (it is illegal to send EndKey on
			// commands which do not operate on ranges).
			if descNext != nil {
				args.Header().EndKey = desc.EndKey
				defer func() {
					// "Untruncate" EndKey to original.
					args.Header().EndKey = endKey
				}()
			}
			return ds.sendAttempt(desc, call)
		})

		// Immediately return if querying a range failed non-retryably.
		// For multi-range requests, we return the failing range's reply.
		if err != nil {
			call.Reply.Header().SetGoError(err)
			return
		}

		if finalReply != curReply {
			// This was the second or later call in a multi-range request.
			// Combine the new response with the existing one.
			if cFinalReply, ok := finalReply.(proto.Combinable); ok {
				cFinalReply.Combine(curReply)
			} else {
				// This should never apply in practice, as we'll only end up here
				// for range-spanning requests.
				call.Reply.Header().SetGoError(util.Errorf("multi-range request with non-combinable response type"))
				return
			}
		}

		// If this request has a bound, such as MaxResults in
		// ScanRequest, check whether enough rows have been retrieved.
		if boundedArgs != nil {
			if prevBound := boundedArgs.GetBound(); prevBound > 0 {
				if cReply, ok := curReply.(proto.Countable); ok {
					if nextBound := prevBound - cReply.Count(); nextBound > 0 {
						// Update bound for the next round.
						// We've deferred restoring the original bound earlier.
						boundedArgs.SetBound(nextBound)
					} else {
						// Set flag to break the loop.
						descNext = nil
					}
				}
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if descNext == nil {
			break
		}

		if curReply == finalReply {
			// This is the end of the first iteration in a multi-range query,
			// so it's a convenient place to clean up changes to the args in
			// the case of multi-range requests.
			// Reset original start key (the EndKey is taken care of without
			// defer above).
			defer func(k proto.Key) {
				args.Header().Key = k
			}(args.Header().Key)
		}

		// In next iteration, query next range.
		args.Header().Key = descNext.StartKey

		// This is a multi-range request, make a new reply object for
		// subsequent iterations of the loop.
		curReply = args.CreateReply()
	}
	call.Reply = finalReply
}
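For bounded multi-range requests, the loop above shrinks the bound by each range's row count and stops once it reaches zero. A standalone sketch of that bookkeeping, with remainingBound as a hypothetical helper:

package main

import "fmt"

// remainingBound mirrors the bookkeeping at the end of Send's loop: after a
// range returns count rows, the bound for the next range shrinks by that
// much, and a result of zero (or less) means the request is satisfied.
func remainingBound(prevBound, count int64) (next int64, done bool) {
	next = prevBound - count
	return next, next <= 0
}

func main() {
	bound := int64(25) // e.g. MaxResults on a Scan spanning three ranges
	for _, rows := range []int64{10, 10, 10} {
		var done bool
		bound, done = remainingBound(bound, rows)
		fmt.Println("remaining:", bound, "done:", done)
		if done {
			break
		}
	}
	// Prints remaining bounds of 15, then 5, then -5 (done).
}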