// reserve sends a reservation request RPC to the node and store
// identified by toStoreID. It returns an error if the reservation was not
// successfully booked. When unsuccessful, the store is marked as having a
// declined reservation so it will not be considered for up-replication or
// rebalancing until after the configured timeout period has passed.
// TODO(bram): consider moving the nodeID to the store pool during
// NewStorePool.
func (sp *StorePool) reserve(
	curIdent roachpb.StoreIdent,
	toStoreID roachpb.StoreID,
	rangeID roachpb.RangeID,
	rangeSize int64,
) error {
	if !sp.reservationsEnabled {
		return nil
	}
	sp.mu.Lock()
	defer sp.mu.Unlock()
	detail, ok := sp.mu.stores[toStoreID]
	if !ok {
		return errors.Errorf("store %d does not exist in the store pool", toStoreID)
	}
	addr, err := sp.resolver(detail.desc.Node.NodeID)
	if err != nil {
		return err
	}
	conn, err := sp.rpcContext.GRPCDial(addr.String())
	if err != nil {
		return errors.Wrapf(err, "failed to dial store %+v, addr %q, node %+v",
			toStoreID, addr, detail.desc.Node)
	}
	client := roachpb.NewInternalStoresClient(conn)
	req := &roachpb.ReservationRequest{
		StoreRequestHeader: roachpb.StoreRequestHeader{
			NodeID:  detail.desc.Node.NodeID,
			StoreID: toStoreID,
		},
		FromNodeID:  curIdent.NodeID,
		FromStoreID: curIdent.StoreID,
		RangeSize:   rangeSize,
		RangeID:     rangeID,
	}

	if log.V(2) {
		log.Infof(context.TODO(), "proposing new reservation:%+v", req)
	}

	ctxWithTimeout, cancel := context.WithTimeout(context.TODO(), sp.reserveRPCTimeout)
	defer cancel()
	resp, err := client.Reserve(ctxWithTimeout, req)

	// If a reservation is declined, be it due to an error or because it was
	// rejected, we mark the store detail as having been rejected so it won't
	// be considered as a candidate for new replicas until after the configured
	// timeout period has passed.
	if err != nil {
		detail.throttledUntil = sp.clock.Now().GoTime().Add(sp.failedReservationsTimeout)
		if log.V(2) {
			log.Infof(context.TODO(), "reservation failed, store:%s will be throttled for %s until %s",
				toStoreID, sp.failedReservationsTimeout, detail.throttledUntil)
		}
		return errors.Wrapf(err, "reservation failed:%+v", req)
	}

	if resp.RangeCount != nil {
		detail.desc.Capacity.RangeCount = *resp.RangeCount
	}

	if !resp.Reserved {
		detail.throttledUntil = sp.clock.Now().GoTime().Add(sp.declinedReservationsTimeout)
		if log.V(2) {
			log.Infof(context.TODO(), "reservation declined, store:%s will be throttled for %s until %s",
				toStoreID, sp.declinedReservationsTimeout, detail.throttledUntil)
		}
		return errors.Errorf("reservation declined:%+v", req)
	}

	if log.V(2) {
		log.Infof(context.TODO(), "reservation was approved:%+v", req)
	}
	return nil
}
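// The sketch below is illustrative only and is not part of the original code:
// it shows how a caller might book a reservation with reserve before sending a
// preemptive snapshot for up-replication or rebalancing. The helper name
// (reserveThenAddReplica) and the elided snapshot step are assumptions.
func reserveThenAddReplica(
	sp *StorePool,
	self roachpb.StoreIdent,
	target roachpb.StoreID,
	rangeID roachpb.RangeID,
	rangeSize int64,
) error {
	// Book the space first. If the target declines or the RPC fails, reserve
	// throttles the store inside the pool, so later allocator passes will skip
	// it until the timeout expires; the caller can simply pick another store.
	if err := sp.reserve(self, target, rangeID, rangeSize); err != nil {
		return errors.Wrapf(err, "could not reserve space on store %d for range %d",
			target, rangeID)
	}
	// The reservation is booked; the snapshot / replica change would follow
	// here (omitted).
	return nil
}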
// waitForStoreFrozen polls the given stores until they all report having no
// Replicas in the undesired state, i.e. no unfrozen Replicas when freezing and
// no frozen Replicas when thawing (or until an error or timeout occurs).
func (s *adminServer) waitForStoreFrozen(
	stream serverpb.Admin_ClusterFreezeServer,
	stores map[roachpb.StoreID]roachpb.NodeID,
	wantFrozen bool,
) error {
	mu := struct {
		syncutil.Mutex
		oks map[roachpb.StoreID]bool
	}{
		oks: make(map[roachpb.StoreID]bool),
	}
	opts := base.DefaultRetryOptions()
	opts.Closer = s.server.stopper.ShouldQuiesce()
	opts.MaxRetries = 20
	sem := make(chan struct{}, 256)
	errChan := make(chan error, 1)
	sendErr := func(err error) {
		select {
		case errChan <- err:
		default:
		}
	}
	numWaiting := len(stores) // loop until this drops to zero
	var err error
	for r := retry.Start(opts); r.Next(); {
		mu.Lock()
		for storeID, nodeID := range stores {
			storeID, nodeID := storeID, nodeID // loop-local copies for goroutine
			var nodeDesc roachpb.NodeDescriptor
			if err := s.server.gossip.GetInfoProto(gossip.MakeNodeIDKey(nodeID), &nodeDesc); err != nil {
				sendErr(err)
				break
			}
			addr := nodeDesc.Address.String()

			if _, inflightOrSucceeded := mu.oks[storeID]; inflightOrSucceeded {
				continue
			}
			mu.oks[storeID] = false // mark as inflight

			action := func() (err error) {
				var resp *roachpb.PollFrozenResponse
				defer func() {
					message := fmt.Sprintf("node %d, store %d: ", nodeID, storeID)

					if err != nil {
						message += err.Error()
					} else {
						numMismatching := len(resp.Results)
						mu.Lock()
						if numMismatching == 0 {
							// If the Store is in the right state, mark it as such.
							// This means we won't try it again.
							message += "ready"
							mu.oks[storeID] = true
						} else {
							// Otherwise, forget that we tried the Store so that
							// the retry loop picks it up again.
							message += fmt.Sprintf("%d replicas report wrong status", numMismatching)
							if limit := 10; numMismatching > limit {
								message += " [truncated]: "
								resp.Results = resp.Results[:limit]
							} else {
								message += ": "
							}
							message += fmt.Sprintf("%+v", resp.Results)
							delete(mu.oks, storeID)
						}
						mu.Unlock()
					}
					// Stream the status message regardless of whether the poll
					// succeeded, so that RPC errors are also reported.
					err = stream.Send(&serverpb.ClusterFreezeResponse{
						Message: message,
					})
				}()
				conn, err := s.server.rpcContext.GRPCDial(addr)
				if err != nil {
					return err
				}
				client := roachpb.NewInternalStoresClient(conn)
				resp, err = client.PollFrozen(context.Background(),
					&roachpb.PollFrozenRequest{
						StoreRequestHeader: roachpb.StoreRequestHeader{
							NodeID:  nodeID,
							StoreID: storeID,
						},
						// If we are looking to freeze everything, we want to
						// collect thawed Replicas, and vice versa.
						CollectFrozen: !wantFrozen,
					})
				return err
			}
			// Run a limited, non-blocking task. That means the task simply
			// won't run if the semaphore is full (or the node is draining).
			// Both are handled by the surrounding retry loop.
			if err := s.server.stopper.RunLimitedAsyncTask(sem, func() {
				if err := action(); err != nil {
					sendErr(err)
				}
			}); err != nil {
				// Node draining.
				sendErr(err)
				break
			}
		}

		numWaiting = len(stores)
		for _, ok := range mu.oks {
			if ok {
				// Store has reported that it is in the desired state.
				numWaiting--
				continue
			}
		}
		mu.Unlock()

		select {
		case err = <-errChan:
		default:
		}

		// Keep going unless there's been an error or every store is in the
		// desired state.
		if err != nil || numWaiting == 0 {
			break
		}
		if err := stream.Send(&serverpb.ClusterFreezeResponse{
			Message: fmt.Sprintf("waiting for %d store%s to apply operation",
				numWaiting, util.Pluralize(int64(numWaiting))),
		}); err != nil {
			return err
		}
	}
	if err != nil {
		return err
	}
	if numWaiting > 0 {
		err = fmt.Errorf("timed out waiting for %d store%s to report freeze",
			numWaiting, util.Pluralize(int64(numWaiting)))
	}
	return err
}
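// The sketch below is illustrative only and is not part of the original code:
// it shows how a freeze/thaw handler might drive waitForStoreFrozen once it
// has collected the cluster's stores. The helper name (pollStoresUntilDone)
// and the final status message are assumptions; building the store map is
// elided.
func (s *adminServer) pollStoresUntilDone(
	stream serverpb.Admin_ClusterFreezeServer,
	stores map[roachpb.StoreID]roachpb.NodeID,
	freeze bool,
) error {
	// Poll every store until each reports zero Replicas in the wrong state,
	// or until the retry budget inside waitForStoreFrozen runs out.
	if err := s.waitForStoreFrozen(stream, stores, freeze); err != nil {
		return err
	}
	// All stores are in the requested state; report a final summary line.
	return stream.Send(&serverpb.ClusterFreezeResponse{
		Message: fmt.Sprintf("all %d store%s reported the requested state",
			len(stores), util.Pluralize(int64(len(stores)))),
	})
}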