예제 #1
0
// waitForStoreFrozen polls the given stores until they all report having no
// unfrozen Replicas (or an error or timeout occurs).
func (s *adminServer) waitForStoreFrozen(
	stream serverpb.Admin_ClusterFreezeServer,
	stores map[roachpb.StoreID]roachpb.NodeID,
	wantFrozen bool,
) error {
	mu := struct {
		syncutil.Mutex
		oks map[roachpb.StoreID]bool
	}{
		oks: make(map[roachpb.StoreID]bool),
	}

	opts := base.DefaultRetryOptions()
	opts.Closer = s.server.stopper.ShouldQuiesce()
	opts.MaxRetries = 20
	sem := make(chan struct{}, 256)
	errChan := make(chan error, 1)
	sendErr := func(err error) {
		select {
		case errChan <- err:
		default:
		}
	}

	numWaiting := len(stores) // loop until this drops to zero
	var err error
	for r := retry.Start(opts); r.Next(); {
		mu.Lock()
		for storeID, nodeID := range stores {
			storeID, nodeID := storeID, nodeID // loop-local copies for goroutine
			var nodeDesc roachpb.NodeDescriptor
			if err := s.server.gossip.GetInfoProto(gossip.MakeNodeIDKey(nodeID), &nodeDesc); err != nil {
				sendErr(err)
				break
			}
			addr := nodeDesc.Address.String()

			if _, inflightOrSucceeded := mu.oks[storeID]; inflightOrSucceeded {
				continue
			}
			mu.oks[storeID] = false // mark as inflight
			action := func() (err error) {
				var resp *storage.PollFrozenResponse
				defer func() {
					message := fmt.Sprintf("node %d, store %d: ", nodeID, storeID)

					if err != nil {
						message += err.Error()
					} else {
						numMismatching := len(resp.Results)
						mu.Lock()
						if numMismatching == 0 {
							// If the Store is in the right state, mark it as such.
							// This means we won't try it again.
							message += "ready"
							mu.oks[storeID] = true
						} else {
							// Otherwise, forget that we tried the Store so that
							// the retry loop picks it up again.
							message += fmt.Sprintf("%d replicas report wrong status", numMismatching)
							if limit := 10; numMismatching > limit {
								message += " [truncated]: "
								resp.Results = resp.Results[:limit]
							} else {
								message += ": "
							}
							message += fmt.Sprintf("%+v", resp.Results)
							delete(mu.oks, storeID)
						}
						mu.Unlock()
					}
					err = stream.Send(&serverpb.ClusterFreezeResponse{
						Message: message,
					})
				}()
				conn, err := s.server.rpcContext.GRPCDial(addr)
				if err != nil {
					return err
				}
				client := storage.NewFreezeClient(conn)
				resp, err = client.PollFrozen(context.TODO(),
					&storage.PollFrozenRequest{
						StoreRequestHeader: storage.StoreRequestHeader{
							NodeID:  nodeID,
							StoreID: storeID,
						},
						// If we are looking to freeze everything, we want to
						// collect thawed Replicas, and vice versa.
						CollectFrozen: !wantFrozen,
					})
				return err
			}
			// Run a limited, non-blocking task. That means the task simply
			// won't run if the semaphore is full (or the node is draining).
			// Both are handled by the surrounding retry loop.
			if err := s.server.stopper.RunLimitedAsyncTask(
				context.TODO(), sem, true /* wait */, func(_ context.Context) {
					if err := action(); err != nil {
						sendErr(err)
					}
				}); err != nil {
				// Node draining.
				sendErr(err)
				break
			}
		}

		numWaiting = len(stores)
		for _, ok := range mu.oks {
			if ok {
				// Store has reported that it is frozen.
				numWaiting--
				continue
			}
		}
		mu.Unlock()

		select {
		case err = <-errChan:
		default:
		}

		// Keep going unless there's been an error or everyone's frozen.
		if err != nil || numWaiting == 0 {
			break
		}
		if err := stream.Send(&serverpb.ClusterFreezeResponse{
			Message: fmt.Sprintf("waiting for %d store%s to apply operation",
				numWaiting, util.Pluralize(int64(numWaiting))),
		}); err != nil {
			return err
		}
	}
	if err != nil {
		return err
	}
	if numWaiting > 0 {
		err = fmt.Errorf("timed out waiting for %d store%s to report freeze",
			numWaiting, util.Pluralize(int64(numWaiting)))
	}
	return err
}
예제 #2
0
func (s *adminServer) ClusterFreeze(
	req *serverpb.ClusterFreezeRequest, stream serverpb.Admin_ClusterFreezeServer,
) error {
	var totalAffected int64
	stores := make(map[roachpb.StoreID]roachpb.NodeID)
	process := func(from, to roachpb.Key) (roachpb.Key, error) {
		b := &client.Batch{}
		fa := roachpb.NewChangeFrozen(from, to, req.Freeze, build.GetInfo().Tag)
		b.AddRawRequest(fa)
		if err := s.server.db.Run(context.TODO(), b); err != nil {
			return nil, err
		}
		fr := b.RawResponse().Responses[0].GetInner().(*roachpb.ChangeFrozenResponse)
		totalAffected += fr.RangesAffected
		for storeID, nodeID := range fr.Stores {
			stores[storeID] = nodeID
		}
		return fr.MinStartKey.AsRawKey(), nil
	}

	task := "thaw"
	if req.Freeze {
		task = "freeze"
		// When freezing, we save the meta2 and meta1 range for last to avoid
		// interfering with command routing.
		// Note that we freeze only Ranges whose StartKey is included. In
		// particular, a Range which contains some meta keys will not be frozen
		// by the request that begins at Meta2KeyMax. ChangeFreeze gives us the
		// leftmost covered Range back, which we use for the next request to
		// avoid split-related races.
		freezeTo := roachpb.KeyMax // updated as we go along
		freezeFroms := []roachpb.Key{
			keys.Meta2KeyMax, // freeze userspace
			keys.Meta1KeyMax, // freeze all meta2 ranges
			keys.LocalMax,    // freeze first range (meta1)
		}

		for i, freezeFrom := range freezeFroms {
			if err := stream.Send(&serverpb.ClusterFreezeResponse{
				Message: fmt.Sprintf("freezing meta ranges [stage %d]", i+1),
			}); err != nil {
				return err
			}
			var err error
			if freezeTo, err = process(freezeFrom, freezeTo); err != nil {
				return err
			}
		}
	} else {
		if err := stream.Send(&serverpb.ClusterFreezeResponse{
			Message: fmt.Sprintf("unfreezing ranges"),
		}); err != nil {
			return err
		}
		// When unfreezing, we walk in opposite order and try the first range
		// first. We should be able to get there if the first range manages to
		// gossip. From that, we can talk to the second level replicas, and
		// then to everyone else. Because ChangeFrozen works in forward order,
		// we can simply hit the whole keyspace at once.
		// TODO(tschottdorf): make the first range replicas gossip their
		// descriptor unconditionally or we won't always be able to unfreeze
		// (except by restarting a node which holds the first range).
		if _, err := process(keys.LocalMax, roachpb.KeyMax); err != nil {
			return err
		}
	}
	if err := stream.Send(&serverpb.ClusterFreezeResponse{
		RangesAffected: totalAffected,
		Message:        fmt.Sprintf("proposed %s to %d ranges", task, totalAffected),
	}); err != nil {
		return err
	}
	return s.waitForStoreFrozen(stream, stores, req.Freeze)
}