Example #1: Ctl.startCtlLOCKED
func (ctl *Ctl) startCtlLOCKED(
	mode string,
	memberNodeUUIDs []string,
	ctlOnProgress CtlOnProgressFunc) error {
	ctl.incRevNumLOCKED()

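	// ctlDoneCh is closed when this run's ctl goroutine completes;
	// ctlStopCh lets a later caller cancel the run early.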
	ctlDoneCh := make(chan struct{})
	ctl.ctlDoneCh = ctlDoneCh

	ctlStopCh := make(chan struct{})
	ctl.ctlStopCh = ctlStopCh

	ctlChangeTopology := &CtlChangeTopology{
		Rev:             fmt.Sprintf("%d", ctl.revNum),
		Mode:            mode,
		MemberNodeUUIDs: memberNodeUUIDs,
	}
	ctl.ctlChangeTopology = ctlChangeTopology

	authType := ""
	if ctl.optionsMgr != nil {
		authType = ctl.optionsMgr["authType"]
	}

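	// Use cbauth-authenticated GETs when the manager options specify
	// "cbauth"; otherwise fall back to plain http.Get.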
	httpGetWithAuth := func(urlStr string) (resp *http.Response, err error) {
		if authType == "cbauth" {
			return cbgt.CBAuthHttpGet(urlStr)
		}

		return http.Get(urlStr)
	}

	// The ctl goroutine.
	//
	go func() {
		var ctlErrs []error
		var ctlWarnings map[string][]string

		// Cleanup ctl goroutine.
		//
		defer func() {
			if ctlWarnings == nil {
				// If this run produced no warnings, fall back to any
				// warnings recorded in the current plan.
				planPIndexes, _, err :=
					cbgt.PlannerGetPlanPIndexes(ctl.cfg, cbgt.VERSION)
				if err == nil {
					if planPIndexes != nil {
						ctlWarnings = planPIndexes.Warnings
					}
				} else {
					ctlErrs = append(ctlErrs, err)
				}
			}

			memberNodes, err := CurrentMemberNodes(ctl.cfg)
			if err != nil {
				ctlErrs = append(ctlErrs, err)
			}

			ctl.m.Lock()

			ctl.incRevNumLOCKED()

			ctl.memberNodes = memberNodes

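			// Clear the channel and topology fields only if they still
			// belong to this run; a newer run may have replaced them.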
			if ctl.ctlDoneCh == ctlDoneCh {
				ctl.ctlDoneCh = nil
			}

			if ctl.ctlStopCh == ctlStopCh {
				ctl.ctlStopCh = nil
			}

			if ctl.ctlChangeTopology == ctlChangeTopology {
				ctl.ctlChangeTopology = nil
			}

			ctl.prevWarnings = ctlWarnings
			ctl.prevErrs = ctlErrs

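			// A final callback with nil progress entries tells the
			// listener the run is done (see the "DONE" handling in
			// Example #2).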
			if ctlOnProgress != nil {
				ctlOnProgress(0, 0, nil, nil, nil, nil, nil, ctlErrs)
			}

			ctl.m.Unlock()

			close(ctlDoneCh)
		}()

		// 1) Monitor cfg to wait for wanted nodes to appear.
		//
		nodesToRemove, err :=
			ctl.waitForWantedNodes(memberNodeUUIDs, ctlStopCh)
		if err != nil {
			log.Printf("ctl: waitForWantedNodes, err: %v", err)
			ctlErrs = append(ctlErrs, err)
			return
		}

		// 2) Run rebalance in a loop (if not failover).
		//
		failover := strings.HasPrefix(mode, "failover")
		if !failover {
			// The loop handles the case where the index definitions
			// changed in the midst of the rebalance, in which case we
			// run the rebalance again.
		REBALANCE_LOOP:
			for {
				// Retrieve the indexDefs before we do anything.
				indexDefsStart, err :=
					cbgt.PlannerGetIndexDefs(ctl.cfg, cbgt.VERSION)
				if err != nil {
					log.Printf("ctl: PlannerGetIndexDefs, err: %v", err)

					ctlErrs = append(ctlErrs, err)
					return
				}

				if indexDefsStart == nil ||
					len(indexDefsStart.IndexDefs) <= 0 {
					break REBALANCE_LOOP
				}

				// Start rebalance and monitor progress.
				r, err := rebalance.StartRebalance(cbgt.VERSION,
					ctl.cfg, ctl.server, ctl.optionsMgr,
					nodesToRemove,
					rebalance.RebalanceOptions{
						FavorMinNodes: ctl.optionsCtl.FavorMinNodes,
						DryRun:        ctl.optionsCtl.DryRun,
						Verbose:       ctl.optionsCtl.Verbose,
						HttpGet:       httpGetWithAuth,
					})
				if err != nil {
					log.Printf("ctl: StartRebalance, err: %v", err)

					ctlErrs = append(ctlErrs, err)
					return
				}

				progressDoneCh := make(chan error)
				go func() {
					defer close(progressDoneCh)

					progressToString := func(maxNodeLen, maxPIndexLen int,
						seenNodes map[string]bool,
						seenNodesSorted []string,
						seenPIndexes map[string]bool,
						seenPIndexesSorted []string,
						progressEntries map[string]map[string]map[string]*rebalance.ProgressEntry,
					) string {
						if ctlOnProgress != nil {
							return ctlOnProgress(maxNodeLen, maxPIndexLen,
								seenNodes,
								seenNodesSorted,
								seenPIndexes,
								seenPIndexesSorted,
								progressEntries,
								nil)
						}

						return rebalance.ProgressTableString(
							maxNodeLen, maxPIndexLen,
							seenNodes,
							seenNodesSorted,
							seenPIndexes,
							seenPIndexesSorted,
							progressEntries)
					}

					err := rebalance.ReportProgress(r, progressToString)
					if err != nil {
						log.Printf("ctl: ReportProgress, err: %v", err)
						progressDoneCh <- err
					}
				}()

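				// NOTE: deferred on the enclosing goroutine, not the
				// loop iteration, so each rebalancer's Stop runs only
				// when this goroutine exits.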
				defer r.Stop()

				select {
				case <-ctlStopCh:
					return // Exit ctl goroutine.

				case err = <-progressDoneCh:
					if err != nil {
						ctlErrs = append(ctlErrs, err)
						return
					}
				}

				ctlWarnings = r.GetEndPlanPIndexes().Warnings

				// Repeat if the indexDefs had changed mid-rebalance.
				indexDefsEnd, err :=
					cbgt.PlannerGetIndexDefs(ctl.cfg, cbgt.VERSION)
				if err != nil {
					ctlErrs = append(ctlErrs, err)
					return
				}

				if reflect.DeepEqual(indexDefsStart, indexDefsEnd) {
					// NOTE: There's a race here: at this point we
					// believe the indexDefs haven't changed, but a
					// concurrent writer could still change them before
					// we can run PlannerSteps().
					break REBALANCE_LOOP
				}
			}
		}

		// 3) Run planner steps, like unregister and failover.
		//
		steps := map[string]bool{"unregister": true}
		if failover {
			steps["failover_"] = true
		} else {
			steps["planner"] = true
		}

		err = cmd.PlannerSteps(steps, ctl.cfg, cbgt.VERSION,
			ctl.server, ctl.optionsMgr, nodesToRemove,
			ctl.optionsCtl.DryRun, nil)
		if err != nil {
			log.Printf("ctl: PlannerSteps, err: %v", err)
			ctlErrs = append(ctlErrs, err)
		}
	}()

	return nil
}
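Both examples share the same progress-callback shape, and the LOCKED suffix follows the usual convention that the caller already holds ctl.m, so an exported entry point has to take the lock first. The sketch below is a hedged reconstruction: the CtlOnProgressFunc signature is inferred from the two call sites in these examples, and the ChangeTopology wrapper body plus the returned CtlTopology literal are illustrative assumptions based only on the call in Example #2, not the package's confirmed implementation.

// Hedged reconstruction of CtlOnProgressFunc, inferred from the two
// call sites in these examples (same arguments, string result).
type CtlOnProgressFunc func(maxNodeLen, maxPIndexLen int,
	seenNodes map[string]bool,
	seenNodesSorted []string,
	seenPIndexes map[string]bool,
	seenPIndexesSorted []string,
	progressEntries map[string]map[string]map[string]*rebalance.ProgressEntry,
	errs []error) string

// Hypothetical exported entry point: acquire ctl.m, then delegate to
// the LOCKED method. Assumed shape only, based on the call
// m.ctl.ChangeTopology(ctlChangeTopology, onProgress) in Example #2.
func (ctl *Ctl) ChangeTopology(changeTopology *CtlChangeTopology,
	onProgress CtlOnProgressFunc) (*CtlTopology, error) {
	ctl.m.Lock()
	defer ctl.m.Unlock()

	err := ctl.startCtlLOCKED(changeTopology.Mode,
		changeTopology.MemberNodeUUIDs, onProgress)
	if err != nil {
		return nil, err
	}

	// Hand back the revision assigned to the change that just started.
	return &CtlTopology{Rev: fmt.Sprintf("%d", ctl.revNum)}, nil
}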
Example #2: CtlMgr.startTopologyChangeTaskHandleLOCKED
func (m *CtlMgr) startTopologyChangeTaskHandleLOCKED(
	change service.TopologyChange) (*taskHandle, error) {
	ctlChangeTopology := &CtlChangeTopology{
		Rev: string(change.CurrentTopologyRev),
	}

	switch change.Type {
	case service.TopologyChangeTypeRebalance:
		ctlChangeTopology.Mode = "rebalance"

	case service.TopologyChangeTypeFailover:
		ctlChangeTopology.Mode = "failover-hard"

	default:
		log.Printf("ctl/manager, unknown change.Type: %v", change.Type)
		return nil, service.ErrNotSupported
	}

	for _, node := range change.KeepNodes {
		// TODO: What about node.RecoveryType?

		nodeUUID := string(node.NodeInfo.NodeID)

		ctlChangeTopology.MemberNodeUUIDs =
			append(ctlChangeTopology.MemberNodeUUIDs, nodeUUID)
	}

	taskId := "rebalance:" + change.ID

	// progressEntries is a map of pindex ->
	// source_partition -> node -> *rebalance.ProgressEntry.
	onProgress := func(maxNodeLen, maxPIndexLen int,
		seenNodes map[string]bool,
		seenNodesSorted []string,
		seenPIndexes map[string]bool,
		seenPIndexesSorted []string,
		progressEntries map[string]map[string]map[string]*rebalance.ProgressEntry,
		errs []error,
	) string {
		m.updateProgress(taskId, seenNodes, seenPIndexes, progressEntries, errs)

		if progressEntries == nil {
			return "DONE"
		}

		return rebalance.ProgressTableString(
			maxNodeLen, maxPIndexLen,
			seenNodes,
			seenNodesSorted,
			seenPIndexes,
			seenPIndexesSorted,
			progressEntries)
	}

	ctlTopology, err := m.ctl.ChangeTopology(ctlChangeTopology, onProgress)
	if err != nil {
		return nil, err
	}

	revNum := m.allocRevNumLOCKED(m.tasks.revNum)

	th := &taskHandle{
		startTime: time.Now(),
		task: &service.Task{
			Rev:              EncodeRev(revNum),
			ID:               taskId,
			Type:             service.TaskTypeRebalance,
			Status:           service.TaskStatusRunning,
			IsCancelable:     true,
			Progress:         0.0,
			DetailedProgress: map[service.NodeID]float64{},
			Description:      "topology change",
			ErrorMessage:     "",
			Extra: map[string]interface{}{
				"topologyChange": change,
			},
		},
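		// Invoked when the task is cancelled; halts the in-flight
		// topology change identified by the captured rev.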
		stop: func() {
			log.Printf("ctl/manager, stop taskHandle, ctlTopology.Rev: %v",
				ctlTopology.Rev)

			m.ctl.StopChangeTopology(ctlTopology.Rev)
		},
	}

	return th, nil
}
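The task's stop closure above delegates to StopChangeTopology with the rev captured at start time. The sketch below shows one plausible shape for that method, assuming it cancels the in-flight run from Example #1 by closing ctlStopCh; the rev check and locking are assumptions based on the fields visible there, not the package's confirmed implementation.

// Hedged sketch of StopChangeTopology: close ctlStopCh to wake the
// select inside the ctl goroutine of Example #1. Assumed shape only.
func (ctl *Ctl) StopChangeTopology(rev string) {
	ctl.m.Lock()
	defer ctl.m.Unlock()

	if ctl.ctlChangeTopology == nil || ctl.ctlChangeTopology.Rev != rev {
		return // No matching topology change is in flight.
	}

	if ctl.ctlStopCh != nil {
		close(ctl.ctlStopCh) // Unblocks the `case <-ctlStopCh:` arm.
		ctl.ctlStopCh = nil
	}
}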