// Failover promotes replicas to primary for the remaining nodes.
func Failover(cfg cbgt.Cfg, version string, server string,
	options map[string]string, nodesFailover []string) (bool, error) {
	mapNodesFailover := cbgt.StringsToMap(nodesFailover)

	uuid := ""

	indexDefs, nodeDefs, planPIndexesPrev, cas, err :=
		cbgt.PlannerGetPlan(cfg, version, uuid)
	if err != nil {
		return false, err
	}

	planPIndexesCalc, err := cbgt.CalcPlan("failover",
		indexDefs, nodeDefs, planPIndexesPrev, version, server,
		options, nil)
	if err != nil {
		return false, fmt.Errorf("planner: failover CalcPlan, err: %v", err)
	}

	planPIndexesNext := cbgt.CopyPlanPIndexes(planPIndexesPrev, version)
	for planPIndexName, planPIndex := range planPIndexesNext.PlanPIndexes {
		for node, planPIndexNode := range planPIndex.Nodes {
			if !mapNodesFailover[node] {
				continue
			}

			if planPIndexNode.Priority <= 0 {
				// Failover'ed node used to be a primary for this
				// pindex, so find a replica to promote.
				promoted := ""

			PROMOTE_REPLICA:
				for nodePro, ppnPro := range planPIndex.Nodes {
					if mapNodesFailover[nodePro] {
						continue
					}

					if ppnPro.Priority >= 1 {
						ppnPro.Priority = 0
						planPIndex.Nodes[nodePro] = ppnPro

						promoted = nodePro
						break PROMOTE_REPLICA
					}
				}

				// If we didn't find a replica to promote, and we're
				// configured with the option to
				// "failoverAssignAllPrimaries-IndexName" or
				// "failoverAssignAllPrimaries" (default true), then
				// assign the primary from the calculated plan.
				if promoted == "" && ParseOptionsBool(options,
					"failoverAssignAllPrimaries", planPIndex.IndexName, true) {
					planPIndexCalc, exists :=
						planPIndexesCalc.PlanPIndexes[planPIndexName]
					if exists && planPIndexCalc != nil {
					ASSIGN_PRIMARY:
						for nodeCalc, ppnCalc := range planPIndexCalc.Nodes {
							if ppnCalc.Priority <= 0 &&
								!mapNodesFailover[nodeCalc] {
								planPIndex.Nodes[nodeCalc] = ppnCalc

								promoted = nodeCalc
								break ASSIGN_PRIMARY
							}
						}
					}
				}
			}

			delete(planPIndex.Nodes, node)
		}
	}

	// TODO: Missing under-replication constraint warnings.

	if cbgt.SamePlanPIndexes(planPIndexesNext, planPIndexesPrev) {
		return false, nil
	}

	_, err = cbgt.CfgSetPlanPIndexes(cfg, planPIndexesNext, cas)
	if err != nil {
		return false, fmt.Errorf("planner: failover could not save plan,"+
			" perhaps a concurrent planner won, cas: %d, err: %v",
			cas, err)
	}

	return true, nil
}
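// failoverExample is an illustrative sketch, not part of the original
// package, showing how a caller might invoke Failover once it has
// decided which node UUIDs to fail over. The "." server value and the
// nil options map are assumptions for the sketch; passing nil options
// leaves "failoverAssignAllPrimaries" at its default of true, per the
// comment in Failover above.
func failoverExample(cfg cbgt.Cfg, nodesFailover []string) (bool, error) {
	// changed is false when the plan already reflected the failed-over
	// nodes, so there was nothing to save.
	changed, err := Failover(cfg, cbgt.VERSION, ".", nil, nodesFailover)
	if err != nil {
		return false, fmt.Errorf("failoverExample: %v", err)
	}
	return changed, nil
}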
// StartRebalance begins a concurrent, cluster-wide rebalancing of all
// the indexes (and their index partitions) on a cluster of cbgt
// nodes. StartRebalance utilizes the blance library for calculating
// and orchestrating partition reassignments and the cbgt/rest/monitor
// library to watch for progress and errors.
func StartRebalance(version string, cfg cbgt.Cfg, server string,
	nodesToRemoveParam []string,
	options RebalanceOptions) (
	*Rebalancer, error) {
	// TODO: Need timeouts on moves.
	//
	uuid := "" // We don't have a uuid, as we're not a node.

	begIndexDefs, begNodeDefs, begPlanPIndexes, begPlanPIndexesCAS, err :=
		cbgt.PlannerGetPlan(cfg, version, uuid)
	if err != nil {
		return nil, err
	}

	nodesAll, nodesToAdd, nodesToRemove,
		nodeWeights, nodeHierarchy :=
		cbgt.CalcNodesLayout(begIndexDefs, begNodeDefs, begPlanPIndexes)

	nodesUnknown := cbgt.StringsRemoveStrings(nodesToRemoveParam, nodesAll)
	if len(nodesUnknown) > 0 {
		return nil, fmt.Errorf("rebalance:"+
			" unknown nodes in nodesToRemoveParam: %#v",
			nodesUnknown)
	}

	nodesToRemove = append(nodesToRemove, nodesToRemoveParam...)

	// Intersecting the list with itself dedupes nodesToRemove.
	nodesToRemove = cbgt.StringsIntersectStrings(nodesToRemove, nodesToRemove)

	nodesToAdd = cbgt.StringsRemoveStrings(nodesToAdd, nodesToRemove)

	// --------------------------------------------------------

	urlUUIDs := monitor.NodeDefsUrlUUIDs(begNodeDefs)

	monitorSampleCh := make(chan monitor.MonitorSample)

	monitorOptions := monitor.MonitorNodesOptions{
		DiagSampleDisable: true,
		HttpGet:           options.HttpGet,
	}

	monitorInst, err :=
		monitor.StartMonitorNodes(urlUUIDs, monitorSampleCh, monitorOptions)
	if err != nil {
		return nil, err
	}

	// --------------------------------------------------------

	stopCh := make(chan struct{})

	r := &Rebalancer{
		version:             version,
		cfg:                 cfg,
		server:              server,
		options:             options,
		progressCh:          make(chan RebalanceProgress),
		monitor:             monitorInst,
		monitorDoneCh:       make(chan struct{}),
		monitorSampleCh:     monitorSampleCh,
		monitorSampleWantCh: make(chan chan monitor.MonitorSample),
		nodesAll:            nodesAll,
		nodesToAdd:          nodesToAdd,
		nodesToRemove:       nodesToRemove,
		nodeWeights:         nodeWeights,
		nodeHierarchy:       nodeHierarchy,
		begIndexDefs:        begIndexDefs,
		begNodeDefs:         begNodeDefs,
		begPlanPIndexes:     begPlanPIndexes,
		begPlanPIndexesCAS:  begPlanPIndexesCAS,
		endPlanPIndexes:     cbgt.NewPlanPIndexes(version),
		currStates:          map[string]map[string]map[string]StateOp{},
		currSeqs:            map[string]map[string]map[string]cbgt.UUIDSeq{},
		wantSeqs:            map[string]map[string]map[string]cbgt.UUIDSeq{},
		stopCh:              stopCh,
	}

	r.Logf("rebalance: nodesAll: %#v", nodesAll)
	r.Logf("rebalance: nodesToAdd: %#v", nodesToAdd)
	r.Logf("rebalance: nodesToRemove: %#v", nodesToRemove)
	r.Logf("rebalance: nodeWeights: %#v", nodeWeights)
	r.Logf("rebalance: nodeHierarchy: %#v", nodeHierarchy)
	// r.Logf("rebalance: begIndexDefs: %#v", begIndexDefs)
	// r.Logf("rebalance: begNodeDefs: %#v", begNodeDefs)
	r.Logf("rebalance: monitor urlUUIDs: %#v", urlUUIDs)

	// begPlanPIndexesJSON, _ := json.Marshal(begPlanPIndexes)
	//
	// r.Logf("rebalance: begPlanPIndexes: %s, cas: %v",
	//	begPlanPIndexesJSON, begPlanPIndexesCAS)

	// TODO: Prepopulate currStates so that we can double-check that
	// our state transitions in assignPartition are valid.

	go r.runMonitor(stopCh)

	go r.runRebalanceIndexes(stopCh)

	return r, nil
}
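// runRebalanceExample is an illustrative sketch, not part of the
// original package, showing one way to drive StartRebalance and drain
// its progress channel, mirroring the pattern used by the tests. The
// "." server value and the empty RebalanceOptions are assumptions.
func runRebalanceExample(cfg cbgt.Cfg, nodesToRemove []string) error {
	r, err := StartRebalance(cbgt.VERSION, cfg, ".",
		nodesToRemove, RebalanceOptions{})
	if err != nil {
		return err
	}
	defer r.Stop()

	// The progress channel is closed when the rebalance finishes, so
	// ranging over it both collects errors and waits for completion.
	var lastErr error
	for progress := range r.ProgressCh() {
		if progress.Error != nil {
			lastErr = progress.Error
		}
	}
	return lastErr
}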
// Failover promotes replicas to primary for the remaining nodes.
func Failover(cfg cbgt.Cfg, version string, server string,
	nodesFailover []string) (bool, error) {
	mapNodesFailover := cbgt.StringsToMap(nodesFailover)

	uuid := ""

	indexDefs, nodeDefs, planPIndexesPrev, cas, err :=
		cbgt.PlannerGetPlan(cfg, version, uuid)
	if err != nil {
		return false, err
	}

	planPIndexesCalc, err := cbgt.CalcPlan("failover",
		indexDefs, nodeDefs, planPIndexesPrev, version, server)
	if err != nil {
		return false, fmt.Errorf("planner: failover CalcPlan, err: %v", err)
	}

	planPIndexesNext := cbgt.CopyPlanPIndexes(planPIndexesPrev, version)
	for planPIndexName, planPIndex := range planPIndexesNext.PlanPIndexes {
		for node, planPIndexNode := range planPIndex.Nodes {
			if !mapNodesFailover[node] {
				continue
			}

			if planPIndexNode.Priority <= 0 {
				// Failover'ed node used to be a primary for this
				// pindex, so find a replica to promote.
				promoted := ""

			PROMOTE_REPLICA:
				for nodePro, ppnPro := range planPIndex.Nodes {
					if mapNodesFailover[nodePro] {
						continue
					}

					if ppnPro.Priority >= 1 {
						ppnPro.Priority = 0
						planPIndex.Nodes[nodePro] = ppnPro

						promoted = nodePro
						break PROMOTE_REPLICA
					}
				}

				if promoted == "" {
					// Didn't find a replica to promote, so consult the
					// calculated plan for the primary assignment.
					planPIndexCalc, exists :=
						planPIndexesCalc.PlanPIndexes[planPIndexName]
					if exists && planPIndexCalc != nil {
					PROMOTE_CALC:
						for nodeCalc, ppnCalc := range planPIndexCalc.Nodes {
							if ppnCalc.Priority <= 0 &&
								!mapNodesFailover[nodeCalc] {
								planPIndex.Nodes[nodeCalc] = ppnCalc

								promoted = nodeCalc
								break PROMOTE_CALC
							}
						}
					}
				}
			}

			delete(planPIndex.Nodes, node)
		}
	}

	// TODO: Missing under-replication constraint warnings.

	if cbgt.SamePlanPIndexes(planPIndexesNext, planPIndexesPrev) {
		return false, nil
	}

	_, err = cbgt.CfgSetPlanPIndexes(cfg, planPIndexesNext, cas)
	if err != nil {
		return false, fmt.Errorf("planner: failover could not save plan,"+
			" perhaps a concurrent planner won, cas: %d, err: %v",
			cas, err)
	}

	return true, nil
}
func TestRebalance(t *testing.T) {
	testDir, _ := ioutil.TempDir("./tmp", "test")
	defer os.RemoveAll(testDir)

	nodeDir := func(node string) string {
		d := testDir + string(os.PathSeparator) + node
		os.MkdirAll(d, 0700)
		return d
	}

	var mut sync.Mutex

	httpGets := 0
	httpGet := func(url string) (resp *http.Response, err error) {
		mut.Lock()
		httpGets++
		mut.Unlock()

		return &http.Response{
			StatusCode: 200,
			Body:       ioutil.NopCloser(bytes.NewBuffer([]byte("{}"))),
		}, nil
	}

	tests := []struct {
		label       string
		ops         string // Space separated "+a", "-x".
		params      map[string]string
		expNodes    string // Space separated list of nodes ("a"..."v").
		expIndexes  string // Space separated list of indexes ("x"..."z").
		expStartErr bool
	}{
		{"1st node", "+a", nil, "a", "", true},
		{"add 1st index x", "+x", nil, "a", "x", false},
		{"add 2nd node b", "+b", nil, "a b", "x", false},
		{"add 2nd index y", "+y", nil, "a b", "x y", false},
		{"remove node b", "-b", nil, "a", "x y", false},
	}

	cfg := cbgt.NewCfgMem()

	mgrs := map[string]*cbgt.Manager{}

	var mgr0 *cbgt.Manager

	server := "."

	waitUntilEmptyCfgEvents := func(ch chan cbgt.CfgEvent) {
		for {
			select {
			case <-ch:
			default:
				return
			}
		}
	}

	cfgEventsNodeDefsWanted := make(chan cbgt.CfgEvent, 100)
	cfg.Subscribe(cbgt.NODE_DEFS_WANTED, cfgEventsNodeDefsWanted)

	waitUntilEmptyCfgEventsNodeDefsWanted := func() {
		waitUntilEmptyCfgEvents(cfgEventsNodeDefsWanted)
	}

	cfgEventsIndexDefs := make(chan cbgt.CfgEvent, 100)
	cfg.Subscribe(cbgt.INDEX_DEFS_KEY, cfgEventsIndexDefs)

	waitUntilEmptyCfgEventsIndexDefs := func() {
		waitUntilEmptyCfgEvents(cfgEventsIndexDefs)
	}

	for testi, test := range tests {
		log.Printf("testi: %d, label: %q", testi, test.label)

		checkCurrStatesIndexes := false

		nodesToRemove := []string(nil)

		for opi, op := range strings.Split(test.ops, " ") {
			log.Printf(" opi: %d, op: %s", opi, op)

			name := op[1:2]

			isIndexOp := name >= "x"
			if isIndexOp {
				indexName := name
				log.Printf(" indexOp: %s, indexName: %s",
					op[0:1], indexName)

				testCreateIndex(t, mgr0, indexName, test.params,
					waitUntilEmptyCfgEventsIndexDefs)

				checkCurrStatesIndexes = false
			} else { // It's a node op.
				nodeName := name
				log.Printf(" nodeOp: %s, nodeName: %s",
					op[0:1], nodeName)

				register := "wanted"
				if op[0:1] == "-" {
					register = "unknown"
				}
				if test.params["register"] != "" {
					register = test.params["register"]
				}
				if test.params[nodeName+".register"] != "" {
					register = test.params[nodeName+".register"]
				}

				if register == "unknown" {
					nodesToRemove = append(nodesToRemove, nodeName)

					// Delay actual unknown registration / removal
					// until after rebalance finishes.
					continue
				}

				waitUntilEmptyCfgEventsNodeDefsWanted()

				mgr, err := startNodeManager(nodeDir(nodeName),
					cfg, nodeName, register, test.params, server)
				if err != nil || mgr == nil {
					t.Errorf("expected no err, got: %#v", err)
				}
				if mgr0 == nil {
					mgr0 = mgr
				}
				mgrs[nodeName] = mgr

				mgr.Kick("kick")

				waitUntilEmptyCfgEventsNodeDefsWanted()

				checkCurrStatesIndexes = true
			}
		}

		r, err := StartRebalance(cbgt.VERSION, cfg, ".",
			nodesToRemove,
			RebalanceOptions{
				HttpGet: httpGet,
			},
		)
		if (test.expStartErr && err == nil) ||
			(!test.expStartErr && err != nil) {
			t.Errorf("testi: %d, label: %q,"+
				" expStartErr: %v, but got: %v",
				testi, test.label, test.expStartErr, err)
		}

		if err != nil || r == nil {
			continue
		}

		progressCh := r.ProgressCh()
		if progressCh == nil {
			t.Errorf("expected progressCh")
		}

		err = nil
		for progress := range progressCh {
			if progress.Error != nil {
				err = progress.Error

				log.Printf("saw progress error: %#v\n", progress)
			}
		}

		r.Stop()

		if err != nil {
			t.Errorf("expected no end err, got: %v", err)
		}

		for _, nodeToRemove := range nodesToRemove {
			if mgrs[nodeToRemove] != nil {
				mgrs[nodeToRemove].Stop()
				delete(mgrs, nodeToRemove)
			}

			// TODO: Perhaps one day, the MCP will unregister the node;
			// for now, we unregister it "manually".
			if true {
				waitUntilEmptyCfgEventsNodeDefsWanted()

				mgr, err := startNodeManager(nodeDir(nodeToRemove),
					cfg, nodeToRemove, "unknown", test.params, server)
				if err != nil || mgr == nil {
					t.Errorf("expected no err, got: %#v", err)
				}

				mgr.Kick("kick")

				waitUntilEmptyCfgEventsNodeDefsWanted()

				mgr.Stop()
			}
		}

		endIndexDefs, endNodeDefs, endPlanPIndexes, endPlanPIndexesCAS, err :=
			cbgt.PlannerGetPlan(cfg, cbgt.VERSION, "")
		if err != nil ||
			endIndexDefs == nil ||
			endNodeDefs == nil ||
			endPlanPIndexes == nil ||
			endPlanPIndexesCAS == 0 {
			t.Errorf("expected no err, got: %#v", err)
		}

		expNodes := strings.Split(test.expNodes, " ")
		if len(expNodes) != len(endNodeDefs.NodeDefs) {
			t.Errorf("len(expNodes) != len(endNodeDefs.NodeDefs), "+
				" expNodes: %#v, endNodeDefs.NodeDefs: %#v",
				expNodes, endNodeDefs.NodeDefs)
		}

		for _, expNode := range expNodes {
			if endNodeDefs.NodeDefs[expNode] == nil {
				t.Errorf("didn't find expNode: %s,"+
					" expNodes: %#v, endNodeDefs.NodeDefs: %#v",
					expNode, expNodes, endNodeDefs.NodeDefs)
			}
		}

		expIndexes := strings.Split(test.expIndexes, " ")

		r.Visit(func(
			currStates CurrStates,
			currSeqs CurrSeqs,
			wantSeqs WantSeqs,
			nextMoves map[string]*blance.NextMoves) {
			if !checkCurrStatesIndexes {
				return
			}

			if len(currStates) != len(expIndexes) {
				t.Errorf("test.label: %s,"+
					" len(expIndexes) != len(currStates), "+
					" expIndexes: %#v, currStates: %#v, endIndexDefs: %#v",
					test.label, expIndexes, currStates, endIndexDefs)
			}
		})
	}
}