// Scrap a tablet. If force is used, we write to topo.Server // directly and don't remote-execute the command. func (wr *Wrangler) Scrap(tabletAlias topo.TabletAlias, force, skipRebuild bool) (actionPath string, err error) { // load the tablet, see if we'll need to rebuild ti, err := wr.ts.GetTablet(tabletAlias) if err != nil { return "", err } rebuildRequired := ti.Tablet.IsServingType() if force { err = tm.Scrap(wr.ts, ti.Alias(), force) } else { actionPath, err = wr.ai.Scrap(ti.Alias()) } if err != nil { return "", err } if !rebuildRequired { relog.Info("Rebuild not required") return } if skipRebuild { relog.Warning("Rebuild required, but skipping it") return } // wait for the remote Scrap if necessary if actionPath != "" { err = wr.ai.WaitForCompletion(actionPath, wr.actionTimeout()) if err != nil { return "", err } } // and rebuild the original shard / keyspace return "", wr.RebuildShardGraph(ti.Keyspace, ti.Shard, []string{ti.Cell}) }
// Assume the master is dead and not coming back. Just push your way // forward. Force means we are reparenting to the same master // (assuming the data has been externally synched). func (wr *Wrangler) reparentShardBrutal(slaveTabletMap map[topo.TabletAlias]*topo.TabletInfo, failedMaster, masterElectTablet *topo.TabletInfo, leaveMasterReadOnly, force bool) error { log.Infof("Skipping ValidateShard - not a graceful situation") if _, ok := slaveTabletMap[masterElectTablet.Alias()]; !ok && !force { return fmt.Errorf("master elect tablet not in replication graph %v %v/%v %v", masterElectTablet.Alias(), failedMaster.Keyspace, failedMaster.Shard, mapKeys(slaveTabletMap)) } // Check the master-elect and slaves are in good shape when the action // has not been forced. if !force { // Make sure all tablets have the right parent and reasonable positions. if err := wr.checkSlaveReplication(slaveTabletMap, topo.NO_TABLET); err != nil { return err } // Check the master-elect is fit for duty - call out for hardware checks. if err := wr.checkMasterElect(masterElectTablet); err != nil { return err } log.Infof("check slaves %v/%v", masterElectTablet.Keyspace, masterElectTablet.Shard) restartableSlaveTabletMap := restartableTabletMap(slaveTabletMap) err := wr.checkSlaveConsistency(restartableSlaveTabletMap, nil) if err != nil { return err } } else { log.Infof("forcing reparent to same master %v", masterElectTablet.Alias()) err := wr.breakReplication(slaveTabletMap, masterElectTablet) if err != nil { return err } } rsd, err := wr.promoteSlave(masterElectTablet) if err != nil { // FIXME(msolomon) This suggests that the master-elect is dead. // We need to classify certain errors as temporary and retry. return fmt.Errorf("promote slave failed: %v %v", err, masterElectTablet.Alias()) } // Once the slave is promoted, remove it from our map delete(slaveTabletMap, masterElectTablet.Alias()) majorityRestart, restartSlaveErr := wr.restartSlaves(slaveTabletMap, rsd) if !force { log.Infof("scrap dead master %v", failedMaster.Alias()) // The master is dead so execute the action locally instead of // enqueing the scrap action for an arbitrary amount of time. if scrapErr := tm.Scrap(wr.ts, failedMaster.Alias(), false); scrapErr != nil { log.Warningf("scrapping failed master failed: %v", scrapErr) } } err = wr.finishReparent(failedMaster, masterElectTablet, majorityRestart, leaveMasterReadOnly) if err != nil { return err } if restartSlaveErr != nil { // This is more of a warning at this point. return restartSlaveErr } return nil }
// Scrap a tablet. If force is used, we write to topo.Server // directly and don't remote-execute the command. // // If we scrap the master for a shard, we will clear its record // from the Shard object (only if that was the right master) func (wr *Wrangler) Scrap(tabletAlias topo.TabletAlias, force, skipRebuild bool) (actionPath string, err error) { // load the tablet, see if we'll need to rebuild ti, err := wr.ts.GetTablet(tabletAlias) if err != nil { return "", err } rebuildRequired := ti.Tablet.IsServingType() wasMaster := ti.Type == topo.TYPE_MASTER if force { err = tm.Scrap(wr.ts, ti.Alias(), force) } else { actionPath, err = wr.ai.Scrap(ti.Alias()) } if err != nil { return "", err } if !rebuildRequired { log.Infof("Rebuild not required") return } if skipRebuild { log.Warningf("Rebuild required, but skipping it") return } // wait for the remote Scrap if necessary if actionPath != "" { err = wr.ai.WaitForCompletion(actionPath, wr.actionTimeout()) if err != nil { return "", err } } // update the Shard object if the master was scrapped if wasMaster { actionNode := wr.ai.UpdateShard() lockPath, err := wr.lockShard(ti.Keyspace, ti.Shard, actionNode) if err != nil { return "", err } // read the shard with the lock si, err := wr.ts.GetShard(ti.Keyspace, ti.Shard) if err != nil { return "", wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err) } // update it if the right alias is there if si.MasterAlias == tabletAlias { si.MasterAlias = topo.TabletAlias{} // write it back if err := wr.ts.UpdateShard(si); err != nil { return "", wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err) } } else { log.Warningf("Scrapping master %v from shard %v/%v but master in Shard object was %v", tabletAlias, ti.Keyspace, ti.Shard, si.MasterAlias) } // and unlock if err := wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err); err != nil { return "", err } } // and rebuild the original shard / keyspace return "", wr.RebuildShardGraph(ti.Keyspace, ti.Shard, []string{ti.Cell}) }