Example #1
0
// Scrap a tablet. If force is used, we write to topo.Server
// directly and don't remote-execute the command.
func (wr *Wrangler) Scrap(tabletAlias topo.TabletAlias, force, skipRebuild bool) (actionPath string, err error) {
	// load the tablet, see if we'll need to rebuild
	ti, err := wr.ts.GetTablet(tabletAlias)
	if err != nil {
		return "", err
	}
	rebuildRequired := ti.Tablet.IsServingType()

	if force {
		err = tm.Scrap(wr.ts, ti.Alias(), force)
	} else {
		actionPath, err = wr.ai.Scrap(ti.Alias())
	}
	if err != nil {
		return "", err
	}

	if !rebuildRequired {
		relog.Info("Rebuild not required")
		return
	}
	if skipRebuild {
		relog.Warning("Rebuild required, but skipping it")
		return
	}

	// wait for the remote Scrap if necessary
	if actionPath != "" {
		err = wr.ai.WaitForCompletion(actionPath, wr.actionTimeout())
		if err != nil {
			return "", err
		}
	}

	// and rebuild the original shard / keyspace
	return "", wr.RebuildShardGraph(ti.Keyspace, ti.Shard, []string{ti.Cell})
}
// Assume the master is dead and not coming back. Just push your way
// forward.  Force means we are reparenting to the same master
// (assuming the data has been externally synched).
func (wr *Wrangler) reparentShardBrutal(slaveTabletMap map[topo.TabletAlias]*topo.TabletInfo, failedMaster, masterElectTablet *topo.TabletInfo, leaveMasterReadOnly, force bool) error {
	log.Infof("Skipping ValidateShard - not a graceful situation")

	if _, ok := slaveTabletMap[masterElectTablet.Alias()]; !ok && !force {
		return fmt.Errorf("master elect tablet not in replication graph %v %v/%v %v", masterElectTablet.Alias(), failedMaster.Keyspace, failedMaster.Shard, mapKeys(slaveTabletMap))
	}

	// Check the master-elect and slaves are in good shape when the action
	// has not been forced.
	if !force {
		// Make sure all tablets have the right parent and reasonable positions.
		if err := wr.checkSlaveReplication(slaveTabletMap, topo.NO_TABLET); err != nil {
			return err
		}

		// Check the master-elect is fit for duty - call out for hardware checks.
		if err := wr.checkMasterElect(masterElectTablet); err != nil {
			return err
		}

		log.Infof("check slaves %v/%v", masterElectTablet.Keyspace, masterElectTablet.Shard)
		restartableSlaveTabletMap := restartableTabletMap(slaveTabletMap)
		err := wr.checkSlaveConsistency(restartableSlaveTabletMap, nil)
		if err != nil {
			return err
		}
	} else {
		log.Infof("forcing reparent to same master %v", masterElectTablet.Alias())
		err := wr.breakReplication(slaveTabletMap, masterElectTablet)
		if err != nil {
			return err
		}
	}

	rsd, err := wr.promoteSlave(masterElectTablet)
	if err != nil {
		// FIXME(msolomon) This suggests that the master-elect is dead.
		// We need to classify certain errors as temporary and retry.
		return fmt.Errorf("promote slave failed: %v %v", err, masterElectTablet.Alias())
	}

	// Once the slave is promoted, remove it from our map
	delete(slaveTabletMap, masterElectTablet.Alias())

	majorityRestart, restartSlaveErr := wr.restartSlaves(slaveTabletMap, rsd)

	if !force {
		log.Infof("scrap dead master %v", failedMaster.Alias())
		// The master is dead so execute the action locally instead of
		// enqueing the scrap action for an arbitrary amount of time.
		if scrapErr := tm.Scrap(wr.ts, failedMaster.Alias(), false); scrapErr != nil {
			log.Warningf("scrapping failed master failed: %v", scrapErr)
		}
	}

	err = wr.finishReparent(failedMaster, masterElectTablet, majorityRestart, leaveMasterReadOnly)
	if err != nil {
		return err
	}

	if restartSlaveErr != nil {
		// This is more of a warning at this point.
		return restartSlaveErr
	}

	return nil
}
Example #3
0
// Scrap a tablet. If force is used, we write to topo.Server
// directly and don't remote-execute the command.
//
// If we scrap the master for a shard, we will clear its record
// from the Shard object (only if that was the right master)
func (wr *Wrangler) Scrap(tabletAlias topo.TabletAlias, force, skipRebuild bool) (actionPath string, err error) {
	// load the tablet, see if we'll need to rebuild
	ti, err := wr.ts.GetTablet(tabletAlias)
	if err != nil {
		return "", err
	}
	rebuildRequired := ti.Tablet.IsServingType()
	wasMaster := ti.Type == topo.TYPE_MASTER

	if force {
		err = tm.Scrap(wr.ts, ti.Alias(), force)
	} else {
		actionPath, err = wr.ai.Scrap(ti.Alias())
	}
	if err != nil {
		return "", err
	}

	if !rebuildRequired {
		log.Infof("Rebuild not required")
		return
	}
	if skipRebuild {
		log.Warningf("Rebuild required, but skipping it")
		return
	}

	// wait for the remote Scrap if necessary
	if actionPath != "" {
		err = wr.ai.WaitForCompletion(actionPath, wr.actionTimeout())
		if err != nil {
			return "", err
		}
	}

	// update the Shard object if the master was scrapped
	if wasMaster {
		actionNode := wr.ai.UpdateShard()
		lockPath, err := wr.lockShard(ti.Keyspace, ti.Shard, actionNode)
		if err != nil {
			return "", err
		}

		// read the shard with the lock
		si, err := wr.ts.GetShard(ti.Keyspace, ti.Shard)
		if err != nil {
			return "", wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err)
		}

		// update it if the right alias is there
		if si.MasterAlias == tabletAlias {
			si.MasterAlias = topo.TabletAlias{}

			// write it back
			if err := wr.ts.UpdateShard(si); err != nil {
				return "", wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err)
			}
		} else {
			log.Warningf("Scrapping master %v from shard %v/%v but master in Shard object was %v", tabletAlias, ti.Keyspace, ti.Shard, si.MasterAlias)
		}

		// and unlock
		if err := wr.unlockShard(ti.Keyspace, ti.Shard, actionNode, lockPath, err); err != nil {
			return "", err
		}
	}

	// and rebuild the original shard / keyspace
	return "", wr.RebuildShardGraph(ti.Keyspace, ti.Shard, []string{ti.Cell})
}