Example #1
0
func updateReplicationGraphForPromotedSlave(ts topo.Server, tablet *topo.TabletInfo) error {
	// Remove tablet from the replication graph if this is not already the master.
	if tablet.Parent.Uid != topo.NO_TABLET {
		if err := topo.DeleteTabletReplicationData(ts, tablet.Tablet); err != nil && err != topo.ErrNoNode {
			return err
		}
	}

	// Update tablet regardless - trend towards consistency.
	tablet.State = topo.STATE_READ_WRITE
	tablet.Type = topo.TYPE_MASTER
	tablet.Parent.Cell = ""
	tablet.Parent.Uid = topo.NO_TABLET
	err := topo.UpdateTablet(ts, tablet)
	if err != nil {
		return err
	}
	// NOTE(msolomon) A serving graph update is required, but in
	// order for the shard to be consistent the old master must be
	// scrapped first. That is externally coordinated by the
	// wrangler reparent action.

	// Insert the new tablet location in the replication graph now that
	// we've updated the tablet.
	err = topo.CreateTabletReplicationData(ts, tablet.Tablet)
	if err != nil && err != topo.ErrNodeExists {
		return err
	}

	return nil
}
Example #2
0
func SlaveWasRestarted(ts topo.Server, tabletAlias topo.TabletAlias, swrd *actionnode.SlaveWasRestartedArgs) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	// Once this action completes, update authoritive tablet node first.
	tablet.Parent = swrd.Parent
	if tablet.Type == topo.TYPE_MASTER {
		tablet.Type = topo.TYPE_SPARE
		tablet.State = topo.STATE_READ_ONLY
	}
	err = topo.UpdateTablet(ts, tablet)
	if err != nil {
		return err
	}

	// Update the new tablet location in the replication graph now that
	// we've updated the tablet.
	err = topo.CreateTabletReplicationData(ts, tablet.Tablet)
	if err != nil && err != topo.ErrNodeExists {
		return err
	}

	return nil
}
Example #3
0
func (ta *TabletActor) SlaveWasRestarted(actionNode *ActionNode, masterAddr string) error {
	swrd := actionNode.args.(*SlaveWasRestartedData)

	tablet, err := ta.ts.GetTablet(ta.tabletAlias)
	if err != nil {
		return err
	}

	// Remove tablet from the replication graph.
	if err := topo.DeleteTabletReplicationData(ta.ts, tablet.Tablet); err != nil && err != topo.ErrNoNode {
		// FIXME(alainjobart) once we don't have replication paths
		// any more, remove this extra check
		if err == topo.ErrNotEmpty {
			log.Infof("Failed to delete master replication path, will be caught later")
		} else {
			return err
		}
	}

	// now we can check the reparent actually worked
	if masterAddr == "" {
		masterAddr, err = ta.mysqlDaemon.GetMasterAddr()
		if err != nil {
			return err
		}
	}
	if masterAddr != swrd.ExpectedMasterAddr && masterAddr != swrd.ExpectedMasterIpAddr {
		log.Errorf("slaveWasRestarted found unexpected master %v for %v (was expecting %v or %v)", masterAddr, ta.tabletAlias, swrd.ExpectedMasterAddr, swrd.ExpectedMasterIpAddr)
		if swrd.ScrapStragglers {
			return Scrap(ta.ts, tablet.Alias(), false)
		} else {
			return fmt.Errorf("Unexpected master %v for %v (was expecting %v or %v)", masterAddr, ta.tabletAlias, swrd.ExpectedMasterAddr, swrd.ExpectedMasterIpAddr)
		}
	}

	// Once this action completes, update authoritive tablet node first.
	tablet.Parent = swrd.Parent
	if tablet.Type == topo.TYPE_MASTER {
		tablet.Type = topo.TYPE_SPARE
		tablet.State = topo.STATE_READ_ONLY
	}
	err = topo.UpdateTablet(ta.ts, tablet)
	if err != nil {
		return err
	}

	// Insert the new tablet location in the replication graph now that
	// we've updated the tablet.
	err = topo.CreateTabletReplicationData(ta.ts, tablet.Tablet)
	if err != nil && err != topo.ErrNodeExists {
		return err
	}

	return nil
}
Example #4
0
func (ta *TabletActor) slaveWasRestarted(actionNode *ActionNode) error {
	swrd := actionNode.args.(*SlaveWasRestartedData)

	tablet, err := ta.ts.GetTablet(ta.tabletAlias)
	if err != nil {
		return err
	}

	// Remove tablet from the replication graph.
	if err := topo.DeleteTabletReplicationData(ta.ts, tablet.Tablet, tablet.ReplicationPath()); err != nil && err != topo.ErrNoNode {
		return err
	}

	// now we can check the reparent actually worked
	masterAddr, err := ta.mysqld.GetMasterAddr()
	if err != nil {
		return err
	}
	if masterAddr != swrd.ExpectedMasterAddr && masterAddr != swrd.ExpectedMasterIpAddr {
		log.Errorf("slaveWasRestarted found unexpected master %v for %v (was expecting %v or %v)", masterAddr, ta.tabletAlias, swrd.ExpectedMasterAddr, swrd.ExpectedMasterIpAddr)
		if swrd.ScrapStragglers {
			return Scrap(ta.ts, tablet.Alias(), false)
		} else {
			return fmt.Errorf("Unexpected master %v for %v (was expecting %v or %v)", masterAddr, ta.tabletAlias, swrd.ExpectedMasterAddr, swrd.ExpectedMasterIpAddr)
		}
	}

	// Once this action completes, update authoritive tablet node first.
	tablet.Parent = swrd.Parent
	if tablet.Type == topo.TYPE_MASTER {
		tablet.Type = topo.TYPE_SPARE
		tablet.State = topo.STATE_READ_ONLY
	}
	err = topo.UpdateTablet(ta.ts, tablet)
	if err != nil {
		return err
	}

	// Insert the new tablet location in the replication graph now that
	// we've updated the tablet.
	err = topo.CreateTabletReplicationData(ta.ts, tablet.Tablet)
	if err != nil && err != topo.ErrNodeExists {
		return err
	}

	return nil
}
Example #5
0
func SlaveWasRestarted(ts topo.Server, mysqlDaemon mysqlctl.MysqlDaemon, tabletAlias topo.TabletAlias, swrd *actionnode.SlaveWasRestartedArgs) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	// check the reparent actually worked
	masterAddr, err := mysqlDaemon.GetMasterAddr()
	if err != nil {
		return err
	}
	if masterAddr != swrd.ExpectedMasterAddr && masterAddr != swrd.ExpectedMasterIpAddr {
		log.Errorf("SlaveWasRestarted found unexpected master %v for %v (was expecting %v or %v)", masterAddr, tabletAlias, swrd.ExpectedMasterAddr, swrd.ExpectedMasterIpAddr)
		// Disabled for now
		// if swrd.ContinueOnUnexpectedMaster {
		//	log.Errorf("ContinueOnUnexpectedMaster is set, we keep going anyway")
		// } else
		if swrd.ScrapStragglers {
			return Scrap(ts, tablet.Alias, false)
		} else {
			return fmt.Errorf("Unexpected master %v for %v (was expecting %v or %v)", masterAddr, tabletAlias, swrd.ExpectedMasterAddr, swrd.ExpectedMasterIpAddr)
		}
	}

	// Once this action completes, update authoritive tablet node first.
	tablet.Parent = swrd.Parent
	if tablet.Type == topo.TYPE_MASTER {
		tablet.Type = topo.TYPE_SPARE
		tablet.State = topo.STATE_READ_ONLY
	}
	err = topo.UpdateTablet(ts, tablet)
	if err != nil {
		return err
	}

	// Update the new tablet location in the replication graph now that
	// we've updated the tablet.
	err = topo.CreateTabletReplicationData(ts, tablet.Tablet)
	if err != nil && err != topo.ErrNodeExists {
		return err
	}

	return nil
}
Example #6
0
// change a tablet type to RESTORE and set all the other arguments.
// from now on, we can go to:
// - back to IDLE if we don't use the tablet at all (after for instance
//   a successful ReserveForRestore but a failed Snapshot)
// - to SCRAP if something in the process on the target host fails
// - to SPARE if the clone works
func (ta *TabletActor) changeTypeToRestore(tablet, sourceTablet *topo.TabletInfo, parentAlias topo.TabletAlias, keyRange key.KeyRange) error {
	// run the optional preflight_assigned hook
	hk := hook.NewSimpleHook("preflight_assigned")
	topotools.ConfigureTabletHook(hk, ta.tabletAlias)
	if err := hk.ExecuteOptional(); err != nil {
		return err
	}

	// change the type
	tablet.Parent = parentAlias
	tablet.Keyspace = sourceTablet.Keyspace
	tablet.Shard = sourceTablet.Shard
	tablet.Type = topo.TYPE_RESTORE
	tablet.KeyRange = keyRange
	tablet.DbNameOverride = sourceTablet.DbNameOverride
	if err := topo.UpdateTablet(ta.ts, tablet); err != nil {
		return err
	}

	// and create the replication graph items
	return topo.CreateTabletReplicationData(ta.ts, tablet.Tablet)
}
Example #7
0
// This is a quick and dirty tool to resurrect the TopologyServer data from the
// canonical data stored in the tablet nodes.
//
// cells: local vt cells to scan for all tablets
// keyspaces: list of keyspaces to rebuild
func (wr *Wrangler) RebuildReplicationGraph(cells []string, keyspaces []string) error {
	if cells == nil || len(cells) == 0 {
		return fmt.Errorf("must specify cells to rebuild replication graph")
	}
	if keyspaces == nil || len(keyspaces) == 0 {
		return fmt.Errorf("must specify keyspaces to rebuild replication graph")
	}

	allTablets := make([]*topo.TabletInfo, 0, 1024)
	for _, cell := range cells {
		tablets, err := GetAllTablets(wr.ts, cell)
		if err != nil {
			return err
		}
		allTablets = append(allTablets, tablets...)
	}

	for _, keyspace := range keyspaces {
		log.V(6).Infof("delete keyspace shards: %v", keyspace)
		if err := wr.ts.DeleteKeyspaceShards(keyspace); err != nil {
			return err
		}
	}

	keyspacesToRebuild := make(map[string]bool)
	shardsCreated := make(map[string]bool)
	hasErr := false
	mu := sync.Mutex{}
	wg := sync.WaitGroup{}
	for _, ti := range allTablets {
		wg.Add(1)
		go func(ti *topo.TabletInfo) {
			defer wg.Done()
			if !ti.IsInReplicationGraph() {
				return
			}
			if !strInList(keyspaces, ti.Keyspace) {
				return
			}
			mu.Lock()
			keyspacesToRebuild[ti.Keyspace] = true
			shardPath := ti.Keyspace + "/" + ti.Shard
			if !shardsCreated[shardPath] {
				if err := topo.CreateShard(wr.ts, ti.Keyspace, ti.Shard); err != nil && err != topo.ErrNodeExists {
					log.Warningf("failed re-creating shard %v: %v", shardPath, err)
					hasErr = true
				} else {
					shardsCreated[shardPath] = true
				}
			}
			mu.Unlock()
			err := topo.CreateTabletReplicationData(wr.ts, ti.Tablet)
			if err != nil {
				mu.Lock()
				hasErr = true
				mu.Unlock()
				log.Warningf("failed creating replication path: %v", err)
			}
		}(ti)
	}
	wg.Wait()

	for keyspace := range keyspacesToRebuild {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			if err := wr.RebuildKeyspaceGraph(keyspace, nil); err != nil {
				mu.Lock()
				hasErr = true
				mu.Unlock()
				log.Warningf("RebuildKeyspaceGraph(%v) failed: %v", keyspace, err)
				return
			}
		}(keyspace)
	}
	wg.Wait()

	if hasErr {
		return fmt.Errorf("some errors occurred rebuilding replication graph, consult log")
	}
	return nil
}
Example #8
0
// InitTablet creates or updates a tablet. If no parent is specified
// in the tablet, and the tablet has a slave type, we will find the
// appropriate parent. If createShardAndKeyspace is true and the
// parent keyspace or shard don't exist, they will be created.  If
// update is true, and a tablet with the same ID exists, update it.
// If Force is true, and a tablet with the same ID already exists, it
// will be scrapped and deleted, and then recreated.
func (wr *Wrangler) InitTablet(tablet *topo.Tablet, force, createShardAndKeyspace, update bool) error {
	if err := tablet.Complete(); err != nil {
		return err
	}

	if tablet.IsInReplicationGraph() {
		// create the parent keyspace and shard if needed
		if createShardAndKeyspace {
			if err := wr.ts.CreateKeyspace(tablet.Keyspace, &topo.Keyspace{}); err != nil && err != topo.ErrNodeExists {
				return err
			}

			if err := topo.CreateShard(wr.ts, tablet.Keyspace, tablet.Shard); err != nil && err != topo.ErrNodeExists {
				return err
			}
		}

		// get the shard, checks a couple things
		si, err := wr.ts.GetShard(tablet.Keyspace, tablet.Shard)
		if err != nil {
			return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard")
		}
		if si.KeyRange != tablet.KeyRange {
			return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange)
		}
		if tablet.Type == topo.TYPE_MASTER && !si.MasterAlias.IsZero() && si.MasterAlias != tablet.Alias && !force {
			return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, tablet.Keyspace, tablet.Shard)
		}

		// see if we specified a parent, otherwise get it from the shard
		if tablet.Parent.IsZero() && tablet.Type.IsSlaveType() {
			if si.MasterAlias.IsZero() {
				return fmt.Errorf("trying to create tablet %v in shard %v/%v without a master", tablet.Alias, tablet.Keyspace, tablet.Shard)
			}
			tablet.Parent = si.MasterAlias
		}

		// update the shard record if needed
		if err := wr.updateShardCellsAndMaster(si, tablet.Alias, tablet.Type, force); err != nil {
			return err
		}
	}

	err := topo.CreateTablet(wr.ts, tablet)
	if err != nil && err == topo.ErrNodeExists {
		// Try to update nicely, but if it fails fall back to force behavior.
		if update || force {
			oldTablet, err := wr.ts.GetTablet(tablet.Alias)
			if err != nil {
				log.Warningf("failed reading tablet %v: %v", tablet.Alias, err)
			} else {
				if oldTablet.Keyspace == tablet.Keyspace && oldTablet.Shard == tablet.Shard {
					*(oldTablet.Tablet) = *tablet
					if err := topo.UpdateTablet(wr.ts, oldTablet); err != nil {
						log.Warningf("failed updating tablet %v: %v", tablet.Alias, err)
						// now fall through the Scrap case
					} else {
						if !tablet.IsInReplicationGraph() {
							return nil
						}

						if err := topo.CreateTabletReplicationData(wr.ts, tablet); err != nil {
							log.Warningf("failed updating tablet replication data for %v: %v", tablet.Alias, err)
							// now fall through the Scrap case
						} else {
							return nil
						}
					}
				}
			}
		}
		if force {
			if _, err = wr.Scrap(tablet.Alias, force, false); err != nil {
				log.Errorf("failed scrapping tablet %v: %v", tablet.Alias, err)
				return err
			}
			if err := wr.ts.DeleteTablet(tablet.Alias); err != nil {
				// we ignore this
				log.Errorf("failed deleting tablet %v: %v", tablet.Alias, err)
			}
			return topo.CreateTablet(wr.ts, tablet)
		}
	}
	return err
}
Example #9
0
func (ta *TabletActor) restartSlave(actionNode *actionnode.ActionNode) error {
	rsd := actionNode.Args.(*actionnode.RestartSlaveData)

	tablet, err := ta.ts.GetTablet(ta.tabletAlias)
	if err != nil {
		return err
	}

	// If this check fails, we seem reparented. The only part that
	// could have failed is the insert in the replication
	// graph. Do NOT try to reparent again. That will either wedge
	// replication or corrupt data.
	if tablet.Parent != rsd.Parent {
		log.V(6).Infof("restart with new parent")
		// Remove tablet from the replication graph.
		if err = topo.DeleteTabletReplicationData(ta.ts, tablet.Tablet); err != nil && err != topo.ErrNoNode {
			return err
		}

		// Move a lag slave into the orphan lag type so we can safely ignore
		// this reparenting until replication catches up.
		if tablet.Type == topo.TYPE_LAG {
			tablet.Type = topo.TYPE_LAG_ORPHAN
		} else {
			err = ta.mysqld.RestartSlave(rsd.ReplicationState, rsd.WaitPosition, rsd.TimePromoted)
			if err != nil {
				return err
			}
		}
		// Once this action completes, update authoritive tablet node first.
		tablet.Parent = rsd.Parent
		err = topo.UpdateTablet(ta.ts, tablet)
		if err != nil {
			return err
		}
	} else if rsd.Force {
		err = ta.mysqld.RestartSlave(rsd.ReplicationState, rsd.WaitPosition, rsd.TimePromoted)
		if err != nil {
			return err
		}
		// Complete the special orphan accounting.
		if tablet.Type == topo.TYPE_LAG_ORPHAN {
			tablet.Type = topo.TYPE_LAG
			err = topo.UpdateTablet(ta.ts, tablet)
			if err != nil {
				return err
			}
		}
	} else {
		// There is nothing to safely reparent, so check replication. If
		// either replication thread is not running, report an error.
		replicationPos, err := ta.mysqld.SlaveStatus()
		if err != nil {
			return fmt.Errorf("cannot verify replication for slave: %v", err)
		}
		if replicationPos.SecondsBehindMaster == myproto.InvalidLagSeconds {
			return fmt.Errorf("replication not running for slave")
		}
	}

	// Insert the new tablet location in the replication graph now that
	// we've updated the tablet.
	err = topo.CreateTabletReplicationData(ta.ts, tablet.Tablet)
	if err != nil && err != topo.ErrNodeExists {
		return err
	}

	return nil
}
Example #10
0
// InitTablet creates or updates a tablet. If no parent is specified
// in the tablet, and the tablet has a slave type, we will find the
// appropriate parent. If createShardAndKeyspace is true and the
// parent keyspace or shard don't exist, they will be created.  If
// update is true, and a tablet with the same ID exists, update it.
// If Force is true, and a tablet with the same ID already exists, it
// will be scrapped and deleted, and then recreated.
func (wr *Wrangler) InitTablet(tablet *topo.Tablet, force, createShardAndKeyspace, update bool) error {
	if err := tablet.Complete(); err != nil {
		return err
	}

	if tablet.IsInReplicationGraph() {
		// create the parent keyspace and shard if needed
		if createShardAndKeyspace {
			if err := wr.ts.CreateKeyspace(tablet.Keyspace); err != nil && err != topo.ErrNodeExists {
				return err
			}

			if err := topo.CreateShard(wr.ts, tablet.Keyspace, tablet.Shard); err != nil && err != topo.ErrNodeExists {
				return err
			}
		}

		// get the shard, checks a couple things
		si, err := wr.ts.GetShard(tablet.Keyspace, tablet.Shard)
		if err != nil {
			return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard")
		}
		if si.KeyRange != tablet.KeyRange {
			return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange)
		}
		if tablet.Type == topo.TYPE_MASTER && !si.MasterAlias.IsZero() && si.MasterAlias != tablet.Alias() && !force {
			return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, tablet.Keyspace, tablet.Shard)
		}

		// see if we specified a parent, otherwise get it from the shard
		if tablet.Parent.IsZero() && tablet.Type.IsSlaveType() {
			if si.MasterAlias.IsZero() {
				return fmt.Errorf("trying to create tablet %v in shard %v/%v without a master", tablet.Alias(), tablet.Keyspace, tablet.Shard)
			}
			tablet.Parent = si.MasterAlias
		}

		// See if we need to update the Shard:
		// - add the tablet's cell to the shard's Cells if needed
		// - change the master if needed
		shardUpdateRequired := false
		if !si.HasCell(tablet.Cell) {
			shardUpdateRequired = true
		}
		if tablet.Type == topo.TYPE_MASTER && si.MasterAlias != tablet.Alias() {
			shardUpdateRequired = true
		}

		if shardUpdateRequired {
			actionNode := wr.ai.UpdateShard()
			lockPath, err := wr.lockShard(tablet.Keyspace, tablet.Shard, actionNode)
			if err != nil {
				return err
			}

			// re-read the shard with the lock
			si, err = wr.ts.GetShard(tablet.Keyspace, tablet.Shard)
			if err != nil {
				return wr.unlockShard(tablet.Keyspace, tablet.Shard, actionNode, lockPath, err)
			}

			// update it
			wasUpdated := false
			if !si.HasCell(tablet.Cell) {
				si.Cells = append(si.Cells, tablet.Cell)
				wasUpdated = true
			}
			if tablet.Type == topo.TYPE_MASTER && si.MasterAlias != tablet.Alias() {
				if !si.MasterAlias.IsZero() && !force {
					return wr.unlockShard(tablet.Keyspace, tablet.Shard, actionNode, lockPath, fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, tablet.Keyspace, tablet.Shard))
				}
				si.MasterAlias = tablet.Alias()
				wasUpdated = true
			}

			if wasUpdated {
				// write it back
				if err := wr.ts.UpdateShard(si); err != nil {
					return wr.unlockShard(tablet.Keyspace, tablet.Shard, actionNode, lockPath, err)
				}
			}

			// and unlock
			if err := wr.unlockShard(tablet.Keyspace, tablet.Shard, actionNode, lockPath, err); err != nil {
				return err
			}

			// also create the cell's ShardReplication
			if err := wr.ts.CreateShardReplication(tablet.Cell, tablet.Keyspace, tablet.Shard, &topo.ShardReplication{}); err != nil && err != topo.ErrNodeExists {
				return err
			}
		}
	}

	err := topo.CreateTablet(wr.ts, tablet)
	if err != nil && err == topo.ErrNodeExists {
		// Try to update nicely, but if it fails fall back to force behavior.
		if update || force {
			oldTablet, err := wr.ts.GetTablet(tablet.Alias())
			if err != nil {
				log.Warningf("failed reading tablet %v: %v", tablet.Alias(), err)
			} else {
				if oldTablet.Keyspace == tablet.Keyspace && oldTablet.Shard == tablet.Shard {
					*(oldTablet.Tablet) = *tablet
					if err := topo.UpdateTablet(wr.ts, oldTablet); err != nil {
						log.Warningf("failed updating tablet %v: %v", tablet.Alias(), err)
						// now fall through the Scrap case
					} else {
						if !tablet.IsInReplicationGraph() {
							return nil
						}

						if err := topo.CreateTabletReplicationData(wr.ts, tablet); err != nil {
							log.Warningf("failed updating tablet replication data for %v: %v", tablet.Alias(), err)
							// now fall through the Scrap case
						} else {
							return nil
						}
					}
				}
			}
		}
		if force {
			if _, err = wr.Scrap(tablet.Alias(), force, false); err != nil {
				log.Errorf("failed scrapping tablet %v: %v", tablet.Alias(), err)
				return err
			}
			if err := wr.ts.DeleteTablet(tablet.Alias()); err != nil {
				// we ignore this
				log.Errorf("failed deleting tablet %v: %v", tablet.Alias(), err)
			}
			return topo.CreateTablet(wr.ts, tablet)
		}
	}
	return err
}