Пример #1
0
// ChangeTypeNoRebuild changes a tablet's type, and returns whether
// there's a shard that should be rebuilt, along with its cell,
// keyspace, and shard. If force is true, it will bypass the vtaction
// system and make the data change directly, and not run the remote
// hooks.
//
// Note we don't update the master record in the Shard here, as we
// can't ChangeType from and out of master anyway.
func (wr *Wrangler) ChangeTypeNoRebuild(tabletAlias topo.TabletAlias, tabletType topo.TabletType, force bool) (rebuildRequired bool, cell, keyspace, shard string, err error) {
	// Load tablet to find keyspace and shard assignment.
	// Don't load after the ChangeType which might have unassigned
	// the tablet.
	ti, err := wr.ts.GetTablet(tabletAlias)
	if err != nil {
		return false, "", "", "", err
	}

	if force {
		if err := actionnode.ChangeType(wr.ts, tabletAlias, tabletType, nil, false); err != nil {
			return false, "", "", "", err
		}
	} else {
		if wr.UseRPCs {
			if err := wr.ai.RpcChangeType(ti, tabletType, wr.actionTimeout()); err != nil {
				return false, "", "", "", err
			}

		} else {
			// the remote action will run the hooks
			actionPath, err := wr.ai.ChangeType(tabletAlias, tabletType)
			if err != nil {
				return false, "", "", "", err
			}

			// You don't have a choice - you must wait for
			// completion before rebuilding.
			if err := wr.ai.WaitForCompletion(actionPath, wr.actionTimeout()); err != nil {
				return false, "", "", "", err
			}
		}
	}

	if !ti.Tablet.IsInServingGraph() {
		// re-read the tablet, see if we become serving
		ti, err = wr.ts.GetTablet(tabletAlias)
		if err != nil {
			return false, "", "", "", err
		}
		if !ti.Tablet.IsInServingGraph() {
			return false, "", "", "", nil
		}
	}
	return true, ti.Alias.Cell, ti.Keyspace, ti.Shard, nil

}
Пример #2
0
func (tm *TabletManager) ChangeType(context *rpcproto.Context, args *topo.TabletType, reply *rpc.UnusedResponse) error {
	return tm.agent.RpcWrapLockAction(context.RemoteAddr, actionnode.TABLET_ACTION_CHANGE_TYPE, args, reply, func() error {
		return actionnode.ChangeType(tm.agent.TopoServer, tm.agent.TabletAlias, *args, nil, true /*runHooks*/)
	})
}
Пример #3
0
// Operate on restore tablet.
// Check that the SnapshotManifest is valid and the master has not changed.
// Shutdown mysqld.
// Load the snapshot from source tablet.
// Restart mysqld and replication.
// Put tablet into the replication graph as a spare.
func (ta *TabletActor) restore(actionNode *actionnode.ActionNode) error {
	args := actionNode.Args.(*actionnode.RestoreArgs)

	// read our current tablet, verify its state
	tablet, err := ta.ts.GetTablet(ta.tabletAlias)
	if err != nil {
		return err
	}
	if args.WasReserved {
		if tablet.Type != topo.TYPE_RESTORE {
			return fmt.Errorf("expected restore type, not %v: %v", tablet.Type, ta.tabletAlias)
		}
	} else {
		if tablet.Type != topo.TYPE_IDLE {
			return fmt.Errorf("expected idle type, not %v: %v", tablet.Type, ta.tabletAlias)
		}
	}

	// read the source tablet, compute args.SrcFilePath if default
	sourceTablet, err := ta.ts.GetTablet(args.SrcTabletAlias)
	if err != nil {
		return err
	}
	if strings.ToLower(args.SrcFilePath) == "default" {
		args.SrcFilePath = path.Join(mysqlctl.SnapshotURLPath, mysqlctl.SnapshotManifestFile)
	}

	// read the parent tablet, verify its state
	parentTablet, err := ta.ts.GetTablet(args.ParentAlias)
	if err != nil {
		return err
	}
	if parentTablet.Type != topo.TYPE_MASTER && parentTablet.Type != topo.TYPE_SNAPSHOT_SOURCE {
		return fmt.Errorf("restore expected master or snapshot_source parent: %v %v", parentTablet.Type, args.ParentAlias)
	}

	// read & unpack the manifest
	sm := new(mysqlctl.SnapshotManifest)
	if err := fetchAndParseJsonFile(sourceTablet.GetAddr(), args.SrcFilePath, sm); err != nil {
		return err
	}

	if !args.WasReserved {
		if err := ta.changeTypeToRestore(tablet, sourceTablet, parentTablet.Alias, sourceTablet.KeyRange); err != nil {
			return err
		}
	}

	// do the work
	if err := ta.mysqld.RestoreFromSnapshot(sm, args.FetchConcurrency, args.FetchRetryCount, args.DontWaitForSlaveStart, ta.hookExtraEnv()); err != nil {
		log.Errorf("RestoreFromSnapshot failed (%v), scrapping", err)
		if err := actionnode.Scrap(ta.ts, ta.tabletAlias, false); err != nil {
			log.Errorf("Failed to Scrap after failed RestoreFromSnapshot: %v", err)
		}

		return err
	}

	// change to TYPE_SPARE, we're done!
	return actionnode.ChangeType(ta.ts, ta.tabletAlias, topo.TYPE_SPARE, nil, true)
}
Пример #4
0
func (ta *TabletActor) changeType(actionNode *actionnode.ActionNode) error {
	dbType := actionNode.Args.(*topo.TabletType)
	return actionnode.ChangeType(ta.ts, ta.tabletAlias, *dbType, nil, true /*runHooks*/)
}
Пример #5
0
// RunHealthCheck takes the action mutex, runs the health check,
// and if we need to change our state, do it.
// If we are the master, we don't change our type, healthy or not.
// If we are not the master, we change to spare if not healthy,
// or to the passed in targetTabletType if healthy.
//
// Note we only update the topo record if we need to, that is if our type or
// health details changed.
func (agent *ActionAgent) RunHealthCheck(targetTabletType topo.TabletType, lockTimeout time.Duration) {
	agent.actionMutex.Lock()
	defer agent.actionMutex.Unlock()

	// read the current tablet record
	agent.mutex.Lock()
	tablet := agent._tablet
	agent.mutex.Unlock()

	// run the health check
	typeForHealthCheck := targetTabletType
	if tablet.Type == topo.TYPE_MASTER {
		typeForHealthCheck = topo.TYPE_MASTER
	}
	health, err := health.Run(typeForHealthCheck)
	if len(health) == 0 {
		health = nil
	}

	// start with no change
	newTabletType := tablet.Type
	if err != nil {
		if tablet.Type != targetTabletType {
			log.Infof("Tablet not healthy and in state %v, not changing it: %v", tablet.Type, err)
			return
		}
		log.Infof("Tablet not healthy, converting it from %v to spare: %v", targetTabletType, err)
		newTabletType = topo.TYPE_SPARE
	} else {
		// We are healthy, maybe with health, see if we need
		// to update the record. We only change from spare to
		// our target type.
		if tablet.Type == topo.TYPE_SPARE {
			newTabletType = targetTabletType
		}
		if tablet.Type == newTabletType && reflect.DeepEqual(health, tablet.Health) {
			// no change in health, not logging anything,
			// and we're done
			return
		}

		// we need to update our state
		log.Infof("Updating tablet record as healthy type %v -> %v with health details %v -> %v", tablet.Type, newTabletType, tablet.Health, health)
	}

	// Change the Type, update the health
	if err := actionnode.ChangeType(agent.TopoServer, tablet.Alias, newTabletType, health, true /*runHooks*/); err != nil {
		log.Infof("Error updating tablet record: %v", err)
		return
	}

	// Rebuild the serving graph in our cell, only if we're dealing with
	// a serving type
	if topo.IsInServingGraph(targetTabletType) {
		// TODO: interrupted may need to be a global one closed when we exit
		interrupted := make(chan struct{})
		actionNode := actionnode.RebuildShard()
		lockPath, err := actionNode.LockShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, lockTimeout, interrupted)
		if err != nil {
			log.Warningf("Cannot lock shard for rebuild: %v", err)
			return
		}
		err = topo.RebuildShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, topo.RebuildShardOptions{Cells: []string{tablet.Alias.Cell}, IgnorePartialResult: true})
		err = actionNode.UnlockShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, lockPath, err)
		if err != nil {
			log.Warningf("UnlockShard returned an error: %v", err)
			return
		}
	}

	// run the post action callbacks
	agent.afterAction("healthcheck", false /* reloadSchema */)
}