Esempio n. 1
0
// Make this external, since these transitions need to be forced from time to time.
func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, runHooks bool) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) {
		return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias)
	}

	if runHooks {
		// Only run the preflight_serving_type hook when
		// transitioning from non-serving to serving.
		if !topo.IsInServingGraph(tablet.Type) && topo.IsInServingGraph(newType) {
			if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil {
				return err
			}
		}
	}

	tablet.Type = newType
	if newType == topo.TYPE_IDLE {
		if tablet.Parent.IsZero() {
			si, err := ts.GetShard(tablet.Keyspace, tablet.Shard)
			if err != nil {
				return err
			}
			rec := concurrency.AllErrorRecorder{}
			wg := sync.WaitGroup{}
			for _, cell := range si.Cells {
				wg.Add(1)
				go func(cell string) {
					defer wg.Done()
					sri, err := ts.GetShardReplication(cell, tablet.Keyspace, tablet.Shard)
					if err != nil {
						log.Warningf("Cannot check cell %v for extra replication paths, assuming it's good", cell)
						return
					}
					for _, rl := range sri.ReplicationLinks {
						if rl.Parent == tabletAlias {
							rec.RecordError(fmt.Errorf("Still have a ReplicationLink in cell %v", cell))
						}
					}
				}(cell)
			}
			wg.Wait()
			if rec.HasErrors() {
				return rec.Error()
			}
		}
		tablet.Parent = topo.TabletAlias{}
		tablet.Keyspace = ""
		tablet.Shard = ""
		tablet.KeyRange = key.KeyRange{}
	}
	return topo.UpdateTablet(ts, tablet)
}
Esempio n. 2
0
// ChangeSlaveType changes the type of tablet and recomputes all
// necessary derived paths in the serving graph, if necessary.
//
// Note we don't update the master record in the Shard here, as we
// can't ChangeType from and out of master anyway.
func (wr *Wrangler) ChangeSlaveType(ctx context.Context, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType) error {
	// Load tablet to find endpoint, and keyspace and shard assignment.
	ti, err := wr.ts.GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}

	if !topo.IsTrivialTypeChange(ti.Type, tabletType) {
		return fmt.Errorf("tablet %v type change %v -> %v is not an allowed transition for ChangeSlaveType", tabletAlias, ti.Type, tabletType)
	}

	// ask the tablet to make the change
	if err := wr.tmc.ChangeType(ctx, ti, tabletType); err != nil {
		return err
	}

	// if the tablet was or is serving, rebuild the serving graph
	if ti.IsInServingGraph() || topo.IsInServingGraph(tabletType) {
		if _, err := wr.RebuildShardGraph(ctx, ti.Tablet.Keyspace, ti.Tablet.Shard, []string{ti.Tablet.Alias.Cell}); err != nil {
			return err
		}
	}

	return nil
}
Esempio n. 3
0
// rebuildShardIfNeeded will rebuild the serving graph if we need to
func (agent *ActionAgent) rebuildShardIfNeeded(tablet *topo.TabletInfo, targetTabletType topo.TabletType, lockTimeout time.Duration) error {
	if topo.IsInServingGraph(targetTabletType) {
		// TODO: interrupted may need to be a global one closed when we exit
		interrupted := make(chan struct{})

		if *topotools.UseSrvShardLocks {
			// no need to take the shard lock in this case
			if err := topotools.RebuildShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, topotools.RebuildShardOptions{Cells: []string{tablet.Alias.Cell}, IgnorePartialResult: true}, lockTimeout, interrupted); err != nil {
				return fmt.Errorf("topotools.RebuildShard returned an error: %v", err)
			}
		} else {
			actionNode := actionnode.RebuildShard()
			lockPath, err := actionNode.LockShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, lockTimeout, interrupted)
			if err != nil {
				return fmt.Errorf("cannot lock shard for rebuild: %v", err)
			}
			err = topotools.RebuildShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, topotools.RebuildShardOptions{Cells: []string{tablet.Alias.Cell}, IgnorePartialResult: true}, lockTimeout, interrupted)
			err = actionNode.UnlockShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, lockPath, err)
			if err != nil {
				return fmt.Errorf("UnlockShard returned an error: %v", err)
			}
		}
	}
	return nil
}
Esempio n. 4
0
// updateServingGraph will update the serving graph if we need to.
func (agent *ActionAgent) updateServingGraph(tablet *topo.TabletInfo, targetTabletType pbt.TabletType) error {
	if topo.IsInServingGraph(targetTabletType) {
		if err := topotools.UpdateTabletEndpoints(agent.batchCtx, agent.TopoServer, tablet.Tablet); err != nil {
			return fmt.Errorf("UpdateTabletEndpoints failed: %v", err)
		}
	}
	return nil
}
Esempio n. 5
0
// DeleteShard will do all the necessary changes in the topology server
// to entirely remove a shard.
func (wr *Wrangler) DeleteShard(ctx context.Context, keyspace, shard string, recursive bool) error {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}

	tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err
	}
	if recursive {
		wr.Logger().Infof("Deleting all tablets in shard %v/%v", keyspace, shard)
		for tabletAlias := range tabletMap {
			// We don't care about scrapping or updating the replication graph,
			// because we're about to delete the entire replication graph.
			wr.Logger().Infof("Deleting tablet %v", tabletAlias)
			if err := wr.TopoServer().DeleteTablet(ctx, tabletAlias); err != nil && err != topo.ErrNoNode {
				// Unlike the errors below in non-recursive steps, we don't want to
				// continue if a DeleteTablet fails. If we continue and delete the
				// replication graph, the tablet record will be orphaned, since we'll
				// no longer know it belongs to this shard.
				//
				// If the problem is temporary, or resolved externally, re-running
				// DeleteShard will skip over tablets that were already deleted.
				return fmt.Errorf("can't delete tablet %v: %v", tabletAlias, err)
			}
		}
	} else if len(tabletMap) > 0 {
		return fmt.Errorf("shard %v/%v still has %v tablets; use -recursive or remove them manually", keyspace, shard, len(tabletMap))
	}

	// remove the replication graph and serving graph in each cell
	for _, cell := range shardInfo.Cells {
		if err := wr.ts.DeleteShardReplication(ctx, cell, keyspace, shard); err != nil && err != topo.ErrNoNode {
			wr.Logger().Warningf("Cannot delete ShardReplication in cell %v for %v/%v: %v", cell, keyspace, shard, err)
		}

		for _, t := range topo.AllTabletTypes {
			if !topo.IsInServingGraph(t) {
				continue
			}

			if err := wr.ts.DeleteEndPoints(ctx, cell, keyspace, shard, t, -1); err != nil && err != topo.ErrNoNode {
				wr.Logger().Warningf("Cannot delete EndPoints in cell %v for %v/%v/%v: %v", cell, keyspace, shard, t, err)
			}
		}

		if err := wr.ts.DeleteSrvShard(ctx, cell, keyspace, shard); err != nil && err != topo.ErrNoNode {
			wr.Logger().Warningf("Cannot delete SrvShard in cell %v for %v/%v: %v", cell, keyspace, shard, err)
		}
	}

	return wr.ts.DeleteShard(ctx, keyspace, shard)
}
Esempio n. 6
0
// rebuildShardIfNeeded will rebuild the serving graph if we need to
func (agent *ActionAgent) rebuildShardIfNeeded(tablet *topo.TabletInfo, targetTabletType topo.TabletType) error {
	if topo.IsInServingGraph(targetTabletType) {
		// TODO: interrupted may need to be a global one closed when we exit
		interrupted := make(chan struct{})

		// no need to take the shard lock in this case
		if err := topotools.RebuildShard(logutil.NewConsoleLogger(), agent.TopoServer, tablet.Keyspace, tablet.Shard, []string{tablet.Alias.Cell}, agent.LockTimeout, interrupted); err != nil {
			return fmt.Errorf("topotools.RebuildShard returned an error: %v", err)
		}
	}
	return nil
}
Esempio n. 7
0
// VtctldSrvType returns the tablet type, possibly linked to the
// EndPoints page in vtctld.
func VtctldSrvType(cell, keyspace, shard string, tabletType topo.TabletType) template.HTML {
	if !topo.IsInServingGraph(tabletType) {
		return template.HTML(tabletType)
	}
	return MakeVtctldRedirect(string(tabletType), map[string]string{
		"type":        "srv_type",
		"cell":        cell,
		"keyspace":    keyspace,
		"shard":       shard,
		"tablet_type": string(tabletType),
	})
}
Esempio n. 8
0
// VtctldSrvType returns the tablet type, possibly linked to the
// EndPoints page in vtctld.
func VtctldSrvType(cell, keyspace, shard string, tabletType pb.TabletType) template.HTML {
	strTabletType := strings.ToLower(tabletType.String())
	if !topo.IsInServingGraph(tabletType) {
		return template.HTML(strTabletType)
	}
	return MakeVtctldRedirect(strTabletType, map[string]string{
		"type":        "srv_type",
		"cell":        cell,
		"keyspace":    keyspace,
		"shard":       shard,
		"tablet_type": strTabletType,
	})
}
Esempio n. 9
0
// UpdateTabletEndpoints fixes up any entries in the serving graph that relate
// to a given tablet.
func UpdateTabletEndpoints(ctx context.Context, ts topo.Server, tablet *pb.Tablet) (err error) {
	srvTypes, err := ts.GetSrvTabletTypesPerShard(ctx, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard)
	if err != nil {
		if err != topo.ErrNoNode {
			return err
		}
		// It's fine if there are no existing types.
		srvTypes = nil
	}

	wg := sync.WaitGroup{}
	errs := concurrency.AllErrorRecorder{}

	// Update the list that the tablet is supposed to be in (if any).
	if topo.IsInServingGraph(tablet.Type) {
		endpoint, err := topo.TabletEndPoint(tablet)
		if err != nil {
			return err
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			errs.RecordError(
				updateEndpoint(ctx, ts, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard,
					tablet.Type, endpoint))
		}()
	}

	// Remove it from any other lists it isn't supposed to be in.
	for _, srvType := range srvTypes {
		if srvType != tablet.Type {
			wg.Add(1)
			go func(tabletType pb.TabletType) {
				defer wg.Done()
				errs.RecordError(
					removeEndpoint(ctx, ts, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard,
						tabletType, tablet.Alias.Uid))
			}(srvType)
		}
	}

	wg.Wait()
	return errs.Error()
}
Esempio n. 10
0
// ChangeSlaveType changes the type of tablet and recomputes all
// necessary derived paths in the serving graph, if necessary.
//
// Note we don't update the master record in the Shard here, as we
// can't ChangeType from and out of master anyway.
func (wr *Wrangler) ChangeSlaveType(ctx context.Context, tabletAlias *pb.TabletAlias, tabletType pb.TabletType) error {
	// Load tablet to find endpoint, and keyspace and shard assignment.
	ti, err := wr.ts.GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}

	// ask the tablet to make the change
	if err := wr.tmc.ChangeType(ctx, ti, tabletType); err != nil {
		return err
	}

	// if the tablet was or is serving, rebuild the serving graph
	if ti.IsInServingGraph() || topo.IsInServingGraph(tabletType) {
		if _, err := wr.RebuildShardGraph(ctx, ti.Tablet.Keyspace, ti.Tablet.Shard, []string{ti.Tablet.Alias.Cell}); err != nil {
			return err
		}
	}

	return nil
}
Esempio n. 11
0
// DeleteShard will do all the necessary changes in the topology server
// to entirely remove a shard. It can only work if there are no tablets
// in that shard.
func (wr *Wrangler) DeleteShard(keyspace, shard string) error {
	shardInfo, err := wr.ts.GetShard(keyspace, shard)
	if err != nil {
		return err
	}

	tabletMap, err := GetTabletMapForShard(wr.ts, keyspace, shard)
	if err != nil {
		return err
	}
	if len(tabletMap) > 0 {
		return fmt.Errorf("shard %v/%v still has %v tablets", keyspace, shard, len(tabletMap))
	}

	// remove the replication graph and serving graph in each cell
	for _, cell := range shardInfo.Cells {
		if err := wr.ts.DeleteShardReplication(cell, keyspace, shard); err != nil {
			log.Warningf("Cannot delete ShardReplication in cell %v for %v/%v: %v", cell, keyspace, shard, err)
		}

		for _, t := range topo.AllTabletTypes {
			if !topo.IsInServingGraph(t) {
				continue
			}

			if err := wr.ts.DeleteSrvTabletType(cell, keyspace, shard, t); err != nil && err != topo.ErrNoNode {
				log.Warningf("Cannot delete EndPoints in cell %v for %v/%v/%v: %v", cell, keyspace, shard, t, err)
			}
		}

		if err := wr.ts.DeleteSrvShard(cell, keyspace, shard); err != nil && err != topo.ErrNoNode {
			log.Warningf("Cannot delete SrvShard in cell %v for %v/%v: %v", cell, keyspace, shard, err)
		}
	}

	return wr.ts.DeleteShard(keyspace, shard)
}
Esempio n. 12
0
// RunHealthCheck takes the action mutex, runs the health check,
// and if we need to change our state, do it.
// If we are the master, we don't change our type, healthy or not.
// If we are not the master, we change to spare if not healthy,
// or to the passed in targetTabletType if healthy.
//
// Note we only update the topo record if we need to, that is if our type or
// health details changed.
func (agent *ActionAgent) RunHealthCheck(targetTabletType topo.TabletType, lockTimeout time.Duration) {
	agent.actionMutex.Lock()
	defer agent.actionMutex.Unlock()

	// read the current tablet record
	agent.mutex.Lock()
	tablet := agent._tablet
	agent.mutex.Unlock()

	// run the health check
	typeForHealthCheck := targetTabletType
	if tablet.Type == topo.TYPE_MASTER {
		typeForHealthCheck = topo.TYPE_MASTER
	}
	health, err := health.Run(typeForHealthCheck)

	// start with no change
	newTabletType := tablet.Type
	if err != nil {
		if tablet.Type != targetTabletType {
			log.Infof("Tablet not healthy and in state %v, not changing it: %v", tablet.Type, err)
			return
		}
		log.Infof("Tablet not healthy, converting it from %v to spare: %v", targetTabletType, err)
		newTabletType = topo.TYPE_SPARE
	} else {
		// We are healthy, maybe with health, see if we need
		// to update the record. We only change from spare to
		// our target type.
		if tablet.Type == topo.TYPE_SPARE {
			newTabletType = targetTabletType
		}
		if tablet.Type == newTabletType && tablet.IsHealthEqual(health) {
			// no change in health, not logging anything,
			// and we're done
			return
		}

		// we need to update our state
		log.Infof("Updating tablet record as healthy type %v -> %v with health details %v -> %v", tablet.Type, newTabletType, tablet.Health, health)
	}

	// Change the Type, update the health. Note we pass in a map
	// that's not nil, meaning if it's empty, we will clear it.
	if err := topotools.ChangeType(agent.TopoServer, tablet.Alias, newTabletType, health, true /*runHooks*/); err != nil {
		log.Infof("Error updating tablet record: %v", err)
		return
	}

	// Rebuild the serving graph in our cell, only if we're dealing with
	// a serving type
	if topo.IsInServingGraph(targetTabletType) {
		// TODO: interrupted may need to be a global one closed when we exit
		interrupted := make(chan struct{})

		if *topotools.UseSrvShardLocks {
			// no need to take the shard lock in this case
			if err := topotools.RebuildShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, topotools.RebuildShardOptions{Cells: []string{tablet.Alias.Cell}, IgnorePartialResult: true}, lockTimeout, interrupted); err != nil {
				log.Warningf("topotools.RebuildShard returned an error: %v", err)
				return
			}
		} else {
			actionNode := actionnode.RebuildShard()
			lockPath, err := actionNode.LockShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, lockTimeout, interrupted)
			if err != nil {
				log.Warningf("Cannot lock shard for rebuild: %v", err)
				return
			}
			err = topotools.RebuildShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, topotools.RebuildShardOptions{Cells: []string{tablet.Alias.Cell}, IgnorePartialResult: true}, lockTimeout, interrupted)
			err = actionNode.UnlockShard(agent.TopoServer, tablet.Keyspace, tablet.Shard, lockPath, err)
			if err != nil {
				log.Warningf("UnlockShard returned an error: %v", err)
				return
			}
		}
	}

	// run the post action callbacks
	agent.afterAction("healthcheck", false /* reloadSchema */)
}