// changeCallback is run after every action that might // have changed something in the tablet record. func (agent *ActionAgent) changeCallback(ctx context.Context, oldTablet, newTablet *pbt.Tablet) error { span := trace.NewSpanFromContext(ctx) span.StartLocal("ActionAgent.changeCallback") defer span.Finish() allowQuery := topo.IsRunningQueryService(newTablet.Type) // Read the shard to get SourceShards / TabletControlMap if // we're going to use it. var shardInfo *topo.ShardInfo var tabletControl *pbt.Shard_TabletControl var blacklistedTables []string var err error var disallowQueryReason string if allowQuery { shardInfo, err = agent.TopoServer.GetShard(ctx, newTablet.Keyspace, newTablet.Shard) if err != nil { log.Errorf("Cannot read shard for this tablet %v, might have inaccurate SourceShards and TabletControls: %v", newTablet.Alias, err) } else { if newTablet.Type == pbt.TabletType_MASTER { if len(shardInfo.SourceShards) > 0 { allowQuery = false disallowQueryReason = "old master is still in shard info" } } if tc := shardInfo.GetTabletControl(newTablet.Type); tc != nil { if topo.InCellList(newTablet.Alias.Cell, tc.Cells) { if tc.DisableQueryService { allowQuery = false disallowQueryReason = "query service disabled by tablet control" } blacklistedTables = tc.BlacklistedTables tabletControl = tc } } } } else { disallowQueryReason = fmt.Sprintf("not a serving tablet type(%v)", newTablet.Type) } // Read the keyspace on masters to get ShardingColumnType, // for binlog replication, only if source shards are set. var keyspaceInfo *topo.KeyspaceInfo if newTablet.Type == pbt.TabletType_MASTER && shardInfo != nil && len(shardInfo.SourceShards) > 0 { keyspaceInfo, err = agent.TopoServer.GetKeyspace(ctx, newTablet.Keyspace) if err != nil { log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err) keyspaceInfo = nil } } if allowQuery { // There are a few transitions when we need to restart the query service: switch { // If either InitMaster or InitSlave was called, because those calls // (or a prior call to ResetReplication) may have silently broken the // rowcache invalidator by executing RESET MASTER. // Note that we don't care about fixing it after ResetReplication itself // since that call breaks everything on purpose, and we don't expect // anything to start working until either InitMaster or InitSlave. case agent.initReplication: agent.initReplication = false agent.stopQueryService("initialize replication") // Transitioning from replica to master, so clients that were already // connected don't keep on using the master as replica or rdonly. case newTablet.Type == pbt.TabletType_MASTER && oldTablet.Type != pbt.TabletType_MASTER: agent.stopQueryService("tablet promoted to master") // Having different parameters for the query service. // It needs to stop and restart with the new parameters. // That includes: // - changing KeyRange // - changing the BlacklistedTables list case (newTablet.KeyRange != oldTablet.KeyRange), !reflect.DeepEqual(blacklistedTables, agent.BlacklistedTables()): agent.stopQueryService("keyrange/blacklistedtables changed") } if err := agent.allowQueries(newTablet, blacklistedTables); err != nil { log.Errorf("Cannot start query service: %v", err) } } else { agent.stopQueryService(disallowQueryReason) } // save the tabletControl we've been using, so the background // healthcheck makes the same decisions as we've been making. agent.setTabletControl(tabletControl) // update stream needs to be started or stopped too if agent.DBConfigs != nil { if topo.IsRunningUpdateStream(newTablet.Type) { binlog.EnableUpdateStreamService(agent.DBConfigs.App.DbName, agent.MysqlDaemon) } else { binlog.DisableUpdateStreamService() } } statsType.Set(strings.ToLower(newTablet.Type.String())) statsKeyspace.Set(newTablet.Keyspace) statsShard.Set(newTablet.Shard) if newTablet.KeyRange != nil { statsKeyRangeStart.Set(hex.EncodeToString(newTablet.KeyRange.Start)) statsKeyRangeEnd.Set(hex.EncodeToString(newTablet.KeyRange.End)) } else { statsKeyRangeStart.Set("") statsKeyRangeEnd.Set("") } // See if we need to start or stop any binlog player if agent.BinlogPlayerMap != nil { if newTablet.Type == pbt.TabletType_MASTER { agent.BinlogPlayerMap.RefreshMap(agent.batchCtx, newTablet, keyspaceInfo, shardInfo) } else { agent.BinlogPlayerMap.StopAllPlayersAndReset() } } return nil }
// changeCallback is run after every action that might // have changed something in the tablet record or in the topology. func (agent *ActionAgent) changeCallback(ctx context.Context, oldTablet, newTablet *pbt.Tablet) error { span := trace.NewSpanFromContext(ctx) span.StartLocal("ActionAgent.changeCallback") defer span.Finish() allowQuery := topo.IsRunningQueryService(newTablet.Type) // Read the shard to get SourceShards / TabletControlMap if // we're going to use it. var shardInfo *topo.ShardInfo var tabletControl *pbt.Shard_TabletControl var blacklistedTables []string var err error var disallowQueryReason string if allowQuery { shardInfo, err = agent.TopoServer.GetShard(ctx, newTablet.Keyspace, newTablet.Shard) if err != nil { log.Errorf("Cannot read shard for this tablet %v, might have inaccurate SourceShards and TabletControls: %v", newTablet.Alias, err) } else { if newTablet.Type == pbt.TabletType_MASTER { if len(shardInfo.SourceShards) > 0 { allowQuery = false disallowQueryReason = "master tablet with filtered replication on" } } if tc := shardInfo.GetTabletControl(newTablet.Type); tc != nil { if topo.InCellList(newTablet.Alias.Cell, tc.Cells) { if tc.DisableQueryService { allowQuery = false disallowQueryReason = "query service disabled by tablet control" } blacklistedTables = tc.BlacklistedTables tabletControl = tc } } } } else { disallowQueryReason = fmt.Sprintf("not a serving tablet type(%v)", newTablet.Type) } if allowQuery { if err := agent.allowQueries(newTablet, blacklistedTables); err != nil { log.Errorf("Cannot start query service: %v", err) } } else { agent.disallowQueries(newTablet, disallowQueryReason) } // save the tabletControl we've been using, so the background // healthcheck makes the same decisions as we've been making. agent.setTabletControl(tabletControl) // update stream needs to be started or stopped too if topo.IsRunningUpdateStream(newTablet.Type) { agent.UpdateStream.Enable() } else { agent.UpdateStream.Disable() } statsType.Set(strings.ToLower(newTablet.Type.String())) statsKeyspace.Set(newTablet.Keyspace) statsShard.Set(newTablet.Shard) if newTablet.KeyRange != nil { statsKeyRangeStart.Set(hex.EncodeToString(newTablet.KeyRange.Start)) statsKeyRangeEnd.Set(hex.EncodeToString(newTablet.KeyRange.End)) } else { statsKeyRangeStart.Set("") statsKeyRangeEnd.Set("") } // See if we need to start or stop any binlog player if agent.BinlogPlayerMap != nil { if newTablet.Type == pbt.TabletType_MASTER { // Read the keyspace on masters to get // ShardingColumnType, for binlog replication, // only if source shards are set. var keyspaceInfo *topo.KeyspaceInfo if shardInfo != nil && len(shardInfo.SourceShards) > 0 { keyspaceInfo, err = agent.TopoServer.GetKeyspace(ctx, newTablet.Keyspace) if err != nil { keyspaceInfo = nil } } agent.BinlogPlayerMap.RefreshMap(agent.batchCtx, newTablet, keyspaceInfo, shardInfo) } else { agent.BinlogPlayerMap.StopAllPlayersAndReset() } } return nil }
// changeCallback is run after every action that might // have changed something in the tablet record or in the topology. // // It owns making changes to the BinlogPlayerMap. The input for this is the // tablet type (has to be master), and the shard's SourceShards. // // It owns updating the blacklisted tables. // // It owns updating the stats record for 'TabletType'. // // It owns starting and stopping the update stream service. // // It owns reading the TabletControl for the current tablet, and storing it. func (agent *ActionAgent) changeCallback(ctx context.Context, oldTablet, newTablet *topodatapb.Tablet) { span := trace.NewSpanFromContext(ctx) span.StartLocal("ActionAgent.changeCallback") defer span.Finish() allowQuery := topo.IsRunningQueryService(newTablet.Type) broadcastHealth := false runUpdateStream := allowQuery // Read the shard to get SourceShards / TabletControlMap if // we're going to use it. var shardInfo *topo.ShardInfo var err error var disallowQueryReason string var blacklistedTables []string updateBlacklistedTables := true if allowQuery { shardInfo, err = agent.TopoServer.GetShard(ctx, newTablet.Keyspace, newTablet.Shard) if err != nil { log.Errorf("Cannot read shard for this tablet %v, might have inaccurate SourceShards and TabletControls: %v", newTablet.Alias, err) updateBlacklistedTables = false } else { if newTablet.Type == topodatapb.TabletType_MASTER { if len(shardInfo.SourceShards) > 0 { allowQuery = false disallowQueryReason = "master tablet with filtered replication on" } } if tc := shardInfo.GetTabletControl(newTablet.Type); tc != nil { if topo.InCellList(newTablet.Alias.Cell, tc.Cells) { if tc.DisableQueryService { allowQuery = false disallowQueryReason = "TabletControl.DisableQueryService set" } blacklistedTables = tc.BlacklistedTables } } } } else { disallowQueryReason = fmt.Sprintf("not a serving tablet type(%v)", newTablet.Type) } agent.setServicesDesiredState(disallowQueryReason, runUpdateStream) if updateBlacklistedTables { if err := agent.loadBlacklistRules(newTablet, blacklistedTables); err != nil { // FIXME(alainjobart) how to handle this error? log.Errorf("Cannot update blacklisted tables rule: %v", err) } else { agent.setBlacklistedTables(blacklistedTables) } } if allowQuery { // Query service should be running. if oldTablet.Type == topodatapb.TabletType_REPLICA && newTablet.Type == topodatapb.TabletType_MASTER { // When promoting from replica to master, allow both master and replica // queries to be served during gracePeriod. if _, err := agent.QueryServiceControl.SetServingType(newTablet.Type, true, []topodatapb.TabletType{oldTablet.Type}); err == nil { // If successful, broadcast to vtgate and then wait. agent.broadcastHealth() time.Sleep(*gracePeriod) } else { log.Errorf("Can't start query service for MASTER+REPLICA mode: %v", err) } } if stateChanged, err := agent.QueryServiceControl.SetServingType(newTablet.Type, true, nil); err == nil { // If the state changed, broadcast to vtgate. // (e.g. this happens when the tablet was already master, but it just // changed from NOT_SERVING to SERVING due to // "vtctl MigrateServedFrom ... master".) if stateChanged { broadcastHealth = true } } else { runUpdateStream = false log.Errorf("Cannot start query service: %v", err) } } else { // Query service should be stopped. if topo.IsSubjectToLameduck(oldTablet.Type) && newTablet.Type == topodatapb.TabletType_SPARE && *gracePeriod > 0 { // When a non-MASTER serving type is going SPARE, // put query service in lameduck during gracePeriod. agent.lameduck(disallowQueryReason) } log.Infof("Disabling query service on type change, reason: %v", disallowQueryReason) if stateChanged, err := agent.QueryServiceControl.SetServingType(newTablet.Type, false, nil); err == nil { // If the state changed, broadcast to vtgate. // (e.g. this happens when the tablet was already master, but it just // changed from SERVING to NOT_SERVING because filtered replication was // enabled.) if stateChanged { broadcastHealth = true } } else { log.Errorf("SetServingType(serving=false) failed: %v", err) } } // update stream needs to be started or stopped too if topo.IsRunningUpdateStream(newTablet.Type) && runUpdateStream { agent.UpdateStream.Enable() } else { agent.UpdateStream.Disable() } // upate the stats to our current type if agent.exportStats { agent.statsTabletType.Set(topoproto.TabletTypeLString(newTablet.Type)) } // See if we need to start or stop any binlog player if agent.BinlogPlayerMap != nil { if newTablet.Type == topodatapb.TabletType_MASTER { agent.BinlogPlayerMap.RefreshMap(agent.batchCtx, newTablet, shardInfo) } else { agent.BinlogPlayerMap.StopAllPlayersAndReset() } } // Broadcast health changes to vtgate immediately. if broadcastHealth { agent.broadcastHealth() } }
// changeCallback is run after every action that might // have changed something in the tablet record or in the topology. // // It owns making changes to the BinlogPlayerMap. The input for this is the // tablet type (has to be master), and the shard's SourceShards. // // It owns updating the blacklisted tables. // // It owns updating the stats record for 'TabletType'. // // It owns starting and stopping the update stream service. // // It owns reading the TabletControl for the current tablet, and storing it. func (agent *ActionAgent) changeCallback(ctx context.Context, oldTablet, newTablet *topodatapb.Tablet) error { span := trace.NewSpanFromContext(ctx) span.StartLocal("ActionAgent.changeCallback") defer span.Finish() allowQuery := topo.IsRunningQueryService(newTablet.Type) // Read the shard to get SourceShards / TabletControlMap if // we're going to use it. var shardInfo *topo.ShardInfo var tabletControl *topodatapb.Shard_TabletControl var err error var disallowQueryReason string var blacklistedTables []string updateBlacklistedTables := true if allowQuery { shardInfo, err = agent.TopoServer.GetShard(ctx, newTablet.Keyspace, newTablet.Shard) if err != nil { log.Errorf("Cannot read shard for this tablet %v, might have inaccurate SourceShards and TabletControls: %v", newTablet.Alias, err) updateBlacklistedTables = false } else { if newTablet.Type == topodatapb.TabletType_MASTER { if len(shardInfo.SourceShards) > 0 { allowQuery = false disallowQueryReason = "master tablet with filtered replication on" } } if tc := shardInfo.GetTabletControl(newTablet.Type); tc != nil { if topo.InCellList(newTablet.Alias.Cell, tc.Cells) { if tc.DisableQueryService { allowQuery = false disallowQueryReason = "query service disabled by tablet control" } blacklistedTables = tc.BlacklistedTables tabletControl = tc } } } } else { disallowQueryReason = fmt.Sprintf("not a serving tablet type(%v)", newTablet.Type) } if updateBlacklistedTables { if err := agent.loadBlacklistRules(newTablet, blacklistedTables); err != nil { // FIXME(alainjobart) how to handle this error? log.Errorf("Cannot update blacklisted tables rule: %v", err) } } if allowQuery { // Query service should be running. if oldTablet.Type == topodatapb.TabletType_REPLICA && newTablet.Type == topodatapb.TabletType_MASTER { // When promoting from replica to master, allow both master and replica // queries to be served during gracePeriod. if err := agent.QueryServiceControl.SetServingType(newTablet.Type, true, []topodatapb.TabletType{oldTablet.Type}); err != nil { log.Errorf("Can't start query service for MASTER+REPLICA mode: %v", err) } else { // If successful, broadcast to vtgate and then wait. agent.broadcastHealth() time.Sleep(*gracePeriod) } } if err := agent.allowQueries(newTablet.Type); err != nil { log.Errorf("Cannot start query service: %v", err) } } else { // Query service should be stopped. if (oldTablet.Type == topodatapb.TabletType_REPLICA || oldTablet.Type == topodatapb.TabletType_RDONLY) && newTablet.Type == topodatapb.TabletType_SPARE { // When a non-MASTER serving type is going SPARE, // put query service in lameduck during gracePeriod. agent.enterLameduck(disallowQueryReason) agent.broadcastHealth() time.Sleep(*gracePeriod) } agent.disallowQueries(newTablet.Type, disallowQueryReason) } // save the tabletControl we've been using, so the background // healthcheck makes the same decisions as we've been making. agent.setTabletControl(tabletControl) // update stream needs to be started or stopped too if topo.IsRunningUpdateStream(newTablet.Type) { agent.UpdateStream.Enable() } else { agent.UpdateStream.Disable() } // upate the stats to our current type if agent.exportStats { agent.statsTabletType.Set(strings.ToLower(newTablet.Type.String())) } // See if we need to start or stop any binlog player if agent.BinlogPlayerMap != nil { if newTablet.Type == topodatapb.TabletType_MASTER { agent.BinlogPlayerMap.RefreshMap(agent.batchCtx, newTablet, shardInfo) } else { agent.BinlogPlayerMap.StopAllPlayersAndReset() } } return nil }