func init() { t, err := time.Parse(time.UnixDate, buildTime) if buildTime != "" && err != nil { panic(fmt.Sprintf("Couldn't parse build timestamp %q: %v", buildTime, err)) } stats.NewString("BuildHost").Set(buildHost) stats.NewString("BuildUser").Set(buildUser) stats.NewInt("BuildTimestamp").Set(t.Unix()) stats.NewString("BuildGitRev").Set(buildGitRev) }
func exportBinaryVersion() error { hasher := md5.New() exeFile, err := os.Open("/proc/self/exe") if err != nil { return err } if _, err = io.Copy(hasher, exeFile); err != nil { return err } md5sum := hex.EncodeToString(hasher.Sum(nil)) fileInfo, err := exeFile.Stat() if err != nil { return err } mtime := fileInfo.ModTime().Format(time.RFC3339) version := mtime + " " + md5sum stats.NewString("BinaryVersion").Set(version) // rexport this value for varz scraper // TODO(sougou): remove after varz migration stats.NewString("Version").Set(version) return nil }
func exportBinaryVersion() error { hasher := md5.New() exeFile, err := os.Open("/proc/self/exe") if err != nil { return err } if _, err = io.Copy(hasher, exeFile); err != nil { return err } md5sum := hex.EncodeToString(hasher.Sum(nil)) fileInfo, err := exeFile.Stat() if err != nil { return err } mtime := fileInfo.ModTime().Format(time.RFC3339) version := mtime + " " + md5sum stats.NewString("BinaryVersion").Set(version) return nil }
import ( "fmt" "reflect" "strings" log "github.com/golang/glog" "github.com/youtube/vitess/go/stats" "github.com/youtube/vitess/go/vt/binlog" "github.com/youtube/vitess/go/vt/tabletserver" "github.com/youtube/vitess/go/vt/tabletserver/planbuilder" "github.com/youtube/vitess/go/vt/topo" ) var ( // the stats exported by this module statsType = stats.NewString("TabletType") statsKeyspace = stats.NewString("TabletKeyspace") statsShard = stats.NewString("TabletShard") statsKeyRangeStart = stats.NewString("TabletKeyRangeStart") statsKeyRangeEnd = stats.NewString("TabletKeyRangeEnd") // constants for this module historyLength = 16 ) func (agent *ActionAgent) allowQueries(tablet *topo.Tablet) error { if agent.DBConfigs == nil { // test instance, do nothing return nil }
// NewActionAgent creates a new ActionAgent and registers all the // associated services. // // batchCtx is the context that the agent will use for any background tasks // it spawns. func NewActionAgent( batchCtx context.Context, mysqld mysqlctl.MysqlDaemon, queryServiceControl tabletserver.QueryServiceControl, tabletAlias topo.TabletAlias, dbcfgs *dbconfigs.DBConfigs, mycnf *mysqlctl.Mycnf, port, securePort, gRPCPort int, overridesFile string, lockTimeout time.Duration, ) (agent *ActionAgent, err error) { schemaOverrides := loadSchemaOverrides(overridesFile) topoServer := topo.GetServer() agent = &ActionAgent{ QueryServiceControl: queryServiceControl, HealthReporter: health.DefaultAggregator, batchCtx: batchCtx, TopoServer: topoServer, TabletAlias: tabletAlias, MysqlDaemon: mysqld, DBConfigs: dbcfgs, SchemaOverrides: schemaOverrides, LockTimeout: lockTimeout, History: history.New(historyLength), lastHealthMapCount: stats.NewInt("LastHealthMapCount"), _healthy: fmt.Errorf("healthcheck not run yet"), healthStreamMap: make(map[int]chan<- *actionnode.HealthStreamReply), } // try to initialize the tablet if we have to if err := agent.InitTablet(port, securePort, gRPCPort); err != nil { return nil, fmt.Errorf("agent.InitTablet failed: %v", err) } // Publish and set the TargetTabletType. Not a global var // since it should never be changed. statsTabletType := stats.NewString("TargetTabletType") statsTabletType.Set(*targetTabletType) // Start the binlog player services, not playing at start. 
agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, &dbcfgs.Filtered, mysqld) RegisterBinlogPlayerMap(agent.BinlogPlayerMap) // try to figure out the mysql port mysqlPort := mycnf.MysqlPort if mysqlPort == 0 { // we don't know the port, try to get it from mysqld var err error mysqlPort, err = mysqld.GetMysqlPort() if err != nil { log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err) } } if err := agent.Start(batchCtx, mysqlPort, port, securePort, gRPCPort); err != nil { return nil, err } // register the RPC services from the agent agent.registerQueryService() // two cases then: // - restoreFromBackup is set: we restore, then initHealthCheck, all // in the background // - restoreFromBackup is not set: we initHealthCheck right away if *restoreFromBackup { go func() { // restoreFromBackup wil just be a regular action // (same as if it was triggered remotely) if err := agent.RestoreFromBackup(batchCtx); err != nil { println(fmt.Sprintf("RestoreFromBackup failed: %v", err)) log.Fatalf("RestoreFromBackup failed: %v", err) } // after the restore is done, start health check agent.initHeathCheck() }() } else { // synchronously start health check if needed agent.initHeathCheck() } return agent, nil }
// Start validates and updates the topology records for the tablet, and performs // the initial state change callback to start tablet services. // If initUpdateStream is set, update stream service will also be registered. func (agent *ActionAgent) Start(ctx context.Context, mysqlPort, vtPort, gRPCPort int32, initUpdateStream bool) error { var err error if _, err = agent.updateTabletFromTopo(ctx); err != nil { return err } // find our hostname as fully qualified, and IP hostname := *tabletHostname if hostname == "" { hostname, err = netutil.FullyQualifiedHostname() if err != nil { return err } } ipAddrs, err := net.LookupHost(hostname) if err != nil { return err } ipAddr := ipAddrs[0] // Update bind addr for mysql and query service in the tablet node. f := func(tablet *pb.Tablet) error { tablet.Hostname = hostname tablet.Ip = ipAddr if tablet.PortMap == nil { tablet.PortMap = make(map[string]int32) } if mysqlPort != 0 { // only overwrite mysql port if we know it, otherwise // leave it as is. 
tablet.PortMap["mysql"] = mysqlPort } if vtPort != 0 { tablet.PortMap["vt"] = vtPort } else { delete(tablet.PortMap, "vt") } delete(tablet.PortMap, "vts") if gRPCPort != 0 { tablet.PortMap["grpc"] = gRPCPort } else { delete(tablet.PortMap, "grpc") } return nil } if err := agent.TopoServer.UpdateTabletFields(ctx, agent.Tablet().Alias, f); err != nil { return err } // Reread to get the changes we just made tablet, err := agent.updateTabletFromTopo(ctx) if err != nil { return err } if err = agent.verifyTopology(ctx); err != nil { return err } if err = agent.verifyServingAddrs(ctx); err != nil { return err } // get and fix the dbname if necessary if !agent.DBConfigs.IsZero() { // Only for real instances // Update our DB config to match the info we have in the tablet if agent.DBConfigs.App.DbName == "" { agent.DBConfigs.App.DbName = topoproto.TabletDbName(tablet.Tablet) } agent.DBConfigs.App.Keyspace = tablet.Keyspace agent.DBConfigs.App.Shard = tablet.Shard } // create and register the RPC services from UpdateStream // (it needs the dbname, so it has to be delayed up to here, // but it has to be before updateState below that may use it) if initUpdateStream { us := binlog.NewUpdateStream(agent.MysqlDaemon, agent.DBConfigs.App.DbName) us.RegisterService() agent.UpdateStream = us } // initialize tablet server if err := agent.QueryServiceControl.InitDBConfig(pbq.Target{ Keyspace: tablet.Keyspace, Shard: tablet.Shard, TabletType: tablet.Type, }, agent.DBConfigs, agent.SchemaOverrides, agent.MysqlDaemon); err != nil { return fmt.Errorf("failed to InitDBConfig: %v", err) } // export a few static variables if agent.exportStats { statsKeyspace := stats.NewString("TabletKeyspace") statsShard := stats.NewString("TabletShard") statsKeyRangeStart := stats.NewString("TabletKeyRangeStart") statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd") statsKeyspace.Set(tablet.Keyspace) statsShard.Set(tablet.Shard) if key.KeyRangeIsPartial(tablet.KeyRange) { 
statsKeyRangeStart.Set(hex.EncodeToString(tablet.KeyRange.Start)) statsKeyRangeEnd.Set(hex.EncodeToString(tablet.KeyRange.End)) } } // initialize the key range query rule if err := agent.initializeKeyRangeRule(ctx, tablet.Keyspace, tablet.KeyRange); err != nil { return err } // and update our state oldTablet := &pb.Tablet{} if err = agent.updateState(ctx, oldTablet, "Start"); err != nil { log.Warningf("Initial updateState failed, will need a state change before running properly: %v", err) } return nil }
// called in a go routine. When the passed in context is canceled, Run() // should exit as soon as possible. Run(context.Context) error } var ( retryDuration = flag.Duration("retry_duration", 2*time.Hour, "Amount of time we wait before giving up on a retryable action (e.g. write to destination, waiting for healthy tablets)") executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls") remoteActionsTimeout = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)") useV3ReshardingMode = flag.Bool("use_v3_resharding_mode", false, "True iff the workers should use V3-style resharding, which doesn't require a preset sharding key column.") healthCheckTopologyRefresh = flag.Duration("worker_healthcheck_topology_refresh", 30*time.Second, "refresh interval for re-reading the topology") healthcheckRetryDelay = flag.Duration("worker_healthcheck_retry_delay", 5*time.Second, "delay before retrying a failed healthcheck") healthCheckTimeout = flag.Duration("worker_healthcheck_timeout", time.Minute, "the health check timeout period") statsState = stats.NewString("WorkerState") // statsRetryCount is the total number of times a query to vttablet had to be retried. statsRetryCount = stats.NewInt("WorkerRetryCount") // statsRetryCount groups the number of retries by category e.g. "TimeoutError" or "Readonly". statsRetryCounters = stats.NewCounters("WorkerRetryCounters") // statsThrottledCounters is the number of times a write has been throttled, // grouped by (keyspace, shard, threadID). Mainly used for testing. // If throttling is enabled, this should always be non-zero for all threads. statsThrottledCounters = stats.NewMultiCounters("WorkerThrottledCounters", []string{"keyspace", "shardname", "thread_id"}) // statsStateDurations tracks for each state how much time was spent in it. Mainly used for testing. 
statsStateDurationsNs = stats.NewCounters("WorkerStateDurations") // statsOnlineInsertsCounters tracks for every table how many rows were // inserted during the online clone (reconciliation) phase. statsOnlineInsertsCounters = stats.NewCounters("WorkerOnlineInsertsCounters") // statsOnlineUpdatesCounters tracks for every table how many rows were updated.
// NewActionAgent creates a new ActionAgent and registers all the // associated services. // // batchCtx is the context that the agent will use for any background tasks // it spawns. func NewActionAgent( batchCtx context.Context, mysqld mysqlctl.MysqlDaemon, queryServiceControl tabletserver.Controller, tabletAlias *topodatapb.TabletAlias, dbcfgs dbconfigs.DBConfigs, mycnf *mysqlctl.Mycnf, port, gRPCPort int32, overridesFile string, ) (agent *ActionAgent, err error) { schemaOverrides := loadSchemaOverrides(overridesFile) topoServer := topo.GetServer() agent = &ActionAgent{ QueryServiceControl: queryServiceControl, HealthReporter: health.DefaultAggregator, batchCtx: batchCtx, TopoServer: topoServer, TabletAlias: tabletAlias, MysqlDaemon: mysqld, DBConfigs: dbcfgs, SchemaOverrides: schemaOverrides, History: history.New(historyLength), lastHealthMapCount: stats.NewInt("LastHealthMapCount"), _healthy: fmt.Errorf("healthcheck not run yet"), } agent.registerQueryRuleSources() // try to initialize the tablet if we have to if err := agent.InitTablet(port, gRPCPort); err != nil { return nil, fmt.Errorf("agent.InitTablet failed: %v", err) } // Publish and set the TargetTabletType. Not a global var // since it should never be changed. statsTabletType := stats.NewString("TargetTabletType") statsTabletType.Set(*targetTabletType) // Create the TabletType stats agent.exportStats = true agent.statsTabletType = stats.NewString("TabletType") // Start the binlog player services, not playing at start. agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient { return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered) }) // Stop all binlog players upon entering lameduck. 
servenv.OnTerm(agent.BinlogPlayerMap.StopAllPlayersAndReset) RegisterBinlogPlayerMap(agent.BinlogPlayerMap) // try to figure out the mysql port mysqlPort := mycnf.MysqlPort if mysqlPort == 0 { // we don't know the port, try to get it from mysqld var err error mysqlPort, err = mysqld.GetMysqlPort() if err != nil { log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err) } } // Start will get the tablet info, and update our state from it if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil { return nil, err } // register the RPC services from the agent agent.registerQueryService() // two cases then: // - restoreFromBackup is set: we restore, then initHealthCheck, all // in the background // - restoreFromBackup is not set: we initHealthCheck right away if *restoreFromBackup { go func() { // restoreFromBackup wil just be a regular action // (same as if it was triggered remotely) if err := agent.RestoreFromBackup(batchCtx); err != nil { println(fmt.Sprintf("RestoreFromBackup failed: %v", err)) log.Fatalf("RestoreFromBackup failed: %v", err) } // after the restore is done, start health check agent.initHealthCheck() }() } else { // synchronously start health check if needed agent.initHealthCheck() } return agent, nil }
func init() { servenv.RegisterDefaultFlags() stats.NewString("BinaryName").Set("vtaction") }
// InitAgent initializes the agent within vttablet. func InitAgent( tabletAlias topo.TabletAlias, dbcfgs *dbconfigs.DBConfigs, mycnf *mysqlctl.Mycnf, port, securePort int, overridesFile string, ) (agent *tabletmanager.ActionAgent, err error) { schemaOverrides := loadSchemaOverrides(overridesFile) topoServer := topo.GetServer() mysqld := mysqlctl.NewMysqld(mycnf, &dbcfgs.Dba, &dbcfgs.Repl) statsType := stats.NewString("TabletType") statsKeyspace := stats.NewString("TabletKeyspace") statsShard := stats.NewString("TabletShard") statsKeyRangeStart := stats.NewString("TabletKeyRangeStart") statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd") agent, err = tabletmanager.NewActionAgent(topoServer, tabletAlias, mysqld) if err != nil { return nil, err } // Start the binlog player services, not playing at start. agent.BinlogPlayerMap = tabletmanager.NewBinlogPlayerMap(topoServer, &dbcfgs.App.ConnectionParams, mysqld) tabletmanager.RegisterBinlogPlayerMap(agent.BinlogPlayerMap) // Action agent listens to changes in zookeeper and makes // modifications to this tablet. 
agent.AddChangeCallback(func(oldTablet, newTablet topo.Tablet) { allowQuery := true var shardInfo *topo.ShardInfo var keyspaceInfo *topo.KeyspaceInfo if newTablet.Type == topo.TYPE_MASTER { // read the shard to get SourceShards shardInfo, err = topoServer.GetShard(newTablet.Keyspace, newTablet.Shard) if err != nil { log.Errorf("Cannot read shard for this tablet %v: %v", newTablet.Alias, err) } else { allowQuery = len(shardInfo.SourceShards) == 0 } // read the keyspace to get ShardingColumnType keyspaceInfo, err = topoServer.GetKeyspace(newTablet.Keyspace) switch err { case nil: // continue case topo.ErrNoNode: // backward compatible mode keyspaceInfo = topo.NewKeyspaceInfo(newTablet.Keyspace, &topo.Keyspace{}) default: log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err) keyspaceInfo = nil } } if newTablet.IsRunningQueryService() && allowQuery { if dbcfgs.App.DbName == "" { dbcfgs.App.DbName = newTablet.DbName() } dbcfgs.App.Keyspace = newTablet.Keyspace dbcfgs.App.Shard = newTablet.Shard if newTablet.Type != topo.TYPE_MASTER { dbcfgs.App.EnableInvalidator = true } else { dbcfgs.App.EnableInvalidator = false } // Transitioning from replica to master, first disconnect // existing connections. "false" indicateds that clients must // re-resolve their endpoint before reconnecting. if newTablet.Type == topo.TYPE_MASTER && oldTablet.Type != topo.TYPE_MASTER { ts.DisallowQueries() } qrs := ts.LoadCustomRules() if newTablet.KeyRange.IsPartial() { qr := ts.NewQueryRule("enforce keyspace_id range", "keyspace_id_not_in_range", ts.QR_FAIL_QUERY) qr.AddPlanCond(sqlparser.PLAN_INSERT_PK) err = qr.AddBindVarCond("keyspace_id", true, true, ts.QR_NOTIN, newTablet.KeyRange) if err != nil { log.Warningf("Unable to add keyspace rule: %v", err) } else { qrs.Add(qr) } } ts.AllowQueries(&dbcfgs.App, schemaOverrides, qrs, mysqld) // Disable before enabling to force existing streams to stop. 
binlog.DisableUpdateStreamService() binlog.EnableUpdateStreamService(dbcfgs) } else { ts.DisallowQueries() binlog.DisableUpdateStreamService() } statsType.Set(string(newTablet.Type)) statsKeyspace.Set(newTablet.Keyspace) statsShard.Set(newTablet.Shard) statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex())) statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex())) // See if we need to start or stop any binlog player if newTablet.Type == topo.TYPE_MASTER { agent.BinlogPlayerMap.RefreshMap(newTablet, keyspaceInfo, shardInfo) } else { agent.BinlogPlayerMap.StopAllPlayersAndReset() } }) if err := agent.Start(mysqld.Port(), port, securePort); err != nil { return nil, err } // register the RPC services from the agent agent.RegisterQueryService() return agent, nil }
// init publishes the name of this binary as the "BinaryName" stats variable.
func init() {
	binaryName := stats.NewString("BinaryName")
	binaryName.Set("vtaction")
}
// InitAgent initializes the agent within vttablet. func InitAgent( tabletAlias topo.TabletAlias, dbcfgs dbconfigs.DBConfigs, mycnf *mysqlctl.Mycnf, dbConfigsFile, dbCredentialsFile string, port, securePort int, mycnfFile, overridesFile string) (err error) { schemaOverrides := loadSchemaOverrides(overridesFile) topoServer := topo.GetServer() mysqld := mysqlctl.NewMysqld(mycnf, dbcfgs.Dba, dbcfgs.Repl) // Start the binlog server service, disabled at start. binlogServer = mysqlctl.NewBinlogServer(mysqld) mysqlctl.RegisterBinlogServerService(binlogServer) // Start the binlog player services, not playing at start. binlogPlayerMap = NewBinlogPlayerMap(topoServer, dbcfgs.App.MysqlParams(), mysqld) RegisterBinlogPlayerMap(binlogPlayerMap) // Compute the bind addresses bindAddr := fmt.Sprintf(":%v", port) secureAddr := "" if securePort != 0 { secureAddr = fmt.Sprintf(":%v", securePort) } statsType := stats.NewString("TabletType") statsKeyspace := stats.NewString("TabletKeyspace") statsShard := stats.NewString("TabletShard") statsKeyRangeStart := stats.NewString("TabletKeyRangeStart") statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd") // Action agent listens to changes in zookeeper and makes // modifications to this tablet. agent, err = tm.NewActionAgent(topoServer, tabletAlias, mycnfFile, dbConfigsFile, dbCredentialsFile) if err != nil { return err } agent.AddChangeCallback(func(oldTablet, newTablet topo.Tablet) { if newTablet.IsServingType() { if dbcfgs.App.DbName == "" { dbcfgs.App.DbName = newTablet.DbName() } dbcfgs.App.Keyspace = newTablet.Keyspace dbcfgs.App.Shard = newTablet.Shard // Transitioning from replica to master, first disconnect // existing connections. "false" indicateds that clients must // re-resolve their endpoint before reconnecting. 
if newTablet.Type == topo.TYPE_MASTER && oldTablet.Type != topo.TYPE_MASTER { ts.DisallowQueries() } qrs := ts.LoadCustomRules() if newTablet.KeyRange.IsPartial() { qr := ts.NewQueryRule("enforce keyspace_id range", "keyspace_id_not_in_range", ts.QR_FAIL_QUERY) qr.AddPlanCond(sqlparser.PLAN_INSERT_PK) err = qr.AddBindVarCond("keyspace_id", true, true, ts.QR_NOTIN, newTablet.KeyRange) if err != nil { log.Warningf("Unable to add keyspace rule: %v", err) } else { qrs.Add(qr) } } ts.AllowQueries(dbcfgs.App, schemaOverrides, qrs) mysqlctl.EnableUpdateStreamService(string(newTablet.Type), dbcfgs) if newTablet.Type != topo.TYPE_MASTER { ts.StartRowCacheInvalidation() } } else { ts.DisallowQueries() ts.StopRowCacheInvalidation() mysqlctl.DisableUpdateStreamService() } statsType.Set(string(newTablet.Type)) statsKeyspace.Set(newTablet.Keyspace) statsShard.Set(newTablet.Shard) statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex())) statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex())) // BinlogServer is only enabled for replicas if newTablet.Type == topo.TYPE_REPLICA { if !mysqlctl.IsBinlogServerEnabled(binlogServer) { mysqlctl.EnableBinlogServerService(binlogServer, dbcfgs.App.DbName) } } else { if mysqlctl.IsBinlogServerEnabled(binlogServer) { mysqlctl.DisableBinlogServerService(binlogServer) } } // See if we need to start or stop any binlog player if newTablet.Type == topo.TYPE_MASTER { binlogPlayerMap.RefreshMap(newTablet) } else { binlogPlayerMap.StopAllPlayers() } }) if err := agent.Start(bindAddr, secureAddr, mysqld.Addr()); err != nil { return err } // register the RPC services from the agent agent.RegisterQueryService(mysqld) return nil }
// Start validates and updates the topology records for the tablet, and performs // the initial state change callback to start tablet services. // If initUpdateStream is set, update stream service will also be registered. func (agent *ActionAgent) Start(ctx context.Context, mysqlPort, vtPort, gRPCPort int32, initUpdateStream bool) error { // find our hostname as fully qualified, and IP hostname := *tabletHostname if hostname == "" { var err error hostname, err = netutil.FullyQualifiedHostname() if err != nil { return err } } ipAddrs, err := net.LookupHost(hostname) if err != nil { return err } ipAddr := ipAddrs[0] // Update bind addr for mysql and query service in the tablet node. f := func(tablet *topodatapb.Tablet) error { tablet.Hostname = hostname tablet.Ip = ipAddr if tablet.PortMap == nil { tablet.PortMap = make(map[string]int32) } if mysqlPort != 0 { // only overwrite mysql port if we know it, otherwise // leave it as is. tablet.PortMap["mysql"] = mysqlPort } if vtPort != 0 { tablet.PortMap["vt"] = vtPort } else { delete(tablet.PortMap, "vt") } delete(tablet.PortMap, "vts") if gRPCPort != 0 { tablet.PortMap["grpc"] = gRPCPort } else { delete(tablet.PortMap, "grpc") } // Save the original tablet for ownership tests later. agent.initialTablet = tablet return nil } if _, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias, f); err != nil { return err } // Verify the topology is correct. agent.verifyTopology(ctx) // Get and fix the dbname if necessary, only for real instances. 
if !agent.DBConfigs.IsZero() { dbname := topoproto.TabletDbName(agent.initialTablet) // Update our DB config to match the info we have in the tablet if agent.DBConfigs.App.DbName == "" { agent.DBConfigs.App.DbName = dbname } if agent.DBConfigs.Filtered.DbName == "" { agent.DBConfigs.Filtered.DbName = dbname } } // create and register the RPC services from UpdateStream // (it needs the dbname, so it has to be delayed up to here, // but it has to be before updateState below that may use it) if initUpdateStream { us := binlog.NewUpdateStream(agent.MysqlDaemon, agent.DBConfigs.App.DbName) agent.UpdateStream = us servenv.OnRun(func() { us.RegisterService() }) } servenv.OnTerm(func() { // Disable UpdateStream (if any) upon entering lameduck. // We do this regardless of initUpdateStream, since agent.UpdateStream // may have been set from elsewhere. if agent.UpdateStream != nil { agent.UpdateStream.Disable() } }) // initialize tablet server if err := agent.QueryServiceControl.InitDBConfig(querypb.Target{ Keyspace: agent.initialTablet.Keyspace, Shard: agent.initialTablet.Shard, TabletType: agent.initialTablet.Type, }, agent.DBConfigs, agent.MysqlDaemon); err != nil { return fmt.Errorf("failed to InitDBConfig: %v", err) } // export a few static variables if agent.exportStats { statsKeyspace := stats.NewString("TabletKeyspace") statsShard := stats.NewString("TabletShard") statsKeyRangeStart := stats.NewString("TabletKeyRangeStart") statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd") statsKeyspace.Set(agent.initialTablet.Keyspace) statsShard.Set(agent.initialTablet.Shard) if key.KeyRangeIsPartial(agent.initialTablet.KeyRange) { statsKeyRangeStart.Set(hex.EncodeToString(agent.initialTablet.KeyRange.Start)) statsKeyRangeEnd.Set(hex.EncodeToString(agent.initialTablet.KeyRange.End)) } } // Initialize the current tablet to match our current running // state: Has most field filled in, but type is UNKNOWN. 
// Subsequents calls to updateState or refreshTablet // will then work as expected. startingTablet := proto.Clone(agent.initialTablet).(*topodatapb.Tablet) startingTablet.Type = topodatapb.TabletType_UNKNOWN agent.setTablet(startingTablet) // run a background task to rebuild the SrvKeyspace in our cell/keyspace // if it doesn't exist yet go agent.maybeRebuildKeyspace(agent.initialTablet.Alias.Cell, agent.initialTablet.Keyspace) return nil }
// NewActionAgent creates a new ActionAgent and registers all the // associated services. // // batchCtx is the context that the agent will use for any background tasks // it spawns. func NewActionAgent( batchCtx context.Context, mysqld mysqlctl.MysqlDaemon, queryServiceControl tabletserver.Controller, tabletAlias *topodatapb.TabletAlias, dbcfgs dbconfigs.DBConfigs, mycnf *mysqlctl.Mycnf, port, gRPCPort int32, ) (agent *ActionAgent, err error) { topoServer := topo.GetServer() orc, err := newOrcClient() if err != nil { return nil, err } agent = &ActionAgent{ QueryServiceControl: queryServiceControl, HealthReporter: health.DefaultAggregator, batchCtx: batchCtx, TopoServer: topoServer, TabletAlias: tabletAlias, MysqlDaemon: mysqld, DBConfigs: dbcfgs, History: history.New(historyLength), _healthy: fmt.Errorf("healthcheck not run yet"), orc: orc, } agent.registerQueryRuleSources() // try to initialize the tablet if we have to if err := agent.InitTablet(port, gRPCPort); err != nil { return nil, fmt.Errorf("agent.InitTablet failed: %v", err) } // Create the TabletType stats agent.exportStats = true agent.statsTabletType = stats.NewString("TabletType") // Start the binlog player services, not playing at start. agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient { return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered) }) // Stop all binlog players upon entering lameduck. 
servenv.OnTerm(agent.BinlogPlayerMap.StopAllPlayersAndReset) RegisterBinlogPlayerMap(agent.BinlogPlayerMap) // try to figure out the mysql port mysqlPort := mycnf.MysqlPort if mysqlPort == 0 { // we don't know the port, try to get it from mysqld var err error mysqlPort, err = mysqld.GetMysqlPort() if err != nil { log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err) } } // Start will get the tablet info, and update our state from it if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil { return nil, err } // register the RPC services from the agent servenv.OnRun(func() { agent.registerQueryService() }) // two cases then: // - restoreFromBackup is set: we restore, then initHealthCheck, all // in the background // - restoreFromBackup is not set: we initHealthCheck right away if *restoreFromBackup { go func() { // restoreFromBackup wil just be a regular action // (same as if it was triggered remotely) if err := agent.RestoreData(batchCtx, logutil.NewConsoleLogger(), false /* deleteBeforeRestore */); err != nil { println(fmt.Sprintf("RestoreFromBackup failed: %v", err)) log.Fatalf("RestoreFromBackup failed: %v", err) } // after the restore is done, start health check agent.initHealthCheck() }() } else { // update our state if err := agent.refreshTablet(batchCtx, "Start"); err != nil { return nil, err } // synchronously start health check if needed agent.initHealthCheck() } // Start periodic Orchestrator self-registration, if configured. if agent.orc != nil { go agent.orc.DiscoverLoop(agent) } return agent, nil }
// Start validates and updates the topology records for the tablet, and performs // the initial state change callback to start tablet services. // If initUpdateStream is set, update stream service will also be registered. func (agent *ActionAgent) Start(ctx context.Context, mysqlPort, vtPort, gRPCPort int32, initUpdateStream bool) error { var err error if _, err = agent.updateTabletFromTopo(ctx); err != nil { return err } // find our hostname as fully qualified, and IP hostname := *tabletHostname if hostname == "" { hostname, err = netutil.FullyQualifiedHostname() if err != nil { return err } } ipAddrs, err := net.LookupHost(hostname) if err != nil { return err } ipAddr := ipAddrs[0] // Update bind addr for mysql and query service in the tablet node. f := func(tablet *topodatapb.Tablet) error { tablet.Hostname = hostname tablet.Ip = ipAddr if tablet.PortMap == nil { tablet.PortMap = make(map[string]int32) } if mysqlPort != 0 { // only overwrite mysql port if we know it, otherwise // leave it as is. tablet.PortMap["mysql"] = mysqlPort } if vtPort != 0 { tablet.PortMap["vt"] = vtPort } else { delete(tablet.PortMap, "vt") } delete(tablet.PortMap, "vts") if gRPCPort != 0 { tablet.PortMap["grpc"] = gRPCPort } else { delete(tablet.PortMap, "grpc") } return nil } if _, err := agent.TopoServer.UpdateTabletFields(ctx, agent.Tablet().Alias, f); err != nil { return err } // Reread to get the changes we just made tablet, err := agent.updateTabletFromTopo(ctx) if err != nil { return err } // Save the original tablet record as it is now (at startup). 
agent.initialTablet = proto.Clone(tablet).(*topodatapb.Tablet) if err = agent.verifyTopology(ctx); err != nil { return err } // get and fix the dbname if necessary if !agent.DBConfigs.IsZero() { // Only for real instances // Update our DB config to match the info we have in the tablet if agent.DBConfigs.App.DbName == "" { agent.DBConfigs.App.DbName = topoproto.TabletDbName(tablet) } if agent.DBConfigs.Filtered.DbName == "" { agent.DBConfigs.Filtered.DbName = topoproto.TabletDbName(tablet) } agent.DBConfigs.App.Keyspace = tablet.Keyspace agent.DBConfigs.App.Shard = tablet.Shard } // create and register the RPC services from UpdateStream // (it needs the dbname, so it has to be delayed up to here, // but it has to be before updateState below that may use it) if initUpdateStream { us := binlog.NewUpdateStream(agent.MysqlDaemon, agent.DBConfigs.App.DbName) agent.UpdateStream = us servenv.OnRun(func() { us.RegisterService() }) } servenv.OnTerm(func() { // Disable UpdateStream (if any) upon entering lameduck. // We do this regardless of initUpdateStream, since agent.UpdateStream // may have been set from elsewhere. 
if agent.UpdateStream != nil { agent.UpdateStream.Disable() } }) // initialize tablet server if err := agent.QueryServiceControl.InitDBConfig(querypb.Target{ Keyspace: tablet.Keyspace, Shard: tablet.Shard, TabletType: tablet.Type, }, agent.DBConfigs, agent.MysqlDaemon); err != nil { return fmt.Errorf("failed to InitDBConfig: %v", err) } // export a few static variables if agent.exportStats { statsKeyspace := stats.NewString("TabletKeyspace") statsShard := stats.NewString("TabletShard") statsKeyRangeStart := stats.NewString("TabletKeyRangeStart") statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd") statsKeyspace.Set(tablet.Keyspace) statsShard.Set(tablet.Shard) if key.KeyRangeIsPartial(tablet.KeyRange) { statsKeyRangeStart.Set(hex.EncodeToString(tablet.KeyRange.Start)) statsKeyRangeEnd.Set(hex.EncodeToString(tablet.KeyRange.End)) } } // initialize the key range query rule if err := agent.initializeKeyRangeRule(ctx, tablet.Keyspace, tablet.KeyRange); err != nil { return err } // update our state oldTablet := &topodatapb.Tablet{} agent.updateState(ctx, oldTablet, "Start") // run a background task to rebuild the SrvKeyspace in our cell/keyspace // if it doesn't exist yet go agent.maybeRebuildKeyspace(tablet.Alias.Cell, tablet.Keyspace) return nil }
// InitAgent initializes the agent within vttablet. func InitAgent( tabletAlias topo.TabletAlias, dbcfgs *dbconfigs.DBConfigs, mycnf *mysqlctl.Mycnf, port, securePort int, overridesFile string, ) (agent *tabletmanager.ActionAgent, err error) { schemaOverrides := loadSchemaOverrides(overridesFile) topoServer := topo.GetServer() mysqld := mysqlctl.NewMysqld(mycnf, &dbcfgs.Dba, &dbcfgs.Repl) statsType := stats.NewString("TabletType") statsKeyspace := stats.NewString("TabletKeyspace") statsShard := stats.NewString("TabletShard") statsKeyRangeStart := stats.NewString("TabletKeyRangeStart") statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd") agent, err = tabletmanager.NewActionAgent(topoServer, tabletAlias, mysqld) if err != nil { return nil, err } // Start the binlog player services, not playing at start. agent.BinlogPlayerMap = tabletmanager.NewBinlogPlayerMap(topoServer, &dbcfgs.App.ConnectionParams, mysqld) tabletmanager.RegisterBinlogPlayerMap(agent.BinlogPlayerMap) // Action agent listens to changes in zookeeper and makes // modifications to this tablet. 
agent.AddChangeCallback(func(oldTablet, newTablet topo.Tablet) { allowQuery := true var shardInfo *topo.ShardInfo var keyspaceInfo *topo.KeyspaceInfo if newTablet.Type == topo.TYPE_MASTER { // read the shard to get SourceShards shardInfo, err = topoServer.GetShard(newTablet.Keyspace, newTablet.Shard) if err != nil { log.Errorf("Cannot read shard for this tablet %v: %v", newTablet.Alias, err) } else { allowQuery = len(shardInfo.SourceShards) == 0 } // read the keyspace to get ShardingColumnType keyspaceInfo, err = topoServer.GetKeyspace(newTablet.Keyspace) switch err { case nil: // continue case topo.ErrNoNode: // backward compatible mode keyspaceInfo = topo.NewKeyspaceInfo(newTablet.Keyspace, &topo.Keyspace{}) default: log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err) keyspaceInfo = nil } } if newTablet.IsRunningQueryService() && allowQuery { if dbcfgs.App.DbName == "" { dbcfgs.App.DbName = newTablet.DbName() } dbcfgs.App.Keyspace = newTablet.Keyspace dbcfgs.App.Shard = newTablet.Shard if newTablet.Type != topo.TYPE_MASTER { dbcfgs.App.EnableInvalidator = true } else { dbcfgs.App.EnableInvalidator = false } // There are a few transitions when we're // going to need to restart the query service: // - transitioning from replica to master, so clients // that were already connected don't keep on using // the master as replica or rdonly. // - having different parameters for the query // service. It needs to stop and restart with the // new parameters. 
That includes: // - changing KeyRange // - changing the BlacklistedTables list if (newTablet.Type == topo.TYPE_MASTER && oldTablet.Type != topo.TYPE_MASTER) || (newTablet.KeyRange != oldTablet.KeyRange) || !reflect.DeepEqual(newTablet.BlacklistedTables, oldTablet.BlacklistedTables) { ts.DisallowQueries() } qrs := ts.LoadCustomRules() if newTablet.KeyRange.IsPartial() { qr := ts.NewQueryRule("enforce keyspace_id range", "keyspace_id_not_in_range", ts.QR_FAIL_QUERY) qr.AddPlanCond(sqlparser.PLAN_INSERT_PK) err = qr.AddBindVarCond("keyspace_id", true, true, ts.QR_NOTIN, newTablet.KeyRange) if err != nil { log.Warningf("Unable to add keyspace rule: %v", err) } else { qrs.Add(qr) } } if len(newTablet.BlacklistedTables) > 0 { log.Infof("Blacklisting tables %v", strings.Join(newTablet.BlacklistedTables, ", ")) qr := ts.NewQueryRule("enforce blacklisted tables", "blacklisted_table", ts.QR_FAIL_QUERY) for _, t := range newTablet.BlacklistedTables { qr.AddTableCond(t) } qrs.Add(qr) } ts.AllowQueries(&dbcfgs.App, schemaOverrides, qrs, mysqld) // Disable before enabling to force existing streams to stop. binlog.DisableUpdateStreamService() binlog.EnableUpdateStreamService(dbcfgs) } else { ts.DisallowQueries() binlog.DisableUpdateStreamService() } statsType.Set(string(newTablet.Type)) statsKeyspace.Set(newTablet.Keyspace) statsShard.Set(newTablet.Shard) statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex())) statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex())) // See if we need to start or stop any binlog player if newTablet.Type == topo.TYPE_MASTER { agent.BinlogPlayerMap.RefreshMap(newTablet, keyspaceInfo, shardInfo) } else { agent.BinlogPlayerMap.StopAllPlayersAndReset() } }) if err := agent.Start(mysqld.Port(), port, securePort); err != nil { return nil, err } // register the RPC services from the agent agent.RegisterQueryService() // start health check if needed initHeathCheck(agent) return agent, nil }