Example #1
0
// init publishes the build metadata as stats variables: BuildHost,
// BuildUser, BuildTimestamp (Unix seconds) and BuildGitRev.
//
// A non-empty buildTime that does not parse as time.UnixDate aborts start-up
// with a panic. An empty buildTime is tolerated; BuildTimestamp is then the
// Unix value of the zero time.Time.
func init() {
	// Stays at the zero time.Time when no build timestamp was provided.
	var builtAt time.Time
	if buildTime != "" {
		parsed, err := time.Parse(time.UnixDate, buildTime)
		if err != nil {
			panic(fmt.Sprintf("Couldn't parse build timestamp %q: %v", buildTime, err))
		}
		builtAt = parsed
	}

	stats.NewString("BuildHost").Set(buildHost)
	stats.NewString("BuildUser").Set(buildUser)
	stats.NewInt("BuildTimestamp").Set(builtAt.Unix())
	stats.NewString("BuildGitRev").Set(buildGitRev)
}
Example #2
0
// exportBinaryVersion publishes a version string for the running binary
// under the "BinaryVersion" stat, and again under "Version".
//
// The version string is the modification time of /proc/self/exe formatted
// as RFC 3339, followed by a space and the hex-encoded MD5 digest of the
// file contents.
//
// It returns any error from opening, reading or stat-ing the executable; in
// that case no stat is published.
func exportBinaryVersion() error {
	exeFile, err := os.Open("/proc/self/exe")
	if err != nil {
		return err
	}
	// Close on every return path so the file descriptor is not leaked.
	defer exeFile.Close()

	hasher := md5.New()
	if _, err = io.Copy(hasher, exeFile); err != nil {
		return err
	}
	md5sum := hex.EncodeToString(hasher.Sum(nil))

	fileInfo, err := exeFile.Stat()
	if err != nil {
		return err
	}
	mtime := fileInfo.ModTime().Format(time.RFC3339)

	version := mtime + " " + md5sum
	stats.NewString("BinaryVersion").Set(version)
	// re-export this value for varz scraper
	// TODO(sougou): remove after varz migration
	stats.NewString("Version").Set(version)
	return nil
}
Example #3
0
// exportBinaryVersion publishes a version string for the running binary
// under the "BinaryVersion" stat.
//
// The version string is the modification time of /proc/self/exe formatted
// as RFC 3339, followed by a space and the hex-encoded MD5 digest of the
// file contents.
//
// It returns any error from opening, reading or stat-ing the executable; in
// that case no stat is published.
func exportBinaryVersion() error {
	exeFile, err := os.Open("/proc/self/exe")
	if err != nil {
		return err
	}
	// Close on every return path so the file descriptor is not leaked.
	defer exeFile.Close()

	hasher := md5.New()
	if _, err = io.Copy(hasher, exeFile); err != nil {
		return err
	}
	md5sum := hex.EncodeToString(hasher.Sum(nil))

	fileInfo, err := exeFile.Stat()
	if err != nil {
		return err
	}
	mtime := fileInfo.ModTime().Format(time.RFC3339)

	version := mtime + " " + md5sum
	stats.NewString("BinaryVersion").Set(version)
	return nil
}
Example #4
0
import (
	"fmt"
	"reflect"
	"strings"

	log "github.com/golang/glog"
	"github.com/youtube/vitess/go/stats"
	"github.com/youtube/vitess/go/vt/binlog"
	"github.com/youtube/vitess/go/vt/tabletserver"
	"github.com/youtube/vitess/go/vt/tabletserver/planbuilder"
	"github.com/youtube/vitess/go/vt/topo"
)

var (
	// String stats exported by this module. Each one is created once, at
	// package initialisation, under the name given to stats.NewString.
	statsType          = stats.NewString("TabletType")
	statsKeyspace      = stats.NewString("TabletKeyspace")
	statsShard         = stats.NewString("TabletShard")
	statsKeyRangeStart = stats.NewString("TabletKeyRangeStart")
	statsKeyRangeEnd   = stats.NewString("TabletKeyRangeEnd")

	// Fixed settings for this module. historyLength is declared as a
	// variable rather than a Go constant.
	// NOTE(review): its use is not visible in this snippet — presumably
	// the number of entries kept in the agent's history; confirm.
	historyLength = 16
)

func (agent *ActionAgent) allowQueries(tablet *topo.Tablet) error {
	if agent.DBConfigs == nil {
		// test instance, do nothing
		return nil
	}
Example #5
0
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
//
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
//
// The construction sequence is: load the schema overrides from
// overridesFile, build the agent struct, call InitTablet, publish the
// TargetTabletType stat, create and register the binlog player map,
// determine the mysql port, call Start, register the query service, and
// finally start the health check (after a background restore when
// *restoreFromBackup is set).
//
// It returns a nil agent and a non-nil error if InitTablet or Start fails.
// A failure of the background restore is not returned to the caller; it is
// reported through println and log.Fatalf from the restore goroutine.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.QueryServiceControl,
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort, gRPCPort int,
	overridesFile string,
	lockTimeout time.Duration,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()

	// _healthy starts out as an error so the agent is not considered
	// healthy before the first health check has run.
	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		SchemaOverrides:     schemaOverrides,
		LockTimeout:         lockTimeout,
		History:             history.New(historyLength),
		lastHealthMapCount:  stats.NewInt("LastHealthMapCount"),
		_healthy:            fmt.Errorf("healthcheck not run yet"),
		healthStreamMap:     make(map[int]chan<- *actionnode.HealthStreamReply),
	}

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, securePort, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)
	}

	// Publish and set the TargetTabletType. Not a global var
	// since it should never be changed.
	statsTabletType := stats.NewString("TargetTabletType")
	statsTabletType.Set(*targetTabletType)

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, &dbcfgs.Filtered, mysqld)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld.
		// err is declared locally here, so the named result is untouched;
		// a lookup failure is only logged and the port stays as returned.
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	if err := agent.Start(batchCtx, mysqlPort, port, securePort, gRPCPort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.registerQueryService()

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreFromBackup(batchCtx); err != nil {
				// The failure is written with println (standard error)
				// in addition to the log.Fatalf call.
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)
			}

			// after the restore is done, start health check
			agent.initHeathCheck()
		}()
	} else {
		// synchronously start health check if needed
		agent.initHeathCheck()
	}

	return agent, nil
}
Example #6
0
// Start validates and updates the topology records for the tablet, and performs
// the initial state change callback to start tablet services.
// If initUpdateStream is set, update stream service will also be registered.
//
// mysqlPort, vtPort and gRPCPort are written to the tablet record's PortMap
// under "mysql", "vt" and "grpc". A zero mysqlPort leaves the existing
// "mysql" entry alone; a zero vtPort or gRPCPort removes the corresponding
// entry. The "vts" entry is always removed.
//
// Start returns the first error from reading or updating the topology,
// resolving the hostname, verifying the topology or serving addresses,
// InitDBConfig, or initializing the key range rule. A failure of the
// initial updateState is only logged.
func (agent *ActionAgent) Start(ctx context.Context, mysqlPort, vtPort, gRPCPort int32, initUpdateStream bool) error {
	var err error
	if _, err = agent.updateTabletFromTopo(ctx); err != nil {
		return err
	}

	// find our hostname as fully qualified, and IP.
	// The *tabletHostname value takes precedence when it is non-empty.
	hostname := *tabletHostname
	if hostname == "" {
		hostname, err = netutil.FullyQualifiedHostname()
		if err != nil {
			return err
		}
	}
	ipAddrs, err := net.LookupHost(hostname)
	if err != nil {
		return err
	}
	// Only the first address returned by the lookup is recorded.
	ipAddr := ipAddrs[0]

	// Update bind addr for mysql and query service in the tablet node.
	f := func(tablet *pb.Tablet) error {
		tablet.Hostname = hostname
		tablet.Ip = ipAddr
		if tablet.PortMap == nil {
			tablet.PortMap = make(map[string]int32)
		}
		if mysqlPort != 0 {
			// only overwrite mysql port if we know it, otherwise
			// leave it as is.
			tablet.PortMap["mysql"] = mysqlPort
		}
		if vtPort != 0 {
			tablet.PortMap["vt"] = vtPort
		} else {
			delete(tablet.PortMap, "vt")
		}
		// The "vts" entry is dropped unconditionally.
		delete(tablet.PortMap, "vts")
		if gRPCPort != 0 {
			tablet.PortMap["grpc"] = gRPCPort
		} else {
			delete(tablet.PortMap, "grpc")
		}
		return nil
	}
	if err := agent.TopoServer.UpdateTabletFields(ctx, agent.Tablet().Alias, f); err != nil {
		return err
	}

	// Reread to get the changes we just made
	tablet, err := agent.updateTabletFromTopo(ctx)
	if err != nil {
		return err
	}

	if err = agent.verifyTopology(ctx); err != nil {
		return err
	}

	if err = agent.verifyServingAddrs(ctx); err != nil {
		return err
	}

	// get and fix the dbname if necessary
	if !agent.DBConfigs.IsZero() {
		// Only for real instances
		// Update our DB config to match the info we have in the tablet
		if agent.DBConfigs.App.DbName == "" {
			agent.DBConfigs.App.DbName = topoproto.TabletDbName(tablet.Tablet)
		}
		agent.DBConfigs.App.Keyspace = tablet.Keyspace
		agent.DBConfigs.App.Shard = tablet.Shard
	}

	// create and register the RPC services from UpdateStream
	// (it needs the dbname, so it has to be delayed up to here,
	// but it has to be before updateState below that may use it)
	if initUpdateStream {
		us := binlog.NewUpdateStream(agent.MysqlDaemon, agent.DBConfigs.App.DbName)
		us.RegisterService()
		agent.UpdateStream = us
	}

	// initialize tablet server
	if err := agent.QueryServiceControl.InitDBConfig(pbq.Target{
		Keyspace:   tablet.Keyspace,
		Shard:      tablet.Shard,
		TabletType: tablet.Type,
	}, agent.DBConfigs, agent.SchemaOverrides, agent.MysqlDaemon); err != nil {
		return fmt.Errorf("failed to InitDBConfig: %v", err)
	}

	// export a few static variables
	if agent.exportStats {
		statsKeyspace := stats.NewString("TabletKeyspace")
		statsShard := stats.NewString("TabletShard")
		statsKeyRangeStart := stats.NewString("TabletKeyRangeStart")
		statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd")

		statsKeyspace.Set(tablet.Keyspace)
		statsShard.Set(tablet.Shard)
		// The key range bounds are only set for a partial key range;
		// otherwise both stats keep their initial value.
		if key.KeyRangeIsPartial(tablet.KeyRange) {
			statsKeyRangeStart.Set(hex.EncodeToString(tablet.KeyRange.Start))
			statsKeyRangeEnd.Set(hex.EncodeToString(tablet.KeyRange.End))
		}
	}

	// initialize the key range query rule
	if err := agent.initializeKeyRangeRule(ctx, tablet.Keyspace, tablet.KeyRange); err != nil {
		return err
	}

	// and update our state. An empty tablet is passed as the previous
	// state; a failure here is logged but does not fail Start.
	oldTablet := &pb.Tablet{}
	if err = agent.updateState(ctx, oldTablet, "Start"); err != nil {
		log.Warningf("Initial updateState failed, will need a state change before running properly: %v", err)
	}
	return nil
}
Example #7
0
	// called in a go routine.  When the passed in context is canceled, Run()
	// should exit as soon as possible.
	Run(context.Context) error
}

var (
	retryDuration         = flag.Duration("retry_duration", 2*time.Hour, "Amount of time we wait before giving up on a retryable action (e.g. write to destination, waiting for healthy tablets)")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")
	useV3ReshardingMode   = flag.Bool("use_v3_resharding_mode", false, "True iff the workers should use V3-style resharding, which doesn't require a preset sharding key column.")

	healthCheckTopologyRefresh = flag.Duration("worker_healthcheck_topology_refresh", 30*time.Second, "refresh interval for re-reading the topology")
	healthcheckRetryDelay      = flag.Duration("worker_healthcheck_retry_delay", 5*time.Second, "delay before retrying a failed healthcheck")
	healthCheckTimeout         = flag.Duration("worker_healthcheck_timeout", time.Minute, "the health check timeout period")

	statsState = stats.NewString("WorkerState")
	// statsRetryCount is the total number of times a query to vttablet had to be retried.
	statsRetryCount = stats.NewInt("WorkerRetryCount")
	// statsRetryCounters groups the number of retries by category e.g. "TimeoutError" or "Readonly".
	statsRetryCounters = stats.NewCounters("WorkerRetryCounters")
	// statsThrottledCounters is the number of times a write has been throttled,
	// grouped by (keyspace, shard, threadID). Mainly used for testing.
	// If throttling is enabled, this should always be non-zero for all threads.
	statsThrottledCounters = stats.NewMultiCounters("WorkerThrottledCounters", []string{"keyspace", "shardname", "thread_id"})
	// statsStateDurationsNs tracks for each state how much time was spent in it. Mainly used for testing.
	statsStateDurationsNs = stats.NewCounters("WorkerStateDurations")

	// statsOnlineInsertsCounters tracks for every table how many rows were
	// inserted during the online clone (reconciliation) phase.
	statsOnlineInsertsCounters = stats.NewCounters("WorkerOnlineInsertsCounters")
	// statsOnlineUpdatesCounters tracks for every table how many rows were updated.
Example #8
0
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
//
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
//
// The construction sequence is: load the schema overrides from
// overridesFile, build the agent struct, register the query rule sources,
// call InitTablet, publish the TargetTabletType and TabletType stats,
// create and register the binlog player map, determine the mysql port, call
// Start, register the query service, and finally start the health check
// (after a background restore when *restoreFromBackup is set).
//
// It returns a nil agent and a non-nil error if InitTablet or Start fails.
// A failure of the background restore is not returned to the caller; it is
// reported through println and log.Fatalf from the restore goroutine.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.Controller,
	tabletAlias *topodatapb.TabletAlias,
	dbcfgs dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, gRPCPort int32,
	overridesFile string,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()

	// _healthy starts out as an error so the agent is not considered
	// healthy before the first health check has run.
	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		SchemaOverrides:     schemaOverrides,
		History:             history.New(historyLength),
		lastHealthMapCount:  stats.NewInt("LastHealthMapCount"),
		_healthy:            fmt.Errorf("healthcheck not run yet"),
	}
	agent.registerQueryRuleSources()

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)
	}

	// Publish and set the TargetTabletType. Not a global var
	// since it should never be changed.
	statsTabletType := stats.NewString("TargetTabletType")
	statsTabletType.Set(*targetTabletType)

	// Create the TabletType stats
	agent.exportStats = true
	agent.statsTabletType = stats.NewString("TabletType")

	// Start the binlog player services, not playing at start.
	// The factory reads agent.DBConfigs.Filtered each time it is called.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient {
		return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered)
	})
	// Stop all binlog players upon entering lameduck.
	servenv.OnTerm(agent.BinlogPlayerMap.StopAllPlayersAndReset)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld.
		// err is declared locally here, so the named result is untouched;
		// a lookup failure is only logged and the port stays as returned.
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	// Start will get the tablet info, and update our state from it
	if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.registerQueryService()

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreFromBackup(batchCtx); err != nil {
				// The failure is written with println (standard error)
				// in addition to the log.Fatalf call.
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)
			}

			// after the restore is done, start health check
			agent.initHealthCheck()
		}()
	} else {
		// synchronously start health check if needed
		agent.initHealthCheck()
	}

	return agent, nil
}
Example #9
0
// init calls servenv.RegisterDefaultFlags and publishes the binary name
// "vtaction" under the "BinaryName" stat.
func init() {
	servenv.RegisterDefaultFlags()

	binaryName := stats.NewString("BinaryName")
	binaryName.Set("vtaction")
}
Example #10
0
// InitAgent initializes the agent within vttablet.
//
// It loads the schema overrides from overridesFile, creates the mysqld
// wrapper and the ActionAgent, wires up the binlog player map, installs a
// tablet change callback, starts the agent and registers its query service.
//
// The change callback runs with the old and new tablet records and:
//   - for a master, reads the shard and keyspace records; queries are
//     allowed only when the shard has no SourceShards
//   - enables or disables the query service and the update stream service
//   - publishes the tablet type, keyspace, shard and key range stats
//   - refreshes the binlog player map for a master, stops all players
//     otherwise
//
// InitAgent returns a nil agent and a non-nil error when NewActionAgent or
// agent.Start fails.
func InitAgent(
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort int,
	overridesFile string,
) (agent *tabletmanager.ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()
	mysqld := mysqlctl.NewMysqld(mycnf, &dbcfgs.Dba, &dbcfgs.Repl)

	statsType := stats.NewString("TabletType")
	statsKeyspace := stats.NewString("TabletKeyspace")
	statsShard := stats.NewString("TabletShard")
	statsKeyRangeStart := stats.NewString("TabletKeyRangeStart")
	statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd")

	agent, err = tabletmanager.NewActionAgent(topoServer, tabletAlias, mysqld)
	if err != nil {
		return nil, err
	}

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = tabletmanager.NewBinlogPlayerMap(topoServer, &dbcfgs.App.ConnectionParams, mysqld)
	tabletmanager.RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// Action agent listens to changes in zookeeper and makes
	// modifications to this tablet.
	agent.AddChangeCallback(func(oldTablet, newTablet topo.Tablet) {
		// err is local to the callback: the callback can run after
		// InitAgent has returned, so it must not write to InitAgent's
		// named result.
		var err error

		allowQuery := true
		var shardInfo *topo.ShardInfo
		var keyspaceInfo *topo.KeyspaceInfo
		if newTablet.Type == topo.TYPE_MASTER {
			// read the shard to get SourceShards
			shardInfo, err = topoServer.GetShard(newTablet.Keyspace, newTablet.Shard)
			if err != nil {
				log.Errorf("Cannot read shard for this tablet %v: %v", newTablet.Alias, err)
			} else {
				allowQuery = len(shardInfo.SourceShards) == 0
			}

			// read the keyspace to get ShardingColumnType
			keyspaceInfo, err = topoServer.GetKeyspace(newTablet.Keyspace)
			switch err {
			case nil:
				// continue
			case topo.ErrNoNode:
				// backward compatible mode
				keyspaceInfo = topo.NewKeyspaceInfo(newTablet.Keyspace, &topo.Keyspace{})
			default:
				log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err)
				keyspaceInfo = nil
			}
		}

		if newTablet.IsRunningQueryService() && allowQuery {
			if dbcfgs.App.DbName == "" {
				dbcfgs.App.DbName = newTablet.DbName()
			}
			dbcfgs.App.Keyspace = newTablet.Keyspace
			dbcfgs.App.Shard = newTablet.Shard
			// The invalidator is enabled on every type except master.
			dbcfgs.App.EnableInvalidator = newTablet.Type != topo.TYPE_MASTER
			// Transitioning from replica to master, first disconnect
			// existing connections so that clients must re-resolve
			// their endpoint before reconnecting.
			if newTablet.Type == topo.TYPE_MASTER && oldTablet.Type != topo.TYPE_MASTER {
				ts.DisallowQueries()
			}
			qrs := ts.LoadCustomRules()
			if newTablet.KeyRange.IsPartial() {
				// Fail inserts whose keyspace_id is outside this
				// tablet's key range.
				qr := ts.NewQueryRule("enforce keyspace_id range", "keyspace_id_not_in_range", ts.QR_FAIL_QUERY)
				qr.AddPlanCond(sqlparser.PLAN_INSERT_PK)
				err = qr.AddBindVarCond("keyspace_id", true, true, ts.QR_NOTIN, newTablet.KeyRange)
				if err != nil {
					log.Warningf("Unable to add keyspace rule: %v", err)
				} else {
					qrs.Add(qr)
				}
			}
			ts.AllowQueries(&dbcfgs.App, schemaOverrides, qrs, mysqld)
			// Disable before enabling to force existing streams to stop.
			binlog.DisableUpdateStreamService()
			binlog.EnableUpdateStreamService(dbcfgs)
		} else {
			ts.DisallowQueries()
			binlog.DisableUpdateStreamService()
		}

		statsType.Set(string(newTablet.Type))
		statsKeyspace.Set(newTablet.Keyspace)
		statsShard.Set(newTablet.Shard)
		statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex()))
		statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex()))

		// See if we need to start or stop any binlog player.
		// NOTE(review): shardInfo and keyspaceInfo may be nil here when the
		// topology reads above failed — confirm RefreshMap tolerates that.
		if newTablet.Type == topo.TYPE_MASTER {
			agent.BinlogPlayerMap.RefreshMap(newTablet, keyspaceInfo, shardInfo)
		} else {
			agent.BinlogPlayerMap.StopAllPlayersAndReset()
		}
	})

	if err := agent.Start(mysqld.Port(), port, securePort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.RegisterQueryService()

	return agent, nil
}
Example #11
0
// init publishes the binary name "vtaction" under the "BinaryName" stat.
func init() {
	binaryName := stats.NewString("BinaryName")
	binaryName.Set("vtaction")
}
Example #12
0
// InitAgent initializes the agent within vttablet.
//
// It loads the schema overrides from overridesFile, creates the mysqld
// wrapper, the binlog server and the binlog player map, creates the action
// agent, installs a tablet change callback, starts the agent on the
// addresses derived from port and securePort, and registers the agent's
// query service.
//
// The change callback runs with the old and new tablet records and:
//   - enables or disables the query service, the update stream service and
//     row cache invalidation depending on newTablet.IsServingType()
//   - publishes the tablet type, keyspace, shard and key range stats
//   - enables the binlog server for replicas only
//   - refreshes the binlog player map for a master, stops all players
//     otherwise
//
// InitAgent returns a non-nil error when NewActionAgent or agent.Start
// fails.
func InitAgent(
	tabletAlias topo.TabletAlias,
	dbcfgs dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	dbConfigsFile, dbCredentialsFile string,
	port, securePort int,
	mycnfFile, overridesFile string) (err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()
	mysqld := mysqlctl.NewMysqld(mycnf, dbcfgs.Dba, dbcfgs.Repl)

	// Start the binlog server service, disabled at start.
	binlogServer = mysqlctl.NewBinlogServer(mysqld)
	mysqlctl.RegisterBinlogServerService(binlogServer)

	// Start the binlog player services, not playing at start.
	binlogPlayerMap = NewBinlogPlayerMap(topoServer, dbcfgs.App.MysqlParams(), mysqld)
	RegisterBinlogPlayerMap(binlogPlayerMap)

	// Compute the bind addresses. The secure address stays empty when no
	// secure port was given.
	bindAddr := fmt.Sprintf(":%v", port)
	secureAddr := ""
	if securePort != 0 {
		secureAddr = fmt.Sprintf(":%v", securePort)
	}

	statsType := stats.NewString("TabletType")
	statsKeyspace := stats.NewString("TabletKeyspace")
	statsShard := stats.NewString("TabletShard")
	statsKeyRangeStart := stats.NewString("TabletKeyRangeStart")
	statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd")

	// Action agent listens to changes in zookeeper and makes
	// modifications to this tablet.
	agent, err = tm.NewActionAgent(topoServer, tabletAlias, mycnfFile, dbConfigsFile, dbCredentialsFile)
	if err != nil {
		return err
	}
	agent.AddChangeCallback(func(oldTablet, newTablet topo.Tablet) {
		if newTablet.IsServingType() {
			if dbcfgs.App.DbName == "" {
				dbcfgs.App.DbName = newTablet.DbName()
			}
			dbcfgs.App.Keyspace = newTablet.Keyspace
			dbcfgs.App.Shard = newTablet.Shard
			// Transitioning from replica to master, first disconnect
			// existing connections so that clients must re-resolve
			// their endpoint before reconnecting.
			if newTablet.Type == topo.TYPE_MASTER && oldTablet.Type != topo.TYPE_MASTER {
				ts.DisallowQueries()
			}
			qrs := ts.LoadCustomRules()
			if newTablet.KeyRange.IsPartial() {
				// Fail inserts whose keyspace_id is outside this
				// tablet's key range.
				qr := ts.NewQueryRule("enforce keyspace_id range", "keyspace_id_not_in_range", ts.QR_FAIL_QUERY)
				qr.AddPlanCond(sqlparser.PLAN_INSERT_PK)
				// The error is scoped to this statement: the callback can
				// run after InitAgent has returned, so it must not write
				// to InitAgent's named result.
				if err := qr.AddBindVarCond("keyspace_id", true, true, ts.QR_NOTIN, newTablet.KeyRange); err != nil {
					log.Warningf("Unable to add keyspace rule: %v", err)
				} else {
					qrs.Add(qr)
				}
			}
			ts.AllowQueries(dbcfgs.App, schemaOverrides, qrs)
			mysqlctl.EnableUpdateStreamService(string(newTablet.Type), dbcfgs)
			if newTablet.Type != topo.TYPE_MASTER {
				ts.StartRowCacheInvalidation()
			}
		} else {
			ts.DisallowQueries()
			ts.StopRowCacheInvalidation()
			mysqlctl.DisableUpdateStreamService()
		}

		statsType.Set(string(newTablet.Type))
		statsKeyspace.Set(newTablet.Keyspace)
		statsShard.Set(newTablet.Shard)
		statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex()))
		statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex()))

		// BinlogServer is only enabled for replicas
		if newTablet.Type == topo.TYPE_REPLICA {
			if !mysqlctl.IsBinlogServerEnabled(binlogServer) {
				mysqlctl.EnableBinlogServerService(binlogServer, dbcfgs.App.DbName)
			}
		} else {
			if mysqlctl.IsBinlogServerEnabled(binlogServer) {
				mysqlctl.DisableBinlogServerService(binlogServer)
			}
		}

		// See if we need to start or stop any binlog player
		if newTablet.Type == topo.TYPE_MASTER {
			binlogPlayerMap.RefreshMap(newTablet)
		} else {
			binlogPlayerMap.StopAllPlayers()
		}
	})

	if err := agent.Start(bindAddr, secureAddr, mysqld.Addr()); err != nil {
		return err
	}

	// register the RPC services from the agent
	agent.RegisterQueryService(mysqld)

	return nil
}
Example #13
0
// Start validates and updates the topology records for the tablet, and performs
// the initial state change callback to start tablet services.
// If initUpdateStream is set, update stream service will also be registered.
//
// mysqlPort, vtPort and gRPCPort are written to the tablet record's PortMap
// under "mysql", "vt" and "grpc". A zero mysqlPort leaves the existing
// "mysql" entry alone; a zero vtPort or gRPCPort removes the corresponding
// entry. The "vts" entry is always removed.
//
// Start returns the first error from resolving the hostname, updating the
// tablet record, or InitDBConfig. On success the agent's current tablet is a
// copy of the initial record with its type set to UNKNOWN.
func (agent *ActionAgent) Start(ctx context.Context, mysqlPort, vtPort, gRPCPort int32, initUpdateStream bool) error {
	// find our hostname as fully qualified, and IP.
	// The *tabletHostname value takes precedence when it is non-empty.
	hostname := *tabletHostname
	if hostname == "" {
		var err error
		hostname, err = netutil.FullyQualifiedHostname()
		if err != nil {
			return err
		}
	}
	ipAddrs, err := net.LookupHost(hostname)
	if err != nil {
		return err
	}
	// Only the first address returned by the lookup is recorded.
	ipAddr := ipAddrs[0]

	// Update bind addr for mysql and query service in the tablet node.
	f := func(tablet *topodatapb.Tablet) error {
		tablet.Hostname = hostname
		tablet.Ip = ipAddr
		if tablet.PortMap == nil {
			tablet.PortMap = make(map[string]int32)
		}
		if mysqlPort != 0 {
			// only overwrite mysql port if we know it, otherwise
			// leave it as is.
			tablet.PortMap["mysql"] = mysqlPort
		}
		if vtPort != 0 {
			tablet.PortMap["vt"] = vtPort
		} else {
			delete(tablet.PortMap, "vt")
		}
		// The "vts" entry is dropped unconditionally.
		delete(tablet.PortMap, "vts")
		if gRPCPort != 0 {
			tablet.PortMap["grpc"] = gRPCPort
		} else {
			delete(tablet.PortMap, "grpc")
		}

		// Save the original tablet for ownership tests later.
		// Note this stores the pointer handed to the update function,
		// not a copy.
		agent.initialTablet = tablet
		return nil
	}
	if _, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias, f); err != nil {
		return err
	}

	// Verify the topology is correct.
	// NOTE(review): any result of verifyTopology is not inspected here —
	// confirm whether it can report an error.
	agent.verifyTopology(ctx)

	// Get and fix the dbname if necessary, only for real instances.
	if !agent.DBConfigs.IsZero() {
		dbname := topoproto.TabletDbName(agent.initialTablet)

		// Update our DB config to match the info we have in the tablet
		if agent.DBConfigs.App.DbName == "" {
			agent.DBConfigs.App.DbName = dbname
		}
		if agent.DBConfigs.Filtered.DbName == "" {
			agent.DBConfigs.Filtered.DbName = dbname
		}
	}

	// create and register the RPC services from UpdateStream
	// (it needs the dbname, so it has to be delayed up to here,
	// but it has to be before updateState below that may use it)
	if initUpdateStream {
		us := binlog.NewUpdateStream(agent.MysqlDaemon, agent.DBConfigs.App.DbName)
		agent.UpdateStream = us
		// Registration is deferred to the servenv.OnRun hook.
		servenv.OnRun(func() {
			us.RegisterService()
		})
	}
	servenv.OnTerm(func() {
		// Disable UpdateStream (if any) upon entering lameduck.
		// We do this regardless of initUpdateStream, since agent.UpdateStream
		// may have been set from elsewhere.
		if agent.UpdateStream != nil {
			agent.UpdateStream.Disable()
		}
	})

	// initialize tablet server
	if err := agent.QueryServiceControl.InitDBConfig(querypb.Target{
		Keyspace:   agent.initialTablet.Keyspace,
		Shard:      agent.initialTablet.Shard,
		TabletType: agent.initialTablet.Type,
	}, agent.DBConfigs, agent.MysqlDaemon); err != nil {
		return fmt.Errorf("failed to InitDBConfig: %v", err)
	}

	// export a few static variables
	if agent.exportStats {
		statsKeyspace := stats.NewString("TabletKeyspace")
		statsShard := stats.NewString("TabletShard")
		statsKeyRangeStart := stats.NewString("TabletKeyRangeStart")
		statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd")

		statsKeyspace.Set(agent.initialTablet.Keyspace)
		statsShard.Set(agent.initialTablet.Shard)
		// The key range bounds are only set for a partial key range;
		// otherwise both stats keep their initial value.
		if key.KeyRangeIsPartial(agent.initialTablet.KeyRange) {
			statsKeyRangeStart.Set(hex.EncodeToString(agent.initialTablet.KeyRange.Start))
			statsKeyRangeEnd.Set(hex.EncodeToString(agent.initialTablet.KeyRange.End))
		}
	}

	// Initialize the current tablet to match our current running
	// state: Has most fields filled in, but type is UNKNOWN.
	// Subsequent calls to updateState or refreshTablet
	// will then work as expected.
	startingTablet := proto.Clone(agent.initialTablet).(*topodatapb.Tablet)
	startingTablet.Type = topodatapb.TabletType_UNKNOWN
	agent.setTablet(startingTablet)

	// run a background task to rebuild the SrvKeyspace in our cell/keyspace
	// if it doesn't exist yet
	go agent.maybeRebuildKeyspace(agent.initialTablet.Alias.Cell, agent.initialTablet.Keyspace)

	return nil
}
Example #14
0
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
//
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
//
// The construction sequence is: create the Orchestrator client, build the
// agent struct, register the query rule sources, call InitTablet, create
// the TabletType stat, create and register the binlog player map, determine
// the mysql port, call Start, schedule query service registration through
// servenv.OnRun, and then either restore in the background
// (*restoreFromBackup set) or refresh the tablet and start the health check
// synchronously. When an Orchestrator client exists, its DiscoverLoop is
// started in a goroutine.
//
// It returns a nil agent and a non-nil error if newOrcClient, InitTablet,
// Start or the synchronous refreshTablet fails. A failure of the background
// restore is not returned to the caller; it is reported through println and
// log.Fatalf from the restore goroutine.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.Controller,
	tabletAlias *topodatapb.TabletAlias,
	dbcfgs dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, gRPCPort int32,
) (agent *ActionAgent, err error) {
	topoServer := topo.GetServer()

	// orc may be nil without an error; see the nil check before
	// DiscoverLoop at the end of this function.
	orc, err := newOrcClient()
	if err != nil {
		return nil, err
	}

	// _healthy starts out as an error so the agent is not considered
	// healthy before the first health check has run.
	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		History:             history.New(historyLength),
		_healthy:            fmt.Errorf("healthcheck not run yet"),
		orc:                 orc,
	}
	agent.registerQueryRuleSources()

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)
	}

	// Create the TabletType stats
	agent.exportStats = true
	agent.statsTabletType = stats.NewString("TabletType")

	// Start the binlog player services, not playing at start.
	// The factory reads agent.DBConfigs.Filtered each time it is called.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient {
		return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered)
	})
	// Stop all binlog players upon entering lameduck.
	servenv.OnTerm(agent.BinlogPlayerMap.StopAllPlayersAndReset)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld.
		// err is declared locally here, so the named result is untouched;
		// a lookup failure is only logged and the port stays as returned.
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	// Start will get the tablet info, and update our state from it
	if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	servenv.OnRun(func() {
		agent.registerQueryService()
	})

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we refresh the tablet and
	//   initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreData(batchCtx, logutil.NewConsoleLogger(), false /* deleteBeforeRestore */); err != nil {
				// The failure is written with println (standard error)
				// in addition to the log.Fatalf call.
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)
			}

			// after the restore is done, start health check
			agent.initHealthCheck()
		}()
	} else {
		// update our state
		if err := agent.refreshTablet(batchCtx, "Start"); err != nil {
			return nil, err
		}

		// synchronously start health check if needed
		agent.initHealthCheck()
	}

	// Start periodic Orchestrator self-registration, if configured.
	if agent.orc != nil {
		go agent.orc.DiscoverLoop(agent)
	}

	return agent, nil
}
Example #15
0
// Start validates and updates the topology records for the tablet, and performs
// the initial state change callback to start tablet services.
// If initUpdateStream is set, update stream service will also be registered.
//
// mysqlPort, vtPort and gRPCPort are written to the tablet record's PortMap
// under "mysql", "vt" and "grpc". A zero mysqlPort leaves the existing
// "mysql" entry alone; a zero vtPort or gRPCPort removes the corresponding
// entry. The "vts" entry is always removed.
//
// Start returns the first error from reading or updating the topology,
// resolving the hostname, verifying the topology, InitDBConfig, or
// initializing the key range rule.
func (agent *ActionAgent) Start(ctx context.Context, mysqlPort, vtPort, gRPCPort int32, initUpdateStream bool) error {
	var err error
	if _, err = agent.updateTabletFromTopo(ctx); err != nil {
		return err
	}

	// find our hostname as fully qualified, and IP.
	// The *tabletHostname value takes precedence when it is non-empty.
	hostname := *tabletHostname
	if hostname == "" {
		hostname, err = netutil.FullyQualifiedHostname()
		if err != nil {
			return err
		}
	}
	ipAddrs, err := net.LookupHost(hostname)
	if err != nil {
		return err
	}
	// Only the first address returned by the lookup is recorded.
	ipAddr := ipAddrs[0]

	// Update bind addr for mysql and query service in the tablet node.
	f := func(tablet *topodatapb.Tablet) error {
		tablet.Hostname = hostname
		tablet.Ip = ipAddr
		if tablet.PortMap == nil {
			tablet.PortMap = make(map[string]int32)
		}
		if mysqlPort != 0 {
			// only overwrite mysql port if we know it, otherwise
			// leave it as is.
			tablet.PortMap["mysql"] = mysqlPort
		}
		if vtPort != 0 {
			tablet.PortMap["vt"] = vtPort
		} else {
			delete(tablet.PortMap, "vt")
		}
		// The "vts" entry is dropped unconditionally.
		delete(tablet.PortMap, "vts")
		if gRPCPort != 0 {
			tablet.PortMap["grpc"] = gRPCPort
		} else {
			delete(tablet.PortMap, "grpc")
		}
		return nil
	}
	if _, err := agent.TopoServer.UpdateTabletFields(ctx, agent.Tablet().Alias, f); err != nil {
		return err
	}

	// Reread to get the changes we just made
	tablet, err := agent.updateTabletFromTopo(ctx)
	if err != nil {
		return err
	}

	// Save the original tablet record as it is now (at startup).
	// A clone is stored, so later changes to tablet do not affect it.
	agent.initialTablet = proto.Clone(tablet).(*topodatapb.Tablet)

	if err = agent.verifyTopology(ctx); err != nil {
		return err
	}

	// get and fix the dbname if necessary
	if !agent.DBConfigs.IsZero() {
		// Only for real instances
		// Update our DB config to match the info we have in the tablet
		if agent.DBConfigs.App.DbName == "" {
			agent.DBConfigs.App.DbName = topoproto.TabletDbName(tablet)
		}
		if agent.DBConfigs.Filtered.DbName == "" {
			agent.DBConfigs.Filtered.DbName = topoproto.TabletDbName(tablet)
		}
		agent.DBConfigs.App.Keyspace = tablet.Keyspace
		agent.DBConfigs.App.Shard = tablet.Shard
	}

	// create and register the RPC services from UpdateStream
	// (it needs the dbname, so it has to be delayed up to here,
	// but it has to be before updateState below that may use it)
	if initUpdateStream {
		us := binlog.NewUpdateStream(agent.MysqlDaemon, agent.DBConfigs.App.DbName)
		agent.UpdateStream = us
		// Registration is deferred to the servenv.OnRun hook.
		servenv.OnRun(func() {
			us.RegisterService()
		})
	}
	servenv.OnTerm(func() {
		// Disable UpdateStream (if any) upon entering lameduck.
		// We do this regardless of initUpdateStream, since agent.UpdateStream
		// may have been set from elsewhere.
		if agent.UpdateStream != nil {
			agent.UpdateStream.Disable()
		}
	})

	// initialize tablet server
	if err := agent.QueryServiceControl.InitDBConfig(querypb.Target{
		Keyspace:   tablet.Keyspace,
		Shard:      tablet.Shard,
		TabletType: tablet.Type,
	}, agent.DBConfigs, agent.MysqlDaemon); err != nil {
		return fmt.Errorf("failed to InitDBConfig: %v", err)
	}

	// export a few static variables
	if agent.exportStats {
		statsKeyspace := stats.NewString("TabletKeyspace")
		statsShard := stats.NewString("TabletShard")
		statsKeyRangeStart := stats.NewString("TabletKeyRangeStart")
		statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd")

		statsKeyspace.Set(tablet.Keyspace)
		statsShard.Set(tablet.Shard)
		// The key range bounds are only set for a partial key range;
		// otherwise both stats keep their initial value.
		if key.KeyRangeIsPartial(tablet.KeyRange) {
			statsKeyRangeStart.Set(hex.EncodeToString(tablet.KeyRange.Start))
			statsKeyRangeEnd.Set(hex.EncodeToString(tablet.KeyRange.End))
		}
	}

	// initialize the key range query rule
	if err := agent.initializeKeyRangeRule(ctx, tablet.Keyspace, tablet.KeyRange); err != nil {
		return err
	}

	// update our state, passing an empty tablet as the previous state.
	// NOTE(review): any result of updateState is not inspected here —
	// confirm whether it can report an error.
	oldTablet := &topodatapb.Tablet{}
	agent.updateState(ctx, oldTablet, "Start")

	// run a background task to rebuild the SrvKeyspace in our cell/keyspace
	// if it doesn't exist yet
	go agent.maybeRebuildKeyspace(tablet.Alias.Cell, tablet.Keyspace)

	return nil
}
Example #16
0
// InitAgent initializes the agent within vttablet.
//
// It loads the schema overrides from overridesFile, builds the mysqld
// wrapper from mycnf and dbcfgs, creates the ActionAgent for tabletAlias,
// attaches a binlog player map to it, and registers a callback that
// reconfigures the query service, the update stream service, the exported
// tablet stats and the binlog players every time the tablet record changes.
// The agent is then started on the given ports, its query service is
// registered, and the health check is initialized.
//
// It returns the started agent, or a nil agent and the error reported by
// NewActionAgent or agent.Start.
func InitAgent(
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort int,
	overridesFile string,
) (agent *tabletmanager.ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()
	mysqld := mysqlctl.NewMysqld(mycnf, &dbcfgs.Dba, &dbcfgs.Repl)

	// stats exported by this module, refreshed on every tablet change
	statsType := stats.NewString("TabletType")
	statsKeyspace := stats.NewString("TabletKeyspace")
	statsShard := stats.NewString("TabletShard")
	statsKeyRangeStart := stats.NewString("TabletKeyRangeStart")
	statsKeyRangeEnd := stats.NewString("TabletKeyRangeEnd")

	agent, err = tabletmanager.NewActionAgent(topoServer, tabletAlias, mysqld)
	if err != nil {
		return nil, err
	}

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = tabletmanager.NewBinlogPlayerMap(topoServer, &dbcfgs.App.ConnectionParams, mysqld)
	tabletmanager.RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// Action agent listens to changes in zookeeper and makes
	// modifications to this tablet.
	agent.AddChangeCallback(func(oldTablet, newTablet topo.Tablet) {
		allowQuery := true
		var shardInfo *topo.ShardInfo
		var keyspaceInfo *topo.KeyspaceInfo
		if newTablet.Type == topo.TYPE_MASTER {
			// Errors are kept local to the callback: it is registered to
			// run on tablet changes, so it must not write to InitAgent's
			// named err result, which would be shared by all invocations.
			var err error

			// read the shard to get SourceShards
			shardInfo, err = topoServer.GetShard(newTablet.Keyspace, newTablet.Shard)
			if err != nil {
				log.Errorf("Cannot read shard for this tablet %v: %v", newTablet.Alias, err)
			} else {
				// a master with source shards is still being populated
				// by filtered replication, so it does not serve queries
				allowQuery = len(shardInfo.SourceShards) == 0
			}

			// read the keyspace to get ShardingColumnType
			keyspaceInfo, err = topoServer.GetKeyspace(newTablet.Keyspace)
			switch err {
			case nil:
				// continue
			case topo.ErrNoNode:
				// backward compatible mode
				keyspaceInfo = topo.NewKeyspaceInfo(newTablet.Keyspace, &topo.Keyspace{})
			default:
				log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err)
				keyspaceInfo = nil
			}
		}

		if newTablet.IsRunningQueryService() && allowQuery {
			if dbcfgs.App.DbName == "" {
				dbcfgs.App.DbName = newTablet.DbName()
			}
			dbcfgs.App.Keyspace = newTablet.Keyspace
			dbcfgs.App.Shard = newTablet.Shard
			// the invalidator is enabled on every type except master
			dbcfgs.App.EnableInvalidator = newTablet.Type != topo.TYPE_MASTER

			// There are a few transitions when we're
			// going to need to restart the query service:
			// - transitioning from replica to master, so clients
			//   that were already connected don't keep on using
			//   the master as replica or rdonly.
			// - having different parameters for the query
			//   service. It needs to stop and restart with the
			//   new parameters. That includes:
			//   - changing KeyRange
			//   - changing the BlacklistedTables list
			if (newTablet.Type == topo.TYPE_MASTER &&
				oldTablet.Type != topo.TYPE_MASTER) ||
				(newTablet.KeyRange != oldTablet.KeyRange) ||
				!reflect.DeepEqual(newTablet.BlacklistedTables, oldTablet.BlacklistedTables) {
				ts.DisallowQueries()
			}

			// build the query rules: custom rules, plus the key range
			// and blacklisted tables rules derived from the tablet record
			qrs := ts.LoadCustomRules()
			if newTablet.KeyRange.IsPartial() {
				qr := ts.NewQueryRule("enforce keyspace_id range", "keyspace_id_not_in_range", ts.QR_FAIL_QUERY)
				qr.AddPlanCond(sqlparser.PLAN_INSERT_PK)
				if err := qr.AddBindVarCond("keyspace_id", true, true, ts.QR_NOTIN, newTablet.KeyRange); err != nil {
					log.Warningf("Unable to add keyspace rule: %v", err)
				} else {
					qrs.Add(qr)
				}
			}
			if len(newTablet.BlacklistedTables) > 0 {
				log.Infof("Blacklisting tables %v", strings.Join(newTablet.BlacklistedTables, ", "))
				qr := ts.NewQueryRule("enforce blacklisted tables", "blacklisted_table", ts.QR_FAIL_QUERY)
				for _, t := range newTablet.BlacklistedTables {
					qr.AddTableCond(t)
				}
				qrs.Add(qr)
			}
			ts.AllowQueries(&dbcfgs.App, schemaOverrides, qrs, mysqld)
			// Disable before enabling to force existing streams to stop.
			binlog.DisableUpdateStreamService()
			binlog.EnableUpdateStreamService(dbcfgs)
		} else {
			ts.DisallowQueries()
			binlog.DisableUpdateStreamService()
		}

		statsType.Set(string(newTablet.Type))
		statsKeyspace.Set(newTablet.Keyspace)
		statsShard.Set(newTablet.Shard)
		statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex()))
		statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex()))

		// See if we need to start or stop any binlog player
		if newTablet.Type == topo.TYPE_MASTER {
			agent.BinlogPlayerMap.RefreshMap(newTablet, keyspaceInfo, shardInfo)
		} else {
			agent.BinlogPlayerMap.StopAllPlayersAndReset()
		}
	})

	if err := agent.Start(mysqld.Port(), port, securePort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.RegisterQueryService()

	// start health check if needed
	initHeathCheck(agent)

	return agent, nil
}