Esempio n. 1
// Published creates a wrapper for net.Listener that
// publishes connection stats.
//
// The result is a *CountingListener that wraps l and carries two integer
// stats variables created with stats.NewInt: ConnCount (named by countTag)
// and ConnAccept (named by acceptTag).
func Published(l net.Listener, countTag, acceptTag string) net.Listener {
	return &CountingListener{
		Listener:   l,
		ConnCount:  stats.NewInt(countTag),
		ConnAccept: stats.NewInt(acceptTag),
Esempio n. 2
// Init runs the startup checks and setup visible below: it aborts when
// called a second time or when the process runs as root, copies the
// memProfileRate flag into the runtime, picks a GOMAXPROCS value, and reads
// the process's open-file limit.
//
// NOTE(review): mu is released via defer, but the matching mu.Lock() call is
// not visible in this excerpt — confirm it precedes the defer.
func Init() {
	defer mu.Unlock()
	// Guard against double initialization.
	if inited {
		log.Fatal("servenv.Init called second time")
	inited = true

	// Once you run as root, you pretty much destroy the chances of a
	// non-privileged user starting the program correctly.
	if uid := os.Getuid(); uid == 0 {
		log.Fatalf("servenv.Init: running this as root makes no sense")

	runtime.MemProfileRate = *memProfileRate
	// Fall back to "1" when the GOMAXPROCS environment variable is empty.
	gomaxprocs := os.Getenv("GOMAXPROCS")
	if gomaxprocs == "" {
		gomaxprocs = "1"

	// We used to set this limit directly, but you pretty much have to
	// use a root account to allow increasing a limit reliably. Dropping
	// privileges is also tricky. The best strategy is to make a shell
	// script set up the limits as root and switch users before starting
	// the server.
	fdLimit := &syscall.Rlimit{}
	// A failure to read the limit is logged, not treated as fatal.
	if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, fdLimit); err != nil {
		log.Errorf("max-open-fds failed: %v", err)
	fdl := stats.NewInt("MaxFds")

Esempio n. 3
// NewQueryEngine builds a QueryEngine from config: it creates the cache and
// connection pools and the helper services, copies the tunable limits into
// atomic fields, and creates the stats variables.
//
// Timeouts in config are multiplied by 1e9 before conversion to
// time.Duration (a nanosecond count), i.e. they are interpreted as seconds.
//
// NOTE(review): queryStats, waitStats, killStats, errorStats, resultStats and
// spotCheckCount are assigned with "=", so they live outside this function;
// a second call would overwrite them — confirm this is only called once.
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}
	qe.cachePool = NewCachePool("CachePool", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.connPool = NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamTokens = sync2.NewSemaphore(config.StreamExecThrottle, time.Duration(config.StreamWaitTimeout*1e9))
	qe.reservedPool = NewReservedPool("ReservedPool")
	qe.txPool = NewConnectionPool("TxPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // connections in pool has to be > transactionCap
	qe.activeTxPool = NewActiveTxPool("ActiveTxPool", time.Duration(config.TransactionTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.consolidator = NewConsolidator()
	// The spot-check ratio is stored scaled by SPOT_CHECK_MULTIPLIER so it
	// fits in an integer; the published value below divides it back out.
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)
	// Publish the current limits through getter callbacks.
	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	// NOTE(review): 60e9 is presumably a 60-second interval expressed in
	// nanoseconds — confirm against stats.NewRates.
	stats.NewRates("QPS", queryStats, 15, 60e9)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	errorStats = stats.NewCounters("Errors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("SpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	spotCheckCount = stats.NewInt("SpotCheckCount")
	return qe
Esempio n. 4
// init allocates the buffered aggregation channel (capacity aggrChanSize),
// creates the GatewayStatsChanFullCount stats variable, and launches the
// resetAggregators and processQueryInfo goroutines.
//
// NOTE(review): no stop signal for the two goroutines is visible here.
func init() {
	// init global goroutines to aggregate stats.
	aggrChan = make(chan *queryInfo, aggrChanSize)
	gatewayStatsChanFull = stats.NewInt("GatewayStatsChanFullCount")
	go resetAggregators()
	go processQueryInfo()
Esempio n. 5
// NewQueryServiceStats returns a new QueryServiceStats instance.
//
// Every stats variable name starts out empty and is set to
// statsPrefix + <suffix> only when enablePublishStats is true; otherwise all
// variables are created with an empty name.
// NOTE(review): presumably the stats package does not publish a variable
// whose name is empty — verify against the stats package.
func NewQueryServiceStats(statsPrefix string, enablePublishStats bool) *QueryServiceStats {
	mysqlStatsName := ""
	queryStatsName := ""
	qpsRateName := ""
	waitStatsName := ""
	killStatsName := ""
	infoErrorsName := ""
	errorStatsName := ""
	internalErrorsName := ""
	resultStatsName := ""
	spotCheckCountName := ""
	userTableQueryCountName := ""
	userTableQueryTimesNsName := ""
	userTransactionCountName := ""
	userTransactionTimesNsName := ""
	if enablePublishStats {
		mysqlStatsName = statsPrefix + "Mysql"
		queryStatsName = statsPrefix + "Queries"
		qpsRateName = statsPrefix + "QPS"
		waitStatsName = statsPrefix + "Waits"
		killStatsName = statsPrefix + "Kills"
		infoErrorsName = statsPrefix + "InfoErrors"
		errorStatsName = statsPrefix + "Errors"
		internalErrorsName = statsPrefix + "InternalErrors"
		resultStatsName = statsPrefix + "Results"
		spotCheckCountName = statsPrefix + "RowcacheSpotCheckCount"
		userTableQueryCountName = statsPrefix + "UserTableQueryCount"
		userTableQueryTimesNsName = statsPrefix + "UserTableQueryTimesNs"
		userTransactionCountName = statsPrefix + "UserTransactionCount"
		userTransactionTimesNsName = statsPrefix + "UserTransactionTimesNs"
	// Bucket values handed to stats.NewHistogram for ResultStats.
	resultBuckets := []int64{0, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000}
	// Created up front because both QueryStats and QPSRates use this value.
	queryStats := stats.NewTimings(queryStatsName)
	return &QueryServiceStats{
		MySQLStats: stats.NewTimings(mysqlStatsName),
		QueryStats: queryStats,
		WaitStats:  stats.NewTimings(waitStatsName),
		// The counters below are created with their category names listed
		// up front.
		KillStats:  stats.NewCounters(killStatsName, "Transactions", "Queries"),
		InfoErrors: stats.NewCounters(infoErrorsName, "Retry", "Fatal", "DupKey"),
		ErrorStats: stats.NewCounters(errorStatsName, "Fail", "TxPoolFull", "NotInTx", "Deadlock"),
		InternalErrors: stats.NewCounters(internalErrorsName, "Task", "MemcacheStats",
			"Mismatch", "StrayTransactions", "Invalidation", "Panic", "HungQuery", "Schema"),
		// Per-table counters are labelled by table name, caller ID and type.
		UserTableQueryCount: stats.NewMultiCounters(
			userTableQueryCountName, []string{"TableName", "CallerID", "Type"}),
		UserTableQueryTimesNs: stats.NewMultiCounters(
			userTableQueryTimesNsName, []string{"TableName", "CallerID", "Type"}),
		// Per-transaction counters are labelled by caller ID and conclusion.
		UserTransactionCount: stats.NewMultiCounters(
			userTransactionCountName, []string{"CallerID", "Conclusion"}),
		UserTransactionTimesNs: stats.NewMultiCounters(
			userTransactionTimesNsName, []string{"CallerID", "Conclusion"}),
		// Sample every 5 seconds and keep samples for up to 15 minutes.
		QPSRates:       stats.NewRates(qpsRateName, queryStats, 15*60/5, 5*time.Second),
		ResultStats:    stats.NewHistogram(resultStatsName, resultBuckets),
		SpotCheckCount: stats.NewInt(spotCheckCountName),
Esempio n. 6
// TestInt creates an unnamed stats.Int and hands it, together with a load
// callback, to testMetric along with the expected descriptor string and the
// expected metric string.
//
// NOTE(review): the body of load is not visible in this excerpt; the
// expected gauge value 1234 is presumably set there — confirm.
func TestInt(t *testing.T) {
	v := stats.NewInt("")
	load := func() {
	testMetric(t, v, load,
		`Desc{fqName: "test_name", help: "test_help", constLabels: {}, variableLabels: []}`,
		`gauge:<value:1234 > `,
Esempio n. 7
// init parses buildTime using the time.UnixDate layout and panics when a
// non-empty buildTime cannot be parsed. An empty buildTime is tolerated: the
// parse error is ignored in that case.
func init() {
	t, err := time.Parse(time.UnixDate, buildTime)
	if buildTime != "" && err != nil {
		panic(fmt.Sprintf("Couldn't parse build timestamp %q: %v", buildTime, err))
Esempio n. 8
// NewActionAgent creates a new ActionAgent and registers all the
// associated services
//
// It loads the schema overrides from overridesFile, creates the Mysqld
// handle from mycnf and dbcfgs, builds the agent, creates its binlog player
// map, resolves the MySQL port, and starts the agent. If agent.Start fails
// its error is returned with a nil agent.
func NewActionAgent(
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort int,
	overridesFile string,
	lockTimeout time.Duration,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()
	mysqld := mysqlctl.NewMysqld("Dba", mycnf, &dbcfgs.Dba, &dbcfgs.Repl)

	agent = &ActionAgent{
		TopoServer:         topoServer,
		TabletAlias:        tabletAlias,
		// The same mysqld value is stored in both Mysqld and MysqlDaemon.
		Mysqld:             mysqld,
		MysqlDaemon:        mysqld,
		DBConfigs:          dbcfgs,
		SchemaOverrides:    schemaOverrides,
		LockTimeout:        lockTimeout,
		done:               make(chan struct{}),
		History:            history.New(historyLength),
		lastHealthMapCount: stats.NewInt("LastHealthMapCount"),
		changeItems:        make(chan tabletChangeItem, 100),

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, &dbcfgs.Filtered, mysqld)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		// This err shadows the named result, so a lookup failure is only
		// logged below and never returned to the caller.
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)

	if err := agent.Start(mysqlPort, port, securePort); err != nil {
		return nil, err

	// register the RPC services from the agent

	// start health check if needed

	return agent, nil
Esempio n. 9
// NewZkReader builds a ZkReader with an empty cell map, creates its stats
// variables, publishes its JSON stats under "ZkReader", and then tries to
// open each cell named in preload.
//
// When resolveLocal is true the local cell is filled in from
// zk.GuessLocalCell. A cell that fails to preload is logged as an error; the
// reader is still returned.
func NewZkReader(resolveLocal bool, preload []string) *ZkReader {
	zkr := &ZkReader{zcell: make(map[string]*zkCell), resolveLocal: resolveLocal}
	if resolveLocal {
		zkr.localCell = zk.GuessLocalCell()
	zkr.rpcCalls = stats.NewInt("ZkReaderRpcCalls")
	zkr.unknownCellErrors = stats.NewInt("ZkReaderUnknownCellErrors")
	zkr.zkrStats = newZkrStats()

	stats.PublishJSONFunc("ZkReader", zkr.statsJSON)

	// start some cells
	for _, cellName := range preload {
		// Each preload entry is looked up under the "/zk/" prefix.
		_, path, err := zkr.getCell("/zk/" + cellName)
		// NOTE(review): both messages below are built by concatenation and
		// passed as the format string, so a '%' in cellName, path or the
		// error text would be treated as a formatting verb; passing the
		// values as arguments to a constant format would avoid that.
		if err != nil {
			log.Errorf("Cell " + cellName + " could not be preloaded: " + err.Error())
		} else {
			log.Infof("Cell " + cellName + " preloaded for: " + path)
	return zkr
Esempio n. 10
// NewQueryEngine creates a new QueryEngine.
// This is a singleton class.
// You must call this only once.
//
// The stats variables (mysqlStats, queryStats, QPSRates, ...) are assigned
// with "=", so they live outside this function and a second call would
// overwrite them. Timeouts in config are multiplied by 1e9 before conversion
// to time.Duration (a nanosecond count), i.e. they are interpreted as
// seconds.
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9))

	mysqlStats = stats.NewTimings("Mysql")

	// Pools
	qe.cachePool = NewCachePool("Rowcache", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.connPool = dbconnpool.NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = dbconnpool.NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.txPool = dbconnpool.NewConnectionPool("TransactionPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // connections in pool has to be > transactionCap

	// Services
	qe.activeTxPool = NewActiveTxPool("ActiveTransactionPool", time.Duration(config.TransactionTimeout*1e9))
	// The same connKiller is shared by activePool and streamQList below.
	qe.connKiller = NewConnectionKiller(1, time.Duration(config.IdleTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), qe.connKiller)
	qe.consolidator = NewConsolidator()
	qe.invalidator = NewRowcacheInvalidator(qe)
	qe.streamQList = NewQueryList(qe.connKiller)

	// Vars
	// The spot-check ratio is stored scaled by SPOT_CHECK_MULTIPLIER so it
	// fits in an integer; the published value below divides it back out.
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	// NOTE(review): the body of this branch is not visible in this excerpt.
	if config.StrictMode {
	qe.strictTableAcl = config.StrictTableAcl
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)

	// loggers
	qe.accessCheckerLogger = logutil.NewThrottledLogger("accessChecker", 1*time.Second)

	// Stats
	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	QPSRates = stats.NewRates("QPS", queryStats, 15, 60*time.Second)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	infoErrors = stats.NewCounters("InfoErrors")
	errorStats = stats.NewCounters("Errors")
	internalErrors = stats.NewCounters("InternalErrors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("RowcacheSpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	spotCheckCount = stats.NewInt("RowcacheSpotCheckCount")

	return qe
Esempio n. 11
// NewQueryEngine creates a new QueryEngine.
// This is a singleton class.
// You must call this only once.
//
// The stats variables (queryStats, QPSRates, waitStats, ...) are assigned
// with "=", so they live outside this function and a second call would
// overwrite them. Timeouts in config are multiplied by 1e9 before conversion
// to time.Duration (a nanosecond count), i.e. they are interpreted as
// seconds.
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}

	// services
	qe.cachePool = NewCachePool("RowcachePool", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9), config.SensitiveMode)
	qe.connPool = NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.txPool = NewConnectionPool("TransactionPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // connections in pool has to be > transactionCap
	qe.activeTxPool = NewActiveTxPool("ActiveTransactionPool", time.Duration(config.TransactionTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.consolidator = NewConsolidator()

	// vars
	// The spot-check ratio is stored scaled by SPOT_CHECK_MULTIPLIER so it
	// fits in an integer; the published value below divides it back out.
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	// NOTE(review): the body of this branch is not visible in this excerpt.
	if config.StrictMode {
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)

	// stats
	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	QPSRates = stats.NewRates("QPS", queryStats, 15, 60*time.Second)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	infoErrors = stats.NewCounters("InfoErrors")
	errorStats = stats.NewCounters("Errors")
	internalErrors = stats.NewCounters("InternalErrors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("SpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	spotCheckCount = stats.NewInt("SpotCheckCount")

	return qe
Esempio n. 12
// NewQueryServiceStats returns a new QueryServiceStats instance.
//
// Every stats variable name starts out empty and is set to
// statsPrefix + <suffix> only when enablePublishStats is true; otherwise all
// variables are created with an empty name.
// NOTE(review): presumably the stats package does not publish a variable
// whose name is empty — verify against the stats package.
func NewQueryServiceStats(statsPrefix string, enablePublishStats bool) *QueryServiceStats {
	mysqlStatsName := ""
	queryStatsName := ""
	qpsRateName := ""
	waitStatsName := ""
	killStatsName := ""
	infoErrorsName := ""
	errorStatsName := ""
	internalErrorsName := ""
	resultStatsName := ""
	spotCheckCountName := ""
	if enablePublishStats {
		mysqlStatsName = statsPrefix + "Mysql"
		queryStatsName = statsPrefix + "Queries"
		qpsRateName = statsPrefix + "QPS"
		waitStatsName = statsPrefix + "Waits"
		killStatsName = statsPrefix + "Kills"
		infoErrorsName = statsPrefix + "InfoErrors"
		errorStatsName = statsPrefix + "Errors"
		internalErrorsName = statsPrefix + "InternalErrors"
		resultStatsName = statsPrefix + "Results"
		spotCheckCountName = statsPrefix + "RowcacheSpotCheckCount"
	// Bucket values handed to stats.NewHistogram for ResultStats.
	resultBuckets := []int64{0, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000}
	// Created up front because both QueryStats and QPSRates use this value.
	queryStats := stats.NewTimings(queryStatsName)
	return &QueryServiceStats{
		MySQLStats:     stats.NewTimings(mysqlStatsName),
		QueryStats:     queryStats,
		WaitStats:      stats.NewTimings(waitStatsName),
		KillStats:      stats.NewCounters(killStatsName),
		InfoErrors:     stats.NewCounters(infoErrorsName),
		ErrorStats:     stats.NewCounters(errorStatsName),
		InternalErrors: stats.NewCounters(internalErrorsName),
		// NOTE(review): presumably 15 samples taken 60 seconds apart —
		// confirm against stats.NewRates.
		QPSRates:       stats.NewRates(qpsRateName, queryStats, 15, 60*time.Second),
		ResultStats:    stats.NewHistogram(resultStatsName, resultBuckets),
		SpotCheckCount: stats.NewInt(spotCheckCountName),
Esempio n. 13
	Run(context.Context) error

var (
	retryDuration         = flag.Duration("retry_duration", 2*time.Hour, "Amount of time we wait before giving up on a retryable action (e.g. write to destination, waiting for healthy tablets)")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")
	useV3ReshardingMode   = flag.Bool("use_v3_resharding_mode", false, "True iff the workers should use V3-style resharding, which doesn't require a preset sharding key column.")

	healthCheckTopologyRefresh = flag.Duration("worker_healthcheck_topology_refresh", 30*time.Second, "refresh interval for re-reading the topology")
	healthcheckRetryDelay      = flag.Duration("worker_healthcheck_retry_delay", 5*time.Second, "delay before retrying a failed healthcheck")
	healthCheckTimeout         = flag.Duration("worker_healthcheck_timeout", time.Minute, "the health check timeout period")

	statsState = stats.NewString("WorkerState")
	// statsRetryCount is the total number of times a query to vttablet had to be retried.
	statsRetryCount = stats.NewInt("WorkerRetryCount")
	// statsRetryCounters groups the number of retries by category e.g. "TimeoutError" or "Readonly".
	statsRetryCounters = stats.NewCounters("WorkerRetryCounters")
	// statsThrottledCounters is the number of times a write has been throttled,
	// grouped by (keyspace, shard, threadID). Mainly used for testing.
	// If throttling is enabled, this should always be non-zero for all threads.
	statsThrottledCounters = stats.NewMultiCounters("WorkerThrottledCounters", []string{"keyspace", "shardname", "thread_id"})
	// statsStateDurationsNs tracks, for each state, how much time was spent in it. Mainly used for testing.
	statsStateDurationsNs = stats.NewCounters("WorkerStateDurations")
	// statsOnlineInsertsCounters tracks for every table how many rows were
	// inserted during the online clone (reconciliation) phase.
	statsOnlineInsertsCounters = stats.NewCounters("WorkerOnlineInsertsCounters")
	// statsOnlineUpdatesCounters tracks for every table how many rows were updated.
	statsOnlineUpdatesCounters = stats.NewCounters("WorkerOnlineUpdatesCounters")
	// statsOnlineDeletesCounters tracks for every table how many rows were deleted.
	statsOnlineDeletesCounters = stats.NewCounters("WorkerOnlineDeletesCounters")
Esempio n. 14
	querypb ""

// NewConnFunc is a factory method that creates a Conn instance
// using given ConnParams.
type NewConnFunc func(params ConnParams) (Conn, error)

var (
	// defaultConn is a package-level connection factory.
	// NOTE(review): where it is assigned and used is not visible in this
	// excerpt.
	defaultConn NewConnFunc

	// mu protects conns.
	mu    sync.Mutex
	// conns maps a string key to a connection factory.
	conns = make(map[string]NewConnFunc)
	// Keep track of the total number of connections opened to MySQL. This is mainly
	// useful for tests, where we use this to approximate the number of ports that we used.
	connCount = stats.NewInt("mysql-new-connection-count")

// Conn defines the behavior for the low level db connection
type Conn interface {
	// ExecuteFetch executes the query on the connection
	ExecuteFetch(query string, maxrows int, wantfields bool) (*sqltypes.Result, error)
	// ExecuteFetchMap returns a map from column names to cell data for a query
	// that should return exactly 1 row.
	ExecuteFetchMap(query string) (map[string]string, error)
	// ExecuteStreamFetch starts a streaming query to db server. Use FetchNext
	// on the Connection until it returns nil or error
	ExecuteStreamFetch(query string) error
	// Close closes the db connection
	// IsClosed returns if the connection was ever closed
Esempio n. 15
const (
	DISABLED int64 = iota

// usStateNames maps the ENABLED and DISABLED state constants to their
// display names.
var usStateNames = map[int64]string{
	ENABLED:  "Enabled",
	DISABLED: "Disabled",

// Stats variables for the update stream: three named counter sets followed
// by four integer variables for key-range and table statements and
// transactions.
var (
	streamCount          = stats.NewCounters("UpdateStreamStreamCount")
	updateStreamErrors   = stats.NewCounters("UpdateStreamErrors")
	updateStreamEvents   = stats.NewCounters("UpdateStreamEvents")
	keyrangeStatements   = stats.NewInt("UpdateStreamKeyRangeStatements")
	keyrangeTransactions = stats.NewInt("UpdateStreamKeyRangeTransactions")
	tablesStatements     = stats.NewInt("UpdateStreamTablesStatements")
	tablesTransactions   = stats.NewInt("UpdateStreamTablesTransactions")

// UpdateStream holds the update stream's configuration and runtime state:
// the MySQL configuration and daemon handle, an atomic state value, the
// database name, and a streamList.
type UpdateStream struct {
	mycnf *mysqlctl.Mycnf

	// NOTE(review): presumably serializes state-changing actions — confirm
	// against the methods that take it.
	actionLock     sync.Mutex
	// NOTE(review): presumably holds one of the ENABLED/DISABLED constants —
	// confirm.
	state          sync2.AtomicInt64
	mysqld         *mysqlctl.Mysqld
	stateWaitGroup sync.WaitGroup
	dbname         string
	streams        streamList
Esempio n. 16
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
//
// A failure from agent.InitTablet is returned wrapped with an
// "agent.InitTablet failed" prefix; a failure from agent.Start is returned
// as is. In both cases the returned agent is nil.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.QueryServiceControl,
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort, gRPCPort int,
	overridesFile string,
	lockTimeout time.Duration,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()

	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		SchemaOverrides:     schemaOverrides,
		LockTimeout:         lockTimeout,
		History:             history.New(historyLength),
		lastHealthMapCount:  stats.NewInt("LastHealthMapCount"),
		// The agent starts with a non-nil health error until a health
		// check has run.
		_healthy:            fmt.Errorf("healthcheck not run yet"),
		healthStreamMap:     make(map[int]chan<- *actionnode.HealthStreamReply),

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, securePort, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)

	// Publish and set the TargetTabletType. Not a global var
	// since it should never be changed.
	statsTabletType := stats.NewString("TargetTabletType")

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, &dbcfgs.Filtered, mysqld)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		// This err shadows the named result, so a lookup failure is only
		// logged below and never returned to the caller.
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)

	if err := agent.Start(batchCtx, mysqlPort, port, securePort, gRPCPort); err != nil {
		return nil, err

	// register the RPC services from the agent

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreFromBackup(batchCtx); err != nil {
				// The failure is printed with println as well as logged
				// through log.Fatalf.
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)

			// after the restore is done, start health check
	} else {
		// synchronously start health check if needed

	return agent, nil
Esempio n. 17
	Run(context.Context) error

var (
	retryDuration         = flag.Duration("retry_duration", 2*time.Hour, "Amount of time we wait before giving up on a retryable action (e.g. write to destination, waiting for healthy tablets)")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")
	useV3ReshardingMode   = flag.Bool("use_v3_resharding_mode", false, "True iff the workers should use V3-style resharding, which doesn't require a preset sharding key column.")

	healthCheckTopologyRefresh = flag.Duration("worker_healthcheck_topology_refresh", 30*time.Second, "refresh interval for re-reading the topology")
	healthcheckRetryDelay      = flag.Duration("worker_healthcheck_retry_delay", 5*time.Second, "delay before retrying a failed healthcheck")
	healthCheckTimeout         = flag.Duration("worker_healthcheck_timeout", time.Minute, "the health check timeout period")

	statsState = stats.NewString("WorkerState")
	// statsRetryCount is the total number of times a query to vttablet had to be retried.
	statsRetryCount = stats.NewInt("WorkerRetryCount")
	// statsRetryCounters groups the number of retries by category e.g. "TimeoutError" or "Readonly".
	statsRetryCounters = stats.NewCounters("WorkerRetryCounters")
	// statsThrottledCounters is the number of times a write has been throttled,
	// grouped by (keyspace, shard, threadID). Mainly used for testing.
	// If throttling is enabled, this should always be non-zero for all threads.
	statsThrottledCounters = stats.NewMultiCounters("WorkerThrottledCounters", []string{"keyspace", "shardname", "thread_id"})
	// statsStateDurationsNs tracks, for each state, how much time was spent in it. Mainly used for testing.
	statsStateDurationsNs = stats.NewCounters("WorkerStateDurations")

	// statsOnlineInsertsCounters tracks for every table how many rows were
	// inserted during the online clone (reconciliation) phase.
	statsOnlineInsertsCounters = stats.NewCounters("WorkerOnlineInsertsCounters")
	// statsOnlineUpdatesCounters tracks for every table how many rows were updated.
	statsOnlineUpdatesCounters = stats.NewCounters("WorkerOnlineUpdatesCounters")
	// statsOnlineDeletesCounters tracks for every table how many rows were deleted.
Esempio n. 18
import (


const (
	filteredReplicationUnfriendlyAnnotation = "/* vtgate:: filtered_replication_unfriendly */"

var (
	filteredReplicationUnfriendlyStatementsCount = stats.NewInt("FilteredReplicationUnfriendlyStatementsCount")

// AnnotateIfDML annotates 'sql' based on 'keyspaceIDs'.
//
// If 'sql' is not a DML statement, it is returned unchanged. If 'sql' is a
// DML statement and 'keyspaceIDs' contains exactly one keyspace ID, that ID
// is used to annotate 'sql'. Otherwise 'sql' is annotated as
// replication-unfriendly.
//
// NOTE(review): the replication-unfriendly branch is not visible in this
// excerpt.
func AnnotateIfDML(sql string, keyspaceIDs [][]byte) string {
	if !IsDML(sql) {
		return sql
	if len(keyspaceIDs) == 1 {
		return AddKeyspaceID(sql, keyspaceIDs[0], "")
Esempio n. 19
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
//
// A failure from agent.InitTablet is returned wrapped with an
// "agent.InitTablet failed" prefix; a failure from agent.Start is returned
// as is. In both cases the returned agent is nil.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.Controller,
	tabletAlias *topodatapb.TabletAlias,
	dbcfgs dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, gRPCPort int32,
	overridesFile string,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()

	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		SchemaOverrides:     schemaOverrides,
		History:             history.New(historyLength),
		lastHealthMapCount:  stats.NewInt("LastHealthMapCount"),
		// The agent starts with a non-nil health error until a health
		// check has run.
		_healthy:            fmt.Errorf("healthcheck not run yet"),

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)

	// Publish and set the TargetTabletType. Not a global var
	// since it should never be changed.
	statsTabletType := stats.NewString("TargetTabletType")

	// Create the TabletType stats
	agent.exportStats = true
	agent.statsTabletType = stats.NewString("TabletType")

	// Start the binlog player services, not playing at start.
	// The callback builds a DB client from the agent's Filtered DB config
	// each time it is invoked.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient {
		return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered)
	// Stop all binlog players upon entering lameduck.

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		// This err shadows the named result, so a lookup failure is only
		// logged below and never returned to the caller.
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)

	// Start will get the tablet info, and update our state from it
	if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil {
		return nil, err

	// register the RPC services from the agent

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreFromBackup(batchCtx); err != nil {
				// The failure is printed with println as well as logged
				// through log.Fatalf.
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)

			// after the restore is done, start health check
	} else {
		// synchronously start health check if needed

	return agent, nil
Esempio n. 20

	mproto ""
	tproto ""

	pb ""

var danglingTabletConn = stats.NewInt("DanglingTabletConn")

// ShardConn represents a load balanced connection to a group
// of vttablets that belong to the same shard. ShardConn can
// be concurrently used across goroutines. Such requests are
// interleaved on the same underlying connection.
type ShardConn struct {
	keyspace           string
	shard              string
	tabletType         topo.TabletType
	retryDelay         time.Duration
	retryCount         int
	connTimeoutTotal   time.Duration
	connTimeoutPerConn time.Duration
	connLife           time.Duration
	balancer           *Balancer
Esempio n. 21
	if len(lines) == 0 {
		log.Warningf("no results for %v %v", req.qtype, req.qname)
	return lines, nil

// write sends line to w with io.WriteString. A write error is logged and
// not returned to the caller.
func write(w io.Writer, line string) {
	_, err := io.WriteString(w, line)
	if err != nil {
		log.Errorf("write failed: %v", err)

// Integer stats variables for the pdns handler.
// NOTE(review): presumably they count requests served, errors, and lookups
// that returned no lines — confirm where they are incremented.
var (
	requestCount = stats.NewInt("PdnsRequestCount")
	errorCount   = stats.NewInt("PdnsErrorCount")
	emptyCount   = stats.NewInt("PdnsEmptyCount")

func (pd *pdns) Serve(r io.Reader, w io.Writer) {
	log.Infof("starting zkns resolver")
	bufr := bufio.NewReader(r)
	needHandshake := true
	for {
		line, isPrefix, err := bufr.ReadLine()
		if err == nil && isPrefix {
			err = errLongLine
		if err == io.EOF {
Esempio n. 22
	// ResolveDestinationMasters forces the worker to (re)resolve the topology and update
	// the destination masters that it knows about.
	ResolveDestinationMasters(ctx context.Context) error

	// GetDestinationMaster returns the most recently resolved destination master for a particular shard.
	GetDestinationMaster(shardName string) (*topo.TabletInfo, error)

var (
	resolveTTL            = flag.Duration("resolve_ttl", 15*time.Second, "Amount of time that a topo resolution can be cached for")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")

	statsState = stats.NewString("WorkerState")
	// the number of times that the worker attempts to re-resolve the masters
	statsDestinationAttemptedResolves = stats.NewInt("WorkerDestinationAttemptedResolves")
	// the number of times that the worker actually hits the topo server, i.e., they don't
	// use a cached topology
	statsDestinationActualResolves = stats.NewInt("WorkerDestinationActualResolves")
	statsRetryCounters             = stats.NewCounters("WorkerRetryCount")

// resetVars resets the debug variables that are meant to provide information on a
// per-run basis. This should be called at the beginning of each worker run.
func resetVars() {
Esempio n. 23

	topodatapb ""
	vtrpcpb ""

var (
	enableFakeMasterBuffer = flag.Bool("enable_fake_master_buffer", false, "Enable fake master buffering.")
	bufferKeyspace         = flag.String("buffer_keyspace", "", "The name of the keyspace to buffer master requests on.")
	bufferShard            = flag.String("buffer_shard", "", "The name of the shard to buffer master requests on.")
	maxBufferSize          = flag.Int("max_buffer_size", 10, "The maximum number of master requests to buffer at a time.")
	fakeBufferDelay        = flag.Duration("fake_buffer_delay", 1*time.Second, "The amount of time that we should delay all master requests for, to fake a buffer.")

	bufferedRequestsAttempted  = stats.NewInt("BufferedRequestsAttempted")
	bufferedRequestsSuccessful = stats.NewInt("BufferedRequestsSuccessful")
	// Use this lock when adding to the number of currently buffered requests.
	bufferMu         sync.Mutex
	bufferedRequests = stats.NewInt("BufferedRequests")

// timeSleep can be mocked out in unit tests
var timeSleep = time.Sleep

// errBufferFull is the error returned when a buffer request is rejected because the buffer is full.
var errBufferFull = vterrors.FromError(
	errors.New("master request buffer full, rejecting request"),
Esempio n. 24

	log ""
	rpc ""

const (
	connected = "200 Connected to Go RPC"

var (
	connCount    = stats.NewInt("connection-count")
	connAccepted = stats.NewInt("connection-accepted")

// ClientCodecFactory builds an rpc.ClientCodec on top of the given
// connection.
type ClientCodecFactory func(conn io.ReadWriteCloser) rpc.ClientCodec

type BufferedConnection struct {
	isClosed bool

// NewBufferedConnection wraps conn in a BufferedConnection whose isClosed
// flag starts out false and which holds both a bufio.Reader over conn and
// conn itself.
func NewBufferedConnection(conn io.ReadWriteCloser) *BufferedConnection {
	return &BufferedConnection{false, bufio.NewReader(conn), conn}
Esempio n. 25
package umgmt

import (

	log ""

var connectionCount = stats.NewInt("ConnectionCount")
var connectionAccepted = stats.NewInt("ConnectionAccepted")

type connCountConn struct {
	listener *connCountListener
	closed   bool

func (c *connCountConn) Close() (err error) {
	// in case there is a race closing a client
	if c.closed {
		return nil
	err = c.Conn.Close()
	c.closed = true