// Published creates a wrapper for net.Listener that
// publishes connection stats.
func Published(l net.Listener, countTag, acceptTag string) net.Listener {
	return &CountingListener{
		Listener:   l,
		ConnCount:  stats.NewInt(countTag),
		ConnAccept: stats.NewInt(acceptTag),
	}
}
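// CountingListener itself is not shown in this excerpt. Below is a minimal
// sketch of what it plausibly looks like, inferred from the composite literal
// above; the field names are real, but the Accept accounting is an assumption,
// not the verified implementation.
type CountingListener struct {
	net.Listener
	ConnCount  *stats.Int // assumed: gauge of currently open connections
	ConnAccept *stats.Int // assumed: running total of accepted connections
}

// Accept wraps the embedded listener's Accept and bumps both stats on success.
func (l *CountingListener) Accept() (net.Conn, error) {
	conn, err := l.Listener.Accept()
	if err == nil {
		l.ConnCount.Add(1)
		l.ConnAccept.Add(1)
	}
	return conn, err
}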
func Init() {
	mu.Lock()
	defer mu.Unlock()
	if inited {
		log.Fatal("servenv.Init called second time")
	}
	inited = true

	// Once you run as root, you pretty much destroy the chances of a
	// non-privileged user starting the program correctly.
	if uid := os.Getuid(); uid == 0 {
		log.Fatalf("servenv.Init: running this as root makes no sense")
	}

	runtime.MemProfileRate = *memProfileRate
	gomaxprocs := os.Getenv("GOMAXPROCS")
	if gomaxprocs == "" {
		gomaxprocs = "1"
	}

	// We used to set this limit directly, but you pretty much have to
	// use a root account to allow increasing a limit reliably. Dropping
	// privileges is also tricky. The best strategy is to make a shell
	// script set up the limits as root and switch users before starting
	// the server.
	fdLimit := &syscall.Rlimit{}
	if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, fdLimit); err != nil {
		log.Errorf("max-open-fds failed: %v", err)
	}
	fdl := stats.NewInt("MaxFds")
	fdl.Set(int64(fdLimit.Cur))

	onInitHooks.Fire()
}
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}
	qe.cachePool = NewCachePool("CachePool", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.connPool = NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamTokens = sync2.NewSemaphore(config.StreamExecThrottle, time.Duration(config.StreamWaitTimeout*1e9))
	qe.reservedPool = NewReservedPool("ReservedPool")
	qe.txPool = NewConnectionPool("TxPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // the number of connections in the pool has to be > TransactionCap
	qe.activeTxPool = NewActiveTxPool("ActiveTxPool", time.Duration(config.TransactionTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.consolidator = NewConsolidator()
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)

	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	stats.NewRates("QPS", queryStats, 15, 60e9)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	errorStats = stats.NewCounters("Errors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("SpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	}))
	spotCheckCount = stats.NewInt("SpotCheckCount")
	return qe
}
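// Multiplying SpotCheckRatio by SPOT_CHECK_MULTIPLIER is a fixed-point trick:
// it lets a float ratio live in an atomic int64, and the published FloatFunc
// above divides it back out. A minimal sketch of the round trip; the
// multiplier's actual value is an assumption made for illustration.
const SPOT_CHECK_MULTIPLIER = 1e6 // assumed value, for illustration only

func spotCheckRoundTrip() {
	freq := sync2.AtomicInt64(0.25 * SPOT_CHECK_MULTIPLIER) // store 0.25 as 250000
	ratio := float64(freq.Get()) / SPOT_CHECK_MULTIPLIER    // read back as 0.25
	_ = ratio
}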
func init() {
	// init global goroutines to aggregate stats.
	aggrChan = make(chan *queryInfo, aggrChanSize)
	gatewayStatsChanFull = stats.NewInt("GatewayStatsChanFullCount")
	go resetAggregators()
	go processQueryInfo()
}
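// The "GatewayStatsChanFullCount" name suggests producers enqueue with a
// non-blocking send and count drops when aggrChan is full. A sketch of that
// pattern under this assumption; enqueueQueryInfo is a hypothetical helper,
// not part of the excerpt above.
func enqueueQueryInfo(qi *queryInfo) {
	select {
	case aggrChan <- qi:
		// queued; one of the aggregator goroutines will pick it up
	default:
		// channel full: drop the sample and record that it happened
		gatewayStatsChanFull.Add(1)
	}
}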
// NewQueryServiceStats returns a new QueryServiceStats instance.
func NewQueryServiceStats(statsPrefix string, enablePublishStats bool) *QueryServiceStats {
	mysqlStatsName := ""
	queryStatsName := ""
	qpsRateName := ""
	waitStatsName := ""
	killStatsName := ""
	infoErrorsName := ""
	errorStatsName := ""
	internalErrorsName := ""
	resultStatsName := ""
	spotCheckCountName := ""
	userTableQueryCountName := ""
	userTableQueryTimesNsName := ""
	userTransactionCountName := ""
	userTransactionTimesNsName := ""
	if enablePublishStats {
		mysqlStatsName = statsPrefix + "Mysql"
		queryStatsName = statsPrefix + "Queries"
		qpsRateName = statsPrefix + "QPS"
		waitStatsName = statsPrefix + "Waits"
		killStatsName = statsPrefix + "Kills"
		infoErrorsName = statsPrefix + "InfoErrors"
		errorStatsName = statsPrefix + "Errors"
		internalErrorsName = statsPrefix + "InternalErrors"
		resultStatsName = statsPrefix + "Results"
		spotCheckCountName = statsPrefix + "RowcacheSpotCheckCount"
		userTableQueryCountName = statsPrefix + "UserTableQueryCount"
		userTableQueryTimesNsName = statsPrefix + "UserTableQueryTimesNs"
		userTransactionCountName = statsPrefix + "UserTransactionCount"
		userTransactionTimesNsName = statsPrefix + "UserTransactionTimesNs"
	}
	resultBuckets := []int64{0, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000}
	queryStats := stats.NewTimings(queryStatsName)
	return &QueryServiceStats{
		MySQLStats: stats.NewTimings(mysqlStatsName),
		QueryStats: queryStats,
		WaitStats:  stats.NewTimings(waitStatsName),
		KillStats:  stats.NewCounters(killStatsName, "Transactions", "Queries"),
		InfoErrors: stats.NewCounters(infoErrorsName, "Retry", "Fatal", "DupKey"),
		ErrorStats: stats.NewCounters(errorStatsName, "Fail", "TxPoolFull", "NotInTx", "Deadlock"),
		InternalErrors: stats.NewCounters(internalErrorsName, "Task", "MemcacheStats",
			"Mismatch", "StrayTransactions", "Invalidation", "Panic", "HungQuery", "Schema"),
		UserTableQueryCount: stats.NewMultiCounters(
			userTableQueryCountName, []string{"TableName", "CallerID", "Type"}),
		UserTableQueryTimesNs: stats.NewMultiCounters(
			userTableQueryTimesNsName, []string{"TableName", "CallerID", "Type"}),
		UserTransactionCount: stats.NewMultiCounters(
			userTransactionCountName, []string{"CallerID", "Conclusion"}),
		UserTransactionTimesNs: stats.NewMultiCounters(
			userTransactionTimesNsName, []string{"CallerID", "Conclusion"}),
		// Sample every 5 seconds and keep samples for up to 15 minutes.
		QPSRates:       stats.NewRates(qpsRateName, queryStats, 15*60/5, 5*time.Second),
		ResultStats:    stats.NewHistogram(resultStatsName, resultBuckets),
		SpotCheckCount: stats.NewInt(spotCheckCountName),
	}
}
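// The empty-name defaults above rely on a convention of this stats package:
// a variable created with an empty name is fully usable but is not published.
// A minimal usage sketch of the two modes, assuming that convention;
// newStatsExample is a hypothetical caller, not part of the excerpt.
func newStatsExample() {
	published := NewQueryServiceStats("Test", true) // exports "TestQueries", "TestQPS", ...
	private := NewQueryServiceStats("", false)      // counters work, but nothing is exported
	private.SpotCheckCount.Add(1)
	_ = published
}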
func TestInt(t *testing.T) {
	v := stats.NewInt("")
	load := func() { v.Set(1234) }
	testMetric(t, v, load,
		`Desc{fqName: "test_name", help: "test_help", constLabels: {}, variableLabels: []}`,
		`gauge:<value:1234 > `,
	)
}
func init() {
	t, err := time.Parse(time.UnixDate, buildTime)
	if buildTime != "" && err != nil {
		panic(fmt.Sprintf("Couldn't parse build timestamp %q: %v", buildTime, err))
	}
	stats.NewString("BuildHost").Set(buildHost)
	stats.NewString("BuildUser").Set(buildUser)
	stats.NewInt("BuildTimestamp").Set(t.Unix())
	stats.NewString("BuildGitRev").Set(buildGitRev)
}
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
func NewActionAgent(
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort int,
	overridesFile string,
	lockTimeout time.Duration,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()
	mysqld := mysqlctl.NewMysqld("Dba", mycnf, &dbcfgs.Dba, &dbcfgs.Repl)

	agent = &ActionAgent{
		TopoServer:         topoServer,
		TabletAlias:        tabletAlias,
		Mysqld:             mysqld,
		MysqlDaemon:        mysqld,
		DBConfigs:          dbcfgs,
		SchemaOverrides:    schemaOverrides,
		LockTimeout:        lockTimeout,
		done:               make(chan struct{}),
		History:            history.New(historyLength),
		lastHealthMapCount: stats.NewInt("LastHealthMapCount"),
		changeItems:        make(chan tabletChangeItem, 100),
	}

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, &dbcfgs.Filtered, mysqld)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	if err := agent.Start(mysqlPort, port, securePort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.registerQueryService()

	// start health check if needed
	agent.initHeathCheck()

	return agent, nil
}
func NewZkReader(resolveLocal bool, preload []string) *ZkReader {
	zkr := &ZkReader{zcell: make(map[string]*zkCell), resolveLocal: resolveLocal}
	if resolveLocal {
		zkr.localCell = zk.GuessLocalCell()
	}
	zkr.rpcCalls = stats.NewInt("ZkReaderRpcCalls")
	zkr.unknownCellErrors = stats.NewInt("ZkReaderUnknownCellErrors")
	zkr.zkrStats = newZkrStats()

	stats.PublishJSONFunc("ZkReader", zkr.statsJSON)

	// start some cells
	for _, cellName := range preload {
		_, path, err := zkr.getCell("/zk/" + cellName)
		if err != nil {
			log.Errorf("Cell %v could not be preloaded: %v", cellName, err)
		} else {
			log.Infof("Cell %v preloaded for: %v", cellName, path)
		}
	}
	return zkr
}
// NewQueryEngine creates a new QueryEngine.
// This is a singleton class.
// You must call this only once.
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9))
	mysqlStats = stats.NewTimings("Mysql")

	// Pools
	qe.cachePool = NewCachePool("Rowcache", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.connPool = dbconnpool.NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = dbconnpool.NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.txPool = dbconnpool.NewConnectionPool("TransactionPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // the number of connections in the pool has to be > TransactionCap

	// Services
	qe.activeTxPool = NewActiveTxPool("ActiveTransactionPool", time.Duration(config.TransactionTimeout*1e9))
	qe.connKiller = NewConnectionKiller(1, time.Duration(config.IdleTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), qe.connKiller)
	qe.consolidator = NewConsolidator()
	qe.invalidator = NewRowcacheInvalidator(qe)
	qe.streamQList = NewQueryList(qe.connKiller)

	// Vars
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	if config.StrictMode {
		qe.strictMode.Set(1)
	}
	qe.strictTableAcl = config.StrictTableAcl
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)

	// Loggers
	qe.accessCheckerLogger = logutil.NewThrottledLogger("accessChecker", 1*time.Second)

	// Stats
	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	QPSRates = stats.NewRates("QPS", queryStats, 15, 60*time.Second)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	infoErrors = stats.NewCounters("InfoErrors")
	errorStats = stats.NewCounters("Errors")
	internalErrors = stats.NewCounters("InternalErrors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("RowcacheSpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	}))
	spotCheckCount = stats.NewInt("RowcacheSpotCheckCount")

	return qe
}
// NewQueryEngine creates a new QueryEngine.
// This is a singleton class.
// You must call this only once.
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}

	// services
	qe.cachePool = NewCachePool("RowcachePool", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9), config.SensitiveMode)
	qe.connPool = NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.txPool = NewConnectionPool("TransactionPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // the number of connections in the pool has to be > TransactionCap
	qe.activeTxPool = NewActiveTxPool("ActiveTransactionPool", time.Duration(config.TransactionTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.consolidator = NewConsolidator()

	// vars
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	if config.StrictMode {
		qe.strictMode.Set(1)
	}
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)

	// stats
	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	QPSRates = stats.NewRates("QPS", queryStats, 15, 60*time.Second)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	infoErrors = stats.NewCounters("InfoErrors")
	errorStats = stats.NewCounters("Errors")
	internalErrors = stats.NewCounters("InternalErrors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("SpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	}))
	spotCheckCount = stats.NewInt("SpotCheckCount")

	return qe
}
// NewQueryServiceStats returns a new QueryServiceStats instance.
func NewQueryServiceStats(statsPrefix string, enablePublishStats bool) *QueryServiceStats {
	mysqlStatsName := ""
	queryStatsName := ""
	qpsRateName := ""
	waitStatsName := ""
	killStatsName := ""
	infoErrorsName := ""
	errorStatsName := ""
	internalErrorsName := ""
	resultStatsName := ""
	spotCheckCountName := ""
	if enablePublishStats {
		mysqlStatsName = statsPrefix + "Mysql"
		queryStatsName = statsPrefix + "Queries"
		qpsRateName = statsPrefix + "QPS"
		waitStatsName = statsPrefix + "Waits"
		killStatsName = statsPrefix + "Kills"
		infoErrorsName = statsPrefix + "InfoErrors"
		errorStatsName = statsPrefix + "Errors"
		internalErrorsName = statsPrefix + "InternalErrors"
		resultStatsName = statsPrefix + "Results"
		spotCheckCountName = statsPrefix + "RowcacheSpotCheckCount"
	}
	resultBuckets := []int64{0, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000}
	queryStats := stats.NewTimings(queryStatsName)
	return &QueryServiceStats{
		MySQLStats:     stats.NewTimings(mysqlStatsName),
		QueryStats:     queryStats,
		WaitStats:      stats.NewTimings(waitStatsName),
		KillStats:      stats.NewCounters(killStatsName),
		InfoErrors:     stats.NewCounters(infoErrorsName),
		ErrorStats:     stats.NewCounters(errorStatsName),
		InternalErrors: stats.NewCounters(internalErrorsName),
		QPSRates:       stats.NewRates(qpsRateName, queryStats, 15, 60*time.Second),
		ResultStats:    stats.NewHistogram(resultStatsName, resultBuckets),
		SpotCheckCount: stats.NewInt(spotCheckCountName),
	}
}
	Run(context.Context) error
}

var (
	retryDuration         = flag.Duration("retry_duration", 2*time.Hour, "Amount of time we wait before giving up on a retryable action (e.g. write to destination, waiting for healthy tablets)")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")
	useV3ReshardingMode   = flag.Bool("use_v3_resharding_mode", false, "True iff the workers should use V3-style resharding, which doesn't require a preset sharding key column.")

	healthCheckTopologyRefresh = flag.Duration("worker_healthcheck_topology_refresh", 30*time.Second, "refresh interval for re-reading the topology")
	healthcheckRetryDelay      = flag.Duration("worker_healthcheck_retry_delay", 5*time.Second, "delay before retrying a failed healthcheck")
	healthCheckTimeout         = flag.Duration("worker_healthcheck_timeout", time.Minute, "the health check timeout period")

	statsState = stats.NewString("WorkerState")
	// statsRetryCount is the total number of times a query to vttablet had to be retried.
	statsRetryCount = stats.NewInt("WorkerRetryCount")
	// statsRetryCounters groups the number of retries by category, e.g. "TimeoutError" or "Readonly".
	statsRetryCounters = stats.NewCounters("WorkerRetryCounters")
	// statsThrottledCounters is the number of times a write has been throttled,
	// grouped by (keyspace, shard, threadID). Mainly used for testing.
	// If throttling is enabled, this should always be non-zero for all threads.
	statsThrottledCounters = stats.NewMultiCounters("WorkerThrottledCounters", []string{"keyspace", "shardname", "thread_id"})
	// statsStateDurationsNs tracks for each state how much time was spent in it. Mainly used for testing.
	statsStateDurationsNs = stats.NewCounters("WorkerStateDurations")
	// statsOnlineInsertsCounters tracks for every table how many rows were
	// inserted during the online clone (reconciliation) phase.
	statsOnlineInsertsCounters = stats.NewCounters("WorkerOnlineInsertsCounters")
	// statsOnlineUpdatesCounters tracks for every table how many rows were updated.
	statsOnlineUpdatesCounters = stats.NewCounters("WorkerOnlineUpdatesCounters")
	// statsOnlineDeletesCounters tracks for every table how many rows were deleted.
	statsOnlineDeletesCounters = stats.NewCounters("WorkerOnlineDeletesCounters")
querypb "github.com/youtube/vitess/go/vt/proto/query" ) // NewConnFunc is a factory method that creates a Conn instance // using given ConnParams. type NewConnFunc func(params ConnParams) (Conn, error) var ( defaultConn NewConnFunc // mu protects conns. mu sync.Mutex conns = make(map[string]NewConnFunc) // Keep track of the total number of connections opened to MySQL. This is mainly // useful for tests, where we use this to approximate the number of ports that we used. connCount = stats.NewInt("mysql-new-connection-count") ) // Conn defines the behavior for the low level db connection type Conn interface { // ExecuteFetch executes the query on the connection ExecuteFetch(query string, maxrows int, wantfields bool) (*sqltypes.Result, error) // ExecuteFetchMap returns a map from column names to cell data for a query // that should return exactly 1 row. ExecuteFetchMap(query string) (map[string]string, error) // ExecuteStreamFetch starts a streaming query to db server. Use FetchNext // on the Connection until it returns nil or error ExecuteStreamFetch(query string) error // Close closes the db connection Close() // IsClosed returns if the connection was ever closed
const (
	DISABLED int64 = iota
	ENABLED
)

var usStateNames = map[int64]string{
	ENABLED:  "Enabled",
	DISABLED: "Disabled",
}

var (
	streamCount          = stats.NewCounters("UpdateStreamStreamCount")
	updateStreamErrors   = stats.NewCounters("UpdateStreamErrors")
	updateStreamEvents   = stats.NewCounters("UpdateStreamEvents")
	keyrangeStatements   = stats.NewInt("UpdateStreamKeyRangeStatements")
	keyrangeTransactions = stats.NewInt("UpdateStreamKeyRangeTransactions")
	tablesStatements     = stats.NewInt("UpdateStreamTablesStatements")
	tablesTransactions   = stats.NewInt("UpdateStreamTablesTransactions")
)

type UpdateStream struct {
	mycnf *mysqlctl.Mycnf

	actionLock     sync.Mutex
	state          sync2.AtomicInt64
	mysqld         *mysqlctl.Mysqld
	stateWaitGroup sync.WaitGroup
	dbname         string
	streams        streamList
}
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
//
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.QueryServiceControl,
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort, gRPCPort int,
	overridesFile string,
	lockTimeout time.Duration,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()

	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		SchemaOverrides:     schemaOverrides,
		LockTimeout:         lockTimeout,
		History:             history.New(historyLength),
		lastHealthMapCount:  stats.NewInt("LastHealthMapCount"),
		_healthy:            fmt.Errorf("healthcheck not run yet"),
		healthStreamMap:     make(map[int]chan<- *actionnode.HealthStreamReply),
	}

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, securePort, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)
	}

	// Publish and set the TargetTabletType. Not a global var
	// since it should never be changed.
	statsTabletType := stats.NewString("TargetTabletType")
	statsTabletType.Set(*targetTabletType)

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, &dbcfgs.Filtered, mysqld)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	if err := agent.Start(batchCtx, mysqlPort, port, securePort, gRPCPort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.registerQueryService()

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreFromBackup(batchCtx); err != nil {
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)
			}

			// after the restore is done, start health check
			agent.initHeathCheck()
		}()
	} else {
		// synchronously start health check if needed
		agent.initHeathCheck()
	}

	return agent, nil
}
import ( "encoding/hex" "fmt" "log" "strings" "unicode" "github.com/youtube/vitess/go/stats" ) const ( filteredReplicationUnfriendlyAnnotation = "/* vtgate:: filtered_replication_unfriendly */" ) var ( filteredReplicationUnfriendlyStatementsCount = stats.NewInt("FilteredReplicationUnfriendlyStatementsCount") ) // AnnotateIfDML annotates 'sql' based on 'keyspaceIDs' // // If 'sql' is not a DML statement no annotation is added. // If 'sql' is a DML statement and contains exactly one keyspaceID // it is used to annotate 'sql' // Otherwise 'sql' is annotated as replication-unfriendly. func AnnotateIfDML(sql string, keyspaceIDs [][]byte) string { if !IsDML(sql) { return sql } if len(keyspaceIDs) == 1 { return AddKeyspaceID(sql, keyspaceIDs[0], "") }
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
//
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.Controller,
	tabletAlias *topodatapb.TabletAlias,
	dbcfgs dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, gRPCPort int32,
	overridesFile string,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()

	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		SchemaOverrides:     schemaOverrides,
		History:             history.New(historyLength),
		lastHealthMapCount:  stats.NewInt("LastHealthMapCount"),
		_healthy:            fmt.Errorf("healthcheck not run yet"),
	}
	agent.registerQueryRuleSources()

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)
	}

	// Publish and set the TargetTabletType. Not a global var
	// since it should never be changed.
	statsTabletType := stats.NewString("TargetTabletType")
	statsTabletType.Set(*targetTabletType)

	// Create the TabletType stats
	agent.exportStats = true
	agent.statsTabletType = stats.NewString("TabletType")

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient {
		return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered)
	})
	// Stop all binlog players upon entering lameduck.
	servenv.OnTerm(agent.BinlogPlayerMap.StopAllPlayersAndReset)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	// Start will get the tablet info, and update our state from it
	if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.registerQueryService()

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreFromBackup(batchCtx); err != nil {
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)
			}

			// after the restore is done, start health check
			agent.initHealthCheck()
		}()
	} else {
		// synchronously start health check if needed
		agent.initHealthCheck()
	}

	return agent, nil
}
"time" mproto "github.com/youtube/vitess/go/mysql/proto" "github.com/youtube/vitess/go/stats" "github.com/youtube/vitess/go/sync2" "github.com/youtube/vitess/go/timer" "github.com/youtube/vitess/go/vt/concurrency" tproto "github.com/youtube/vitess/go/vt/tabletserver/proto" "github.com/youtube/vitess/go/vt/tabletserver/tabletconn" "github.com/youtube/vitess/go/vt/topo" "golang.org/x/net/context" pb "github.com/youtube/vitess/go/vt/proto/topodata" ) var danglingTabletConn = stats.NewInt("DanglingTabletConn") // ShardConn represents a load balanced connection to a group // of vttablets that belong to the same shard. ShardConn can // be concurrently used across goroutines. Such requests are // interleaved on the same underlying connection. type ShardConn struct { keyspace string shard string tabletType topo.TabletType retryDelay time.Duration retryCount int connTimeoutTotal time.Duration connTimeoutPerConn time.Duration connLife time.Duration balancer *Balancer
	if len(lines) == 0 {
		emptyCount.Add(1)
		log.Warningf("no results for %v %v", req.qtype, req.qname)
	}
	return lines, nil
}

func write(w io.Writer, line string) {
	_, err := io.WriteString(w, line)
	if err != nil {
		log.Errorf("write failed: %v", err)
	}
}

var (
	requestCount = stats.NewInt("PdnsRequestCount")
	errorCount   = stats.NewInt("PdnsErrorCount")
	emptyCount   = stats.NewInt("PdnsEmptyCount")
)

func (pd *pdns) Serve(r io.Reader, w io.Writer) {
	log.Infof("starting zkns resolver")
	bufr := bufio.NewReader(r)
	needHandshake := true
	for {
		line, isPrefix, err := bufr.ReadLine()
		if err == nil && isPrefix {
			err = errLongLine
		}
		if err == io.EOF {
			return
	// ResolveDestinationMasters forces the worker to (re)resolve the topology and update
	// the destination masters that it knows about.
	ResolveDestinationMasters(ctx context.Context) error

	// GetDestinationMaster returns the most recently resolved destination master for a particular shard.
	GetDestinationMaster(shardName string) (*topo.TabletInfo, error)
}

var (
	resolveTTL            = flag.Duration("resolve_ttl", 15*time.Second, "Amount of time that a topo resolution can be cached for")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")

	statsState = stats.NewString("WorkerState")
	// statsDestinationAttemptedResolves is the number of times that the worker attempts to re-resolve the masters.
	statsDestinationAttemptedResolves = stats.NewInt("WorkerDestinationAttemptedResolves")
	// statsDestinationActualResolves is the number of times that the worker actually hits the topo server,
	// i.e. it doesn't use a cached topology.
	statsDestinationActualResolves = stats.NewInt("WorkerDestinationActualResolves")
	statsRetryCounters             = stats.NewCounters("WorkerRetryCount")
)

// resetVars resets the debug variables that are meant to provide information on a
// per-run basis. This should be called at the beginning of each worker run.
func resetVars() {
	statsState.Set("")
	statsDestinationAttemptedResolves.Set(0)
	statsDestinationActualResolves.Set(0)
	statsRetryCounters.Reset()
}
"time" "github.com/youtube/vitess/go/stats" topodatapb "github.com/youtube/vitess/go/vt/proto/topodata" vtrpcpb "github.com/youtube/vitess/go/vt/proto/vtrpc" "github.com/youtube/vitess/go/vt/vterrors" ) var ( enableFakeMasterBuffer = flag.Bool("enable_fake_master_buffer", false, "Enable fake master buffering.") bufferKeyspace = flag.String("buffer_keyspace", "", "The name of the keyspace to buffer master requests on.") bufferShard = flag.String("buffer_shard", "", "The name of the shard to buffer master requests on.") maxBufferSize = flag.Int("max_buffer_size", 10, "The maximum number of master requests to buffer at a time.") fakeBufferDelay = flag.Duration("fake_buffer_delay", 1*time.Second, "The amount of time that we should delay all master requests for, to fake a buffer.") bufferedRequestsAttempted = stats.NewInt("BufferedRequestsAttempted") bufferedRequestsSuccessful = stats.NewInt("BufferedRequestsSuccessful") // Use this lock when adding to the number of currently buffered requests. bufferMu sync.Mutex bufferedRequests = stats.NewInt("BufferedRequests") ) // timeSleep can be mocked out in unit tests var timeSleep = time.Sleep // errBufferFull is the error returned a buffer request is rejected because the buffer is full. var errBufferFull = vterrors.FromError( vtrpcpb.ErrorCode_TRANSIENT_ERROR, errors.New("master request buffer full, rejecting request"), )
"net/http" "time" log "github.com/golang/glog" rpc "github.com/youtube/vitess/go/rpcplus" "github.com/youtube/vitess/go/rpcwrap/auth" "github.com/youtube/vitess/go/rpcwrap/proto" "github.com/youtube/vitess/go/stats" ) const ( connected = "200 Connected to Go RPC" ) var ( connCount = stats.NewInt("connection-count") connAccepted = stats.NewInt("connection-accepted") ) type ClientCodecFactory func(conn io.ReadWriteCloser) rpc.ClientCodec type BufferedConnection struct { isClosed bool *bufio.Reader io.WriteCloser } func NewBufferedConnection(conn io.ReadWriteCloser) *BufferedConnection { connCount.Add(1) connAccepted.Add(1) return &BufferedConnection{false, bufio.NewReader(conn), conn}
package umgmt

import (
	"crypto/tls"
	"crypto/x509"
	"net"
	"net/http"
	"os"
	"strings"
	"sync"

	log "github.com/golang/glog"
	"github.com/youtube/vitess/go/stats"
)

var connectionCount = stats.NewInt("ConnectionCount")
var connectionAccepted = stats.NewInt("ConnectionAccepted")

type connCountConn struct {
	net.Conn
	listener *connCountListener
	closed   bool
}

func (c *connCountConn) Close() (err error) {
	// in case there is a race closing a client
	if c.closed {
		return nil
	}
	err = c.Conn.Close()
	c.closed = true