// Published creates a wrapper for net.Listener that
// publishes connection stats.
func Published(l net.Listener, countTag, acceptTag string) net.Listener {
	return &CountingListener{
		Listener:   l,
		ConnCount:  stats.NewInt(countTag),
		ConnAccept: stats.NewInt(acceptTag),
	}
}
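A minimal usage sketch for the wrapper above (imports: net, net/http, log), assuming it lives in the same package and that CountingListener updates the two published counters as connections are accepted and closed; the address and tag names here are illustrative:

	l, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		log.Fatalf("listen failed: %v", err)
	}
	// Wrap the listener so connection stats are exported under the two tags.
	published := Published(l, "ConnCount", "ConnAccepted")
	// Serve on the wrapped listener; accepted connections should show up
	// in the exported counters.
	go http.Serve(published, nil)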
Example #2
func Init() {
	mu.Lock()
	defer mu.Unlock()
	if inited {
		log.Fatal("servenv.Init called second time")
	}
	inited = true

	// Once you run as root, you pretty much destroy the chances of a
	// non-privileged user starting the program correctly.
	if uid := os.Getuid(); uid == 0 {
		log.Fatalf("servenv.Init: running this as root makes no sense")
	}

	runtime.MemProfileRate = *memProfileRate
	gomaxprocs := os.Getenv("GOMAXPROCS")
	if gomaxprocs == "" {
		gomaxprocs = "1"
	}

	// We used to set this limit directly, but you pretty much have to
	// use a root account to allow increasing a limit reliably. Dropping
	// privileges is also tricky. The best strategy is to make a shell
	// script set up the limits as root and switch users before starting
	// the server.
	fdLimit := &syscall.Rlimit{}
	if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, fdLimit); err != nil {
		log.Errorf("max-open-fds failed: %v", err)
	}
	fdl := stats.NewInt("MaxFds")
	fdl.Set(int64(fdLimit.Cur))

	onInitHooks.Fire()
}
Example #3
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}
	qe.cachePool = NewCachePool("CachePool", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.connPool = NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamTokens = sync2.NewSemaphore(config.StreamExecThrottle, time.Duration(config.StreamWaitTimeout*1e9))
	qe.reservedPool = NewReservedPool("ReservedPool")
	qe.txPool = NewConnectionPool("TxPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // connections in pool have to be > transactionCap
	qe.activeTxPool = NewActiveTxPool("ActiveTxPool", time.Duration(config.TransactionTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.consolidator = NewConsolidator()
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)
	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	stats.NewRates("QPS", queryStats, 15, 60e9)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	errorStats = stats.NewCounters("Errors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("SpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	}))
	spotCheckCount = stats.NewInt("SpotCheckCount")
	return qe
}
Example #4
func init() {
	// init global goroutines to aggregate stats.
	aggrChan = make(chan *queryInfo, aggrChanSize)
	gatewayStatsChanFull = stats.NewInt("GatewayStatsChanFullCount")
	go resetAggregators()
	go processQueryInfo()
}
Example #5
// NewQueryServiceStats returns a new QueryServiceStats instance.
func NewQueryServiceStats(statsPrefix string, enablePublishStats bool) *QueryServiceStats {
	mysqlStatsName := ""
	queryStatsName := ""
	qpsRateName := ""
	waitStatsName := ""
	killStatsName := ""
	infoErrorsName := ""
	errorStatsName := ""
	internalErrorsName := ""
	resultStatsName := ""
	spotCheckCountName := ""
	userTableQueryCountName := ""
	userTableQueryTimesNsName := ""
	userTransactionCountName := ""
	userTransactionTimesNsName := ""
	if enablePublishStats {
		mysqlStatsName = statsPrefix + "Mysql"
		queryStatsName = statsPrefix + "Queries"
		qpsRateName = statsPrefix + "QPS"
		waitStatsName = statsPrefix + "Waits"
		killStatsName = statsPrefix + "Kills"
		infoErrorsName = statsPrefix + "InfoErrors"
		errorStatsName = statsPrefix + "Errors"
		internalErrorsName = statsPrefix + "InternalErrors"
		resultStatsName = statsPrefix + "Results"
		spotCheckCountName = statsPrefix + "RowcacheSpotCheckCount"
		userTableQueryCountName = statsPrefix + "UserTableQueryCount"
		userTableQueryTimesNsName = statsPrefix + "UserTableQueryTimesNs"
		userTransactionCountName = statsPrefix + "UserTransactionCount"
		userTransactionTimesNsName = statsPrefix + "UserTransactionTimesNs"
	}
	resultBuckets := []int64{0, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000}
	queryStats := stats.NewTimings(queryStatsName)
	return &QueryServiceStats{
		MySQLStats: stats.NewTimings(mysqlStatsName),
		QueryStats: queryStats,
		WaitStats:  stats.NewTimings(waitStatsName),
		KillStats:  stats.NewCounters(killStatsName, "Transactions", "Queries"),
		InfoErrors: stats.NewCounters(infoErrorsName, "Retry", "Fatal", "DupKey"),
		ErrorStats: stats.NewCounters(errorStatsName, "Fail", "TxPoolFull", "NotInTx", "Deadlock"),
		InternalErrors: stats.NewCounters(internalErrorsName, "Task", "MemcacheStats",
			"Mismatch", "StrayTransactions", "Invalidation", "Panic", "HungQuery", "Schema"),
		UserTableQueryCount: stats.NewMultiCounters(
			userTableQueryCountName, []string{"TableName", "CallerID", "Type"}),
		UserTableQueryTimesNs: stats.NewMultiCounters(
			userTableQueryTimesNsName, []string{"TableName", "CallerID", "Type"}),
		UserTransactionCount: stats.NewMultiCounters(
			userTransactionCountName, []string{"CallerID", "Conclusion"}),
		UserTransactionTimesNs: stats.NewMultiCounters(
			userTransactionTimesNsName, []string{"CallerID", "Conclusion"}),
		// Sample every 5 seconds and keep samples for up to 15 minutes.
		QPSRates:       stats.NewRates(qpsRateName, queryStats, 15*60/5, 5*time.Second),
		ResultStats:    stats.NewHistogram(resultStatsName, resultBuckets),
		SpotCheckCount: stats.NewInt(spotCheckCountName),
	}
}
Example #6
func TestInt(t *testing.T) {
	v := stats.NewInt("")
	load := func() {
		v.Set(1234)
	}
	testMetric(t, v, load,
		`Desc{fqName: "test_name", help: "test_help", constLabels: {}, variableLabels: []}`,
		`gauge:<value:1234 > `,
	)
}
Example #7
func init() {
	t, err := time.Parse(time.UnixDate, buildTime)
	if buildTime != "" && err != nil {
		panic(fmt.Sprintf("Couldn't parse build timestamp %q: %v", buildTime, err))
	}
	stats.NewString("BuildHost").Set(buildHost)
	stats.NewString("BuildUser").Set(buildUser)
	stats.NewInt("BuildTimestamp").Set(t.Unix())
	stats.NewString("BuildGitRev").Set(buildGitRev)
}
Example #8
// NewActionAgent creates a new ActionAgent and registers all the
// associated services
func NewActionAgent(
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort int,
	overridesFile string,
	lockTimeout time.Duration,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()
	mysqld := mysqlctl.NewMysqld("Dba", mycnf, &dbcfgs.Dba, &dbcfgs.Repl)

	agent = &ActionAgent{
		TopoServer:         topoServer,
		TabletAlias:        tabletAlias,
		Mysqld:             mysqld,
		MysqlDaemon:        mysqld,
		DBConfigs:          dbcfgs,
		SchemaOverrides:    schemaOverrides,
		LockTimeout:        lockTimeout,
		done:               make(chan struct{}),
		History:            history.New(historyLength),
		lastHealthMapCount: stats.NewInt("LastHealthMapCount"),
		changeItems:        make(chan tabletChangeItem, 100),
	}

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, &dbcfgs.Filtered, mysqld)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	if err := agent.Start(mysqlPort, port, securePort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.registerQueryService()

	// start health check if needed
	agent.initHeathCheck()

	return agent, nil
}
Example #9
func NewZkReader(resolveLocal bool, preload []string) *ZkReader {
	zkr := &ZkReader{zcell: make(map[string]*zkCell), resolveLocal: resolveLocal}
	if resolveLocal {
		zkr.localCell = zk.GuessLocalCell()
	}
	zkr.rpcCalls = stats.NewInt("ZkReaderRpcCalls")
	zkr.unknownCellErrors = stats.NewInt("ZkReaderUnknownCellErrors")
	zkr.zkrStats = newZkrStats()

	stats.PublishJSONFunc("ZkReader", zkr.statsJSON)

	// start some cells
	for _, cellName := range preload {
		_, path, err := zkr.getCell("/zk/" + cellName)
		if err != nil {
			log.Errorf("Cell " + cellName + " could not be preloaded: " + err.Error())
		} else {
			log.Infof("Cell " + cellName + " preloaded for: " + path)
		}
	}
	return zkr
}
Example #10
// NewQueryEngine creates a new QueryEngine.
// This is a singleton class.
// You must call this only once.
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9))

	mysqlStats = stats.NewTimings("Mysql")

	// Pools
	qe.cachePool = NewCachePool("Rowcache", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.connPool = dbconnpool.NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = dbconnpool.NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.txPool = dbconnpool.NewConnectionPool("TransactionPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // connections in pool have to be > transactionCap

	// Services
	qe.activeTxPool = NewActiveTxPool("ActiveTransactionPool", time.Duration(config.TransactionTimeout*1e9))
	qe.connKiller = NewConnectionKiller(1, time.Duration(config.IdleTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), qe.connKiller)
	qe.consolidator = NewConsolidator()
	qe.invalidator = NewRowcacheInvalidator(qe)
	qe.streamQList = NewQueryList(qe.connKiller)

	// Vars
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	if config.StrictMode {
		qe.strictMode.Set(1)
	}
	qe.strictTableAcl = config.StrictTableAcl
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)

	// loggers
	qe.accessCheckerLogger = logutil.NewThrottledLogger("accessChecker", 1*time.Second)

	// Stats
	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	QPSRates = stats.NewRates("QPS", queryStats, 15, 60*time.Second)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	infoErrors = stats.NewCounters("InfoErrors")
	errorStats = stats.NewCounters("Errors")
	internalErrors = stats.NewCounters("InternalErrors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("RowcacheSpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	}))
	spotCheckCount = stats.NewInt("RowcacheSpotCheckCount")

	return qe
}
Example #11
// NewQueryEngine creates a new QueryEngine.
// This is a singleton class.
// You must call this only once.
func NewQueryEngine(config Config) *QueryEngine {
	qe := &QueryEngine{}

	// services
	qe.cachePool = NewCachePool("RowcachePool", config.RowCache, time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.schemaInfo = NewSchemaInfo(config.QueryCacheSize, time.Duration(config.SchemaReloadTime*1e9), time.Duration(config.IdleTimeout*1e9), config.SensitiveMode)
	qe.connPool = NewConnectionPool("ConnPool", config.PoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.streamConnPool = NewConnectionPool("StreamConnPool", config.StreamPoolSize, time.Duration(config.IdleTimeout*1e9))
	qe.txPool = NewConnectionPool("TransactionPool", config.TransactionCap, time.Duration(config.IdleTimeout*1e9)) // connections in pool have to be > transactionCap
	qe.activeTxPool = NewActiveTxPool("ActiveTransactionPool", time.Duration(config.TransactionTimeout*1e9))
	qe.activePool = NewActivePool("ActivePool", time.Duration(config.QueryTimeout*1e9), time.Duration(config.IdleTimeout*1e9))
	qe.consolidator = NewConsolidator()

	// vars
	qe.spotCheckFreq = sync2.AtomicInt64(config.SpotCheckRatio * SPOT_CHECK_MULTIPLIER)
	if config.StrictMode {
		qe.strictMode.Set(1)
	}
	qe.maxResultSize = sync2.AtomicInt64(config.MaxResultSize)
	qe.streamBufferSize = sync2.AtomicInt64(config.StreamBufferSize)

	// stats
	stats.Publish("MaxResultSize", stats.IntFunc(qe.maxResultSize.Get))
	stats.Publish("StreamBufferSize", stats.IntFunc(qe.streamBufferSize.Get))
	queryStats = stats.NewTimings("Queries")
	QPSRates = stats.NewRates("QPS", queryStats, 15, 60*time.Second)
	waitStats = stats.NewTimings("Waits")
	killStats = stats.NewCounters("Kills")
	infoErrors = stats.NewCounters("InfoErrors")
	errorStats = stats.NewCounters("Errors")
	internalErrors = stats.NewCounters("InternalErrors")
	resultStats = stats.NewHistogram("Results", resultBuckets)
	stats.Publish("SpotCheckRatio", stats.FloatFunc(func() float64 {
		return float64(qe.spotCheckFreq.Get()) / SPOT_CHECK_MULTIPLIER
	}))
	spotCheckCount = stats.NewInt("SpotCheckCount")

	return qe
}
Example #12
// NewQueryServiceStats returns a new QueryServiceStats instance.
func NewQueryServiceStats(statsPrefix string, enablePublishStats bool) *QueryServiceStats {
	mysqlStatsName := ""
	queryStatsName := ""
	qpsRateName := ""
	waitStatsName := ""
	killStatsName := ""
	infoErrorsName := ""
	errorStatsName := ""
	internalErrorsName := ""
	resultStatsName := ""
	spotCheckCountName := ""
	if enablePublishStats {
		mysqlStatsName = statsPrefix + "Mysql"
		queryStatsName = statsPrefix + "Queries"
		qpsRateName = statsPrefix + "QPS"
		waitStatsName = statsPrefix + "Waits"
		killStatsName = statsPrefix + "Kills"
		infoErrorsName = statsPrefix + "InfoErrors"
		errorStatsName = statsPrefix + "Errors"
		internalErrorsName = statsPrefix + "InternalErrors"
		resultStatsName = statsPrefix + "Results"
		spotCheckCountName = statsPrefix + "RowcacheSpotCheckCount"
	}
	resultBuckets := []int64{0, 1, 5, 10, 50, 100, 500, 1000, 5000, 10000}
	queryStats := stats.NewTimings(queryStatsName)
	return &QueryServiceStats{
		MySQLStats:     stats.NewTimings(mysqlStatsName),
		QueryStats:     queryStats,
		WaitStats:      stats.NewTimings(waitStatsName),
		KillStats:      stats.NewCounters(killStatsName),
		InfoErrors:     stats.NewCounters(infoErrorsName),
		ErrorStats:     stats.NewCounters(errorStatsName),
		InternalErrors: stats.NewCounters(internalErrorsName),
		QPSRates:       stats.NewRates(qpsRateName, queryStats, 15, 60*time.Second),
		ResultStats:    stats.NewHistogram(resultStatsName, resultBuckets),
		SpotCheckCount: stats.NewInt(spotCheckCountName),
	}
}
Example #13
	Run(context.Context) error
}

var (
	retryDuration         = flag.Duration("retry_duration", 2*time.Hour, "Amount of time we wait before giving up on a retryable action (e.g. write to destination, waiting for healthy tablets)")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")
	useV3ReshardingMode   = flag.Bool("use_v3_resharding_mode", false, "True iff the workers should use V3-style resharding, which doesn't require a preset sharding key column.")

	healthCheckTopologyRefresh = flag.Duration("worker_healthcheck_topology_refresh", 30*time.Second, "refresh interval for re-reading the topology")
	healthcheckRetryDelay      = flag.Duration("worker_healthcheck_retry_delay", 5*time.Second, "delay before retrying a failed healthcheck")
	healthCheckTimeout         = flag.Duration("worker_healthcheck_timeout", time.Minute, "the health check timeout period")

	statsState = stats.NewString("WorkerState")
	// statsRetryCount is the total number of times a query to vttablet had to be retried.
	statsRetryCount = stats.NewInt("WorkerRetryCount")
	// statsRetryCounters groups the number of retries by category, e.g. "TimeoutError" or "Readonly".
	statsRetryCounters = stats.NewCounters("WorkerRetryCounters")
	// statsThrottledCounters is the number of times a write has been throttled,
	// grouped by (keyspace, shard, threadID). Mainly used for testing.
	// If throttling is enabled, this should always be non-zero for all threads.
	statsThrottledCounters = stats.NewMultiCounters("WorkerThrottledCounters", []string{"keyspace", "shardname", "thread_id"})
	// statsStateDurationsNs tracks for each state how much time was spent in it. Mainly used for testing.
	statsStateDurationsNs = stats.NewCounters("WorkerStateDurations")
	// statsOnlineInsertsCounters tracks for every table how many rows were
	// inserted during the online clone (reconciliation) phase.
	statsOnlineInsertsCounters = stats.NewCounters("WorkerOnlineInsertsCounters")
	// statsOnlineUpdatesCounters tracks for every table how many rows were updated.
	statsOnlineUpdatesCounters = stats.NewCounters("WorkerOnlineUpdatesCounters")
	// statsOnlineDeletesCounters tracks for every table how many rows were deleted.
	statsOnlineDeletesCounters = stats.NewCounters("WorkerOnlineDeletesCounters")
Example #14
	querypb "github.com/youtube/vitess/go/vt/proto/query"
)

// NewConnFunc is a factory method that creates a Conn instance
// using given ConnParams.
type NewConnFunc func(params ConnParams) (Conn, error)

var (
	defaultConn NewConnFunc

	// mu protects conns.
	mu    sync.Mutex
	conns = make(map[string]NewConnFunc)
	// Keep track of the total number of connections opened to MySQL. This is mainly
	// useful for tests, where we use this to approximate the number of ports that we used.
	connCount = stats.NewInt("mysql-new-connection-count")
)

// Conn defines the behavior for the low level db connection
type Conn interface {
	// ExecuteFetch executes the query on the connection
	ExecuteFetch(query string, maxrows int, wantfields bool) (*sqltypes.Result, error)
	// ExecuteFetchMap returns a map from column names to cell data for a query
	// that should return exactly 1 row.
	ExecuteFetchMap(query string) (map[string]string, error)
	// ExecuteStreamFetch starts a streaming query to db server. Use FetchNext
	// on the Connection until it returns nil or error
	ExecuteStreamFetch(query string) error
	// Close closes the db connection
	Close()
	// IsClosed returns whether the connection was ever closed
Example #15
const (
	DISABLED int64 = iota
	ENABLED
)

var usStateNames = map[int64]string{
	ENABLED:  "Enabled",
	DISABLED: "Disabled",
}

var (
	streamCount          = stats.NewCounters("UpdateStreamStreamCount")
	updateStreamErrors   = stats.NewCounters("UpdateStreamErrors")
	updateStreamEvents   = stats.NewCounters("UpdateStreamEvents")
	keyrangeStatements   = stats.NewInt("UpdateStreamKeyRangeStatements")
	keyrangeTransactions = stats.NewInt("UpdateStreamKeyRangeTransactions")
	tablesStatements     = stats.NewInt("UpdateStreamTablesStatements")
	tablesTransactions   = stats.NewInt("UpdateStreamTablesTransactions")
)

type UpdateStream struct {
	mycnf *mysqlctl.Mycnf

	actionLock     sync.Mutex
	state          sync2.AtomicInt64
	mysqld         *mysqlctl.Mysqld
	stateWaitGroup sync.WaitGroup
	dbname         string
	streams        streamList
}
Example #16
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
//
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.QueryServiceControl,
	tabletAlias topo.TabletAlias,
	dbcfgs *dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, securePort, gRPCPort int,
	overridesFile string,
	lockTimeout time.Duration,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()

	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		SchemaOverrides:     schemaOverrides,
		LockTimeout:         lockTimeout,
		History:             history.New(historyLength),
		lastHealthMapCount:  stats.NewInt("LastHealthMapCount"),
		_healthy:            fmt.Errorf("healthcheck not run yet"),
		healthStreamMap:     make(map[int]chan<- *actionnode.HealthStreamReply),
	}

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, securePort, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)
	}

	// Publish and set the TargetTabletType. Not a global var
	// since it should never be changed.
	statsTabletType := stats.NewString("TargetTabletType")
	statsTabletType.Set(*targetTabletType)

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, &dbcfgs.Filtered, mysqld)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	if err := agent.Start(batchCtx, mysqlPort, port, securePort, gRPCPort); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.registerQueryService()

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreFromBackup(batchCtx); err != nil {
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)
			}

			// after the restore is done, start health check
			agent.initHeathCheck()
		}()
	} else {
		// synchronously start health check if needed
		agent.initHeathCheck()
	}

	return agent, nil
}
Example #17
	Run(context.Context) error
}

var (
	retryDuration         = flag.Duration("retry_duration", 2*time.Hour, "Amount of time we wait before giving up on a retryable action (e.g. write to destination, waiting for healthy tablets)")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")
	useV3ReshardingMode   = flag.Bool("use_v3_resharding_mode", false, "True iff the workers should use V3-style resharding, which doesn't require a preset sharding key column.")

	healthCheckTopologyRefresh = flag.Duration("worker_healthcheck_topology_refresh", 30*time.Second, "refresh interval for re-reading the topology")
	healthcheckRetryDelay      = flag.Duration("worker_healthcheck_retry_delay", 5*time.Second, "delay before retrying a failed healthcheck")
	healthCheckTimeout         = flag.Duration("worker_healthcheck_timeout", time.Minute, "the health check timeout period")

	statsState = stats.NewString("WorkerState")
	// statsRetryCount is the total number of times a query to vttablet had to be retried.
	statsRetryCount = stats.NewInt("WorkerRetryCount")
	// statsRetryCounters groups the number of retries by category, e.g. "TimeoutError" or "Readonly".
	statsRetryCounters = stats.NewCounters("WorkerRetryCounters")
	// statsThrottledCounters is the number of times a write has been throttled,
	// grouped by (keyspace, shard, threadID). Mainly used for testing.
	// If throttling is enabled, this should always be non-zero for all threads.
	statsThrottledCounters = stats.NewMultiCounters("WorkerThrottledCounters", []string{"keyspace", "shardname", "thread_id"})
	// statsStateDurationsNs tracks for each state how much time was spent in it. Mainly used for testing.
	statsStateDurationsNs = stats.NewCounters("WorkerStateDurations")

	// statsOnlineInsertsCounters tracks for every table how many rows were
	// inserted during the online clone (reconciliation) phase.
	statsOnlineInsertsCounters = stats.NewCounters("WorkerOnlineInsertsCounters")
	// statsOnlineUpdatesCounters tracks for every table how many rows were updated.
	statsOnlineUpdatesCounters = stats.NewCounters("WorkerOnlineUpdatesCounters")
	// statsOnlineDeletesCounters tracks for every table how many rows were deleted.
Example #18
import (
	"encoding/hex"
	"fmt"
	"log"
	"strings"
	"unicode"

	"github.com/youtube/vitess/go/stats"
)

const (
	filteredReplicationUnfriendlyAnnotation = "/* vtgate:: filtered_replication_unfriendly */"
)

var (
	filteredReplicationUnfriendlyStatementsCount = stats.NewInt("FilteredReplicationUnfriendlyStatementsCount")
)

// AnnotateIfDML annotates 'sql' based on 'keyspaceIDs'
//
// If 'sql' is not a DML statement no annotation is added.
// If 'sql' is a DML statement and contains exactly one keyspaceID
//    it is used to annotate 'sql'
// Otherwise 'sql' is annotated as replication-unfriendly.
func AnnotateIfDML(sql string, keyspaceIDs [][]byte) string {
	if !IsDML(sql) {
		return sql
	}
	if len(keyspaceIDs) == 1 {
		return AddKeyspaceID(sql, keyspaceIDs[0], "")
	}
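A sketch of the remaining branch implied by the doc comment above (zero or multiple keyspace IDs), reusing the counter and annotation constant declared earlier in this example; the excerpt does not show the original wording of this branch:

	// Zero or multiple keyspace IDs: mark the statement as unfriendly to
	// filtered replication and count the occurrence.
	filteredReplicationUnfriendlyStatementsCount.Add(1)
	return sql + filteredReplicationUnfriendlyAnnotation
}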
Example #19
// NewActionAgent creates a new ActionAgent and registers all the
// associated services.
//
// batchCtx is the context that the agent will use for any background tasks
// it spawns.
func NewActionAgent(
	batchCtx context.Context,
	mysqld mysqlctl.MysqlDaemon,
	queryServiceControl tabletserver.Controller,
	tabletAlias *topodatapb.TabletAlias,
	dbcfgs dbconfigs.DBConfigs,
	mycnf *mysqlctl.Mycnf,
	port, gRPCPort int32,
	overridesFile string,
) (agent *ActionAgent, err error) {
	schemaOverrides := loadSchemaOverrides(overridesFile)

	topoServer := topo.GetServer()

	agent = &ActionAgent{
		QueryServiceControl: queryServiceControl,
		HealthReporter:      health.DefaultAggregator,
		batchCtx:            batchCtx,
		TopoServer:          topoServer,
		TabletAlias:         tabletAlias,
		MysqlDaemon:         mysqld,
		DBConfigs:           dbcfgs,
		SchemaOverrides:     schemaOverrides,
		History:             history.New(historyLength),
		lastHealthMapCount:  stats.NewInt("LastHealthMapCount"),
		_healthy:            fmt.Errorf("healthcheck not run yet"),
	}
	agent.registerQueryRuleSources()

	// try to initialize the tablet if we have to
	if err := agent.InitTablet(port, gRPCPort); err != nil {
		return nil, fmt.Errorf("agent.InitTablet failed: %v", err)
	}

	// Publish and set the TargetTabletType. Not a global var
	// since it should never be changed.
	statsTabletType := stats.NewString("TargetTabletType")
	statsTabletType.Set(*targetTabletType)

	// Create the TabletType stats
	agent.exportStats = true
	agent.statsTabletType = stats.NewString("TabletType")

	// Start the binlog player services, not playing at start.
	agent.BinlogPlayerMap = NewBinlogPlayerMap(topoServer, mysqld, func() binlogplayer.VtClient {
		return binlogplayer.NewDbClient(&agent.DBConfigs.Filtered)
	})
	// Stop all binlog players upon entering lameduck.
	servenv.OnTerm(agent.BinlogPlayerMap.StopAllPlayersAndReset)
	RegisterBinlogPlayerMap(agent.BinlogPlayerMap)

	// try to figure out the mysql port
	mysqlPort := mycnf.MysqlPort
	if mysqlPort == 0 {
		// we don't know the port, try to get it from mysqld
		var err error
		mysqlPort, err = mysqld.GetMysqlPort()
		if err != nil {
			log.Warningf("Cannot get current mysql port, will use 0 for now: %v", err)
		}
	}

	// Start will get the tablet info, and update our state from it
	if err := agent.Start(batchCtx, int32(mysqlPort), port, gRPCPort, true); err != nil {
		return nil, err
	}

	// register the RPC services from the agent
	agent.registerQueryService()

	// two cases then:
	// - restoreFromBackup is set: we restore, then initHealthCheck, all
	//   in the background
	// - restoreFromBackup is not set: we initHealthCheck right away
	if *restoreFromBackup {
		go func() {
			// restoreFromBackup will just be a regular action
			// (same as if it was triggered remotely)
			if err := agent.RestoreFromBackup(batchCtx); err != nil {
				println(fmt.Sprintf("RestoreFromBackup failed: %v", err))
				log.Fatalf("RestoreFromBackup failed: %v", err)
			}

			// after the restore is done, start health check
			agent.initHealthCheck()
		}()
	} else {
		// synchronously start health check if needed
		agent.initHealthCheck()
	}

	return agent, nil
}
Example #20
	"time"

	mproto "github.com/youtube/vitess/go/mysql/proto"
	"github.com/youtube/vitess/go/stats"
	"github.com/youtube/vitess/go/sync2"
	"github.com/youtube/vitess/go/timer"
	"github.com/youtube/vitess/go/vt/concurrency"
	tproto "github.com/youtube/vitess/go/vt/tabletserver/proto"
	"github.com/youtube/vitess/go/vt/tabletserver/tabletconn"
	"github.com/youtube/vitess/go/vt/topo"
	"golang.org/x/net/context"

	pb "github.com/youtube/vitess/go/vt/proto/topodata"
)

var danglingTabletConn = stats.NewInt("DanglingTabletConn")

// ShardConn represents a load balanced connection to a group
// of vttablets that belong to the same shard. ShardConn can
// be concurrently used across goroutines. Such requests are
// interleaved on the same underlying connection.
type ShardConn struct {
	keyspace           string
	shard              string
	tabletType         topo.TabletType
	retryDelay         time.Duration
	retryCount         int
	connTimeoutTotal   time.Duration
	connTimeoutPerConn time.Duration
	connLife           time.Duration
	balancer           *Balancer
Example #21
	if len(lines) == 0 {
		emptyCount.Add(1)
		log.Warningf("no results for %v %v", req.qtype, req.qname)
	}
	return lines, nil
}

func write(w io.Writer, line string) {
	_, err := io.WriteString(w, line)
	if err != nil {
		log.Errorf("write failed: %v", err)
	}
}

var (
	requestCount = stats.NewInt("PdnsRequestCount")
	errorCount   = stats.NewInt("PdnsErrorCount")
	emptyCount   = stats.NewInt("PdnsEmptyCount")
)

func (pd *pdns) Serve(r io.Reader, w io.Writer) {
	log.Infof("starting zkns resolver")
	bufr := bufio.NewReader(r)
	needHandshake := true
	for {
		line, isPrefix, err := bufr.ReadLine()
		if err == nil && isPrefix {
			err = errLongLine
		}
		if err == io.EOF {
			return
Example #22
	// ResolveDestinationMasters forces the worker to (re)resolve the topology and update
	// the destination masters that it knows about.
	ResolveDestinationMasters(ctx context.Context) error

	// GetDestinationMaster returns the most recently resolved destination master for a particular shard.
	GetDestinationMaster(shardName string) (*topo.TabletInfo, error)
}

var (
	resolveTTL            = flag.Duration("resolve_ttl", 15*time.Second, "Amount of time that a topo resolution can be cached for")
	executeFetchRetryTime = flag.Duration("executefetch_retry_time", 30*time.Second, "Amount of time we should wait before retrying ExecuteFetch calls")
	remoteActionsTimeout  = flag.Duration("remote_actions_timeout", time.Minute, "Amount of time to wait for remote actions (like replication stop, ...)")

	statsState = stats.NewString("WorkerState")
	// the number of times that the worker attempts to re-resolve the masters
	statsDestinationAttemptedResolves = stats.NewInt("WorkerDestinationAttemptedResolves")
	// the number of times that the worker actually hits the topo server,
	// i.e., it doesn't use a cached topology
	statsDestinationActualResolves = stats.NewInt("WorkerDestinationActualResolves")
	statsRetryCounters             = stats.NewCounters("WorkerRetryCount")
)

// resetVars resets the debug variables that are meant to provide information on a
// per-run basis. This should be called at the beginning of each worker run.
func resetVars() {
	statsState.Set("")
	statsDestinationAttemptedResolves.Set(0)
	statsDestinationActualResolves.Set(0)
	statsRetryCounters.Reset()
}
Example #23
	"time"

	"github.com/youtube/vitess/go/stats"
	topodatapb "github.com/youtube/vitess/go/vt/proto/topodata"
	vtrpcpb "github.com/youtube/vitess/go/vt/proto/vtrpc"
	"github.com/youtube/vitess/go/vt/vterrors"
)

var (
	enableFakeMasterBuffer = flag.Bool("enable_fake_master_buffer", false, "Enable fake master buffering.")
	bufferKeyspace         = flag.String("buffer_keyspace", "", "The name of the keyspace to buffer master requests on.")
	bufferShard            = flag.String("buffer_shard", "", "The name of the shard to buffer master requests on.")
	maxBufferSize          = flag.Int("max_buffer_size", 10, "The maximum number of master requests to buffer at a time.")
	fakeBufferDelay        = flag.Duration("fake_buffer_delay", 1*time.Second, "The amount of time that we should delay all master requests for, to fake a buffer.")

	bufferedRequestsAttempted  = stats.NewInt("BufferedRequestsAttempted")
	bufferedRequestsSuccessful = stats.NewInt("BufferedRequestsSuccessful")
	// Use this lock when adding to the number of currently buffered requests.
	bufferMu         sync.Mutex
	bufferedRequests = stats.NewInt("BufferedRequests")
)

// timeSleep can be mocked out in unit tests
var timeSleep = time.Sleep

// errBufferFull is the error returned when a buffer request is rejected because the buffer is full.
var errBufferFull = vterrors.FromError(
	vtrpcpb.ErrorCode_TRANSIENT_ERROR,
	errors.New("master request buffer full, rejecting request"),
)
Example #24
	"net/http"
	"time"

	log "github.com/golang/glog"
	rpc "github.com/youtube/vitess/go/rpcplus"
	"github.com/youtube/vitess/go/rpcwrap/auth"
	"github.com/youtube/vitess/go/rpcwrap/proto"
	"github.com/youtube/vitess/go/stats"
)

const (
	connected = "200 Connected to Go RPC"
)

var (
	connCount    = stats.NewInt("connection-count")
	connAccepted = stats.NewInt("connection-accepted")
)

type ClientCodecFactory func(conn io.ReadWriteCloser) rpc.ClientCodec

type BufferedConnection struct {
	isClosed bool
	*bufio.Reader
	io.WriteCloser
}

func NewBufferedConnection(conn io.ReadWriteCloser) *BufferedConnection {
	connCount.Add(1)
	connAccepted.Add(1)
	return &BufferedConnection{false, bufio.NewReader(conn), conn}
Example #25
package umgmt

import (
	"crypto/tls"
	"crypto/x509"
	"net"
	"net/http"
	"os"
	"strings"
	"sync"

	log "github.com/golang/glog"
	"github.com/youtube/vitess/go/stats"
)

var connectionCount = stats.NewInt("ConnectionCount")
var connectionAccepted = stats.NewInt("ConnectionAccepted")

type connCountConn struct {
	net.Conn
	listener *connCountListener
	closed   bool
}

func (c *connCountConn) Close() (err error) {
	// in case there is a race closing a client
	if c.closed {
		return nil
	}
	err = c.Conn.Close()
	c.closed = true