// startStats blocks and periodically logs transaction statistics (throughput, // success rates, durations, ...). // TODO(tschottdorf): Use a proper metrics subsystem for this (+the store-level // stats). // TODO(mrtracy): Add this to TimeSeries. func (tc *TxnCoordSender) startStats() { res := time.Millisecond // for duration logging resolution lastNow := tc.clock.PhysicalNow() for { select { case <-time.After(statusLogInterval): if !log.V(1) { continue } tc.Lock() curStats := tc.txnStats tc.txnStats = txnCoordStats{} tc.Unlock() now := tc.clock.PhysicalNow() // Tests have weird clocks. if now-lastNow <= 0 { continue } num := len(curStats.durations) dMax := time.Duration(stats.Max(curStats.durations)) dMean := time.Duration(stats.Mean(curStats.durations)) dDev := time.Duration(stats.StdDevP(curStats.durations)) rMax := stats.Max(curStats.restarts) rMean := stats.Mean(curStats.restarts) rDev := stats.StdDevP(curStats.restarts) rate := float64(int64(num)*int64(time.Second)) / float64(now-lastNow) var pCommitted, pAbandoned, pAborted float32 if num > 0 { pCommitted = 100 * float32(curStats.committed) / float32(num) pAbandoned = 100 * float32(curStats.abandoned) / float32(num) pAborted = 100 * float32(curStats.aborted) / float32(num) } log.Infof("txn coordinator: %.2f txn/sec, %.2f/%.2f/%.2f %%cmmt/abrt/abnd, %s/%s/%s avg/σ/max duration, %.1f/%.1f/%.1f avg/σ/max restarts (%d samples)", rate, pCommitted, pAborted, pAbandoned, util.TruncateDuration(dMean, res), util.TruncateDuration(dDev, res), util.TruncateDuration(dMax, res), rMean, rDev, rMax, num) lastNow = now case <-tc.stopper.ShouldStop(): return } } }
// printStats prints the time it took for rebalancing to finish and the final // standard deviation of replica counts across stores. func (at *allocatorTest) printRebalanceStats(db *gosql.DB, host string, adminPort int) error { // TODO(cuongdo): Output these in a machine-friendly way and graph. // Output time it took to rebalance. { var rebalanceIntervalStr string var rebalanceInterval time.Duration q := `SELECT (SELECT MAX(timestamp) FROM rangelog) - ` + `(select MAX(timestamp) FROM eventlog WHERE eventType='` + string(sql.EventLogNodeJoin) + `')` if err := db.QueryRow(q).Scan(&rebalanceIntervalStr); err != nil { return err } rebalanceInterval, err := time.ParseDuration(rebalanceIntervalStr) if err != nil { return err } if rebalanceInterval < 0 { // This can happen with single-node clusters. rebalanceInterval = time.Duration(0) } log.Infof("cluster took %s to rebalance", rebalanceInterval) } // Output # of range events that occurred. All other things being equal, // larger numbers are worse and potentially indicate thrashing. { var rangeEvents int64 q := `SELECT COUNT(*) from rangelog` if err := db.QueryRow(q).Scan(&rangeEvents); err != nil { return err } log.Infof("%d range events", rangeEvents) } // Output standard deviation of the replica counts for all stores. { var client http.Client var nodesResp serverpb.NodesResponse url := fmt.Sprintf("http://%s:%d/_status/nodes", host, adminPort) if err := util.GetJSON(client, url, &nodesResp); err != nil { return err } var replicaCounts stats.Float64Data for _, node := range nodesResp.Nodes { for _, ss := range node.StoreStatuses { replicaCounts = append(replicaCounts, float64(ss.Metrics["replicas"])) } } stddev, err := stats.StdDevP(replicaCounts) if err != nil { return err } log.Infof("stddev(replica count) = %.2f", stddev) } return nil }
func (at *allocatorTest) stdDev() (float64, error) { host := at.f.Hostname(0) var client http.Client var nodesResp serverpb.NodesResponse url := fmt.Sprintf("http://%s:%s/_status/nodes", host, adminPort) if err := httputil.GetJSON(client, url, &nodesResp); err != nil { return 0, err } var replicaCounts stats.Float64Data for _, node := range nodesResp.Nodes { for _, ss := range node.StoreStatuses { replicaCounts = append(replicaCounts, float64(ss.Metrics["replicas"])) } } stdDev, err := stats.StdDevP(replicaCounts) if err != nil { return 0, err } return stdDev, nil }
// startStats blocks and periodically logs transaction statistics (throughput, // success rates, durations, ...). Note that this only captures write txns, // since read-only txns are stateless as far as TxnCoordSender is concerned. // stats). // TODO(mrtracy): Add this to TimeSeries. func (tc *TxnCoordSender) startStats() { res := time.Millisecond // for duration logging resolution lastNow := tc.clock.PhysicalNow() for { select { case <-time.After(statusLogInterval): if !log.V(1) { continue } tc.Lock() curStats := tc.txnStats tc.txnStats = txnCoordStats{} tc.Unlock() now := tc.clock.PhysicalNow() // Tests have weird clocks. if now-lastNow <= 0 { continue } num := len(curStats.durations) // Only compute when non-empty input. var dMax, dMean, dDev, rMax, rMean, rDev float64 var err error if num > 0 { // There should never be an error in the below // computations. dMax, err = stats.Max(curStats.durations) if err != nil { panic(err) } dMean, err = stats.Mean(curStats.durations) if err != nil { panic(err) } dDev, err = stats.StdDevP(curStats.durations) if err != nil { panic(err) } rMax, err = stats.Max(curStats.restarts) if err != nil { panic(err) } rMean, err = stats.Mean(curStats.restarts) if err != nil { panic(err) } rDev, err = stats.StdDevP(curStats.restarts) if err != nil { panic(err) } } rate := float64(int64(num)*int64(time.Second)) / float64(now-lastNow) var pCommitted, pAbandoned, pAborted float32 if fNum := float32(num); fNum > 0 { pCommitted = 100 * float32(curStats.committed) / fNum pAbandoned = 100 * float32(curStats.abandoned) / fNum pAborted = 100 * float32(curStats.aborted) / fNum } log.Infof( "txn coordinator: %.2f txn/sec, %.2f/%.2f/%.2f %%cmmt/abrt/abnd, %s/%s/%s avg/σ/max duration, %.1f/%.1f/%.1f avg/σ/max restarts (%d samples)", rate, pCommitted, pAborted, pAbandoned, util.TruncateDuration(time.Duration(dMean), res), util.TruncateDuration(time.Duration(dDev), res), util.TruncateDuration(time.Duration(dMax), res), rMean, rDev, rMax, num, ) lastNow = now case <-tc.stopper.ShouldStop(): return } } }