// runMVCCConditionalPut benchmarks MVCCConditionalPut for values of the given
// size. If createFirst is true, the keys are pre-populated outside the timed
// region and each conditional put expects the existing value; otherwise the
// conditional puts expect no existing value (expected == nil).
func runMVCCConditionalPut(emk engineMaker, valueSize int, createFirst bool, b *testing.B) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	// keyBuf carries a reusable "key-" prefix (4 bytes); keyBuf[:4] truncates
	// back to just the prefix before each per-iteration key is encoded, so the
	// backing array is reused across iterations.
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
	eng := emk(b, fmt.Sprintf("cput_%d", valueSize))
	defer eng.Close()
	b.SetBytes(int64(valueSize))

	var expected *roachpb.Value
	if createFirst {
		// Pre-populate one key per benchmark iteration before the timer starts.
		for i := 0; i < b.N; i++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
			ts := makeTS(timeutil.Now().UnixNano(), 0)
			if err := MVCCPut(context.Background(), eng, nil, key, ts, value, nil); err != nil {
				b.Fatalf("failed put: %s", err)
			}
		}
		expected = &value
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := makeTS(timeutil.Now().UnixNano(), 0)
		if err := MVCCConditionalPut(context.Background(), eng, nil, key, ts, value, expected, nil); err != nil {
			b.Fatalf("failed put: %s", err)
		}
	}
	b.StopTimer()
}
// Heartbeat is called to update a node's expiration timestamp. This
// method does a conditional put on the node liveness record, and if
// successful, stores the updated liveness record in the nodes map.
func (nl *NodeLiveness) Heartbeat(ctx context.Context, liveness *Liveness) error {
	// Warn about heartbeats that take unusually long (over a second).
	defer func(start time.Time) {
		if dur := timeutil.Now().Sub(start); dur > time.Second {
			log.Warningf(ctx, "slow heartbeat took %0.1fs", dur.Seconds())
		}
	}(timeutil.Now())

	// Allow only one heartbeat at a time.
	select {
	case nl.heartbeatSem <- struct{}{}:
	case <-ctx.Done():
		return ctx.Err()
	}
	defer func() {
		<-nl.heartbeatSem
	}()

	nodeID := nl.gossip.NodeID.Get()
	var newLiveness Liveness
	if liveness == nil {
		// No known previous liveness record: start a fresh one at epoch 1.
		newLiveness = Liveness{
			NodeID: nodeID,
			Epoch:  1,
		}
	} else {
		newLiveness = *liveness
	}
	// We need to add the maximum clock offset to the expiration because it's
	// used when determining liveness for a node.
	newLiveness.Expiration = nl.clock.Now().Add(
		(nl.livenessThreshold + nl.clock.MaxOffset()).Nanoseconds(), 0)
	if err := nl.updateLiveness(ctx, &newLiveness, liveness, func(actual Liveness) error {
		// Update liveness to actual value on mismatch.
		nl.mu.Lock()
		nl.mu.self = actual
		nl.mu.Unlock()
		// If the actual liveness is different than expected, but is
		// considered live, treat the heartbeat as a success. This can
		// happen when the periodic heartbeater races with a concurrent
		// lease acquisition.
		if actual.isLive(nl.clock.Now(), nl.clock.MaxOffset()) {
			return errNodeAlreadyLive
		}
		// Otherwise, return error.
		return errSkippedHeartbeat
	}); err != nil {
		// errNodeAlreadyLive is a benign outcome (see above), not a failure.
		if err == errNodeAlreadyLive {
			return nil
		}
		nl.metrics.HeartbeatFailures.Inc(1)
		return err
	}

	log.VEventf(ctx, 1, "heartbeat %+v", newLiveness.Expiration)
	nl.mu.Lock()
	nl.mu.self = newLiveness
	nl.mu.Unlock()
	nl.metrics.HeartbeatSuccesses.Inc(1)
	return nil
}
func testGossipPeeringsInner( ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig, ) { num := c.NumNodes() deadline := timeutil.Now().Add(cfg.Duration) waitTime := longWaitTime if cfg.Duration < waitTime { waitTime = shortWaitTime } for timeutil.Now().Before(deadline) { CheckGossip(ctx, t, c, waitTime, HasPeers(num)) // Restart the first node. log.Infof(ctx, "restarting node 0") if err := c.Restart(ctx, 0); err != nil { t.Fatal(err) } CheckGossip(ctx, t, c, waitTime, HasPeers(num)) // Restart another node (if there is one). var pickedNode int if num > 1 { pickedNode = rand.Intn(num-1) + 1 } log.Infof(ctx, "restarting node %d", pickedNode) if err := c.Restart(ctx, pickedNode); err != nil { t.Fatal(err) } CheckGossip(ctx, t, c, waitTime, HasPeers(num)) } }
// monitor periodically prints zeroSum progress statistics every d, emitting a
// column-header row every 20 ticks. It loops forever; run it in its own
// goroutine.
func (z *zeroSum) monitor(d time.Duration) {
	start := timeutil.Now()
	lastTime := start
	var lastOps uint64
	for ticks := 0; true; ticks++ {
		time.Sleep(d)

		if ticks%20 == 0 {
			fmt.Printf("_elapsed__accounts_________ops__ops/sec___errors___splits____xfers___ranges_____________replicas\n")
		}

		now := timeutil.Now()
		elapsed := now.Sub(lastTime).Seconds()
		ops := atomic.LoadUint64(&z.stats.ops)

		z.ranges.Lock()
		ranges, replicas := z.ranges.count, z.ranges.replicas
		z.ranges.Unlock()

		// The +0.5 rounds the elapsed time to the nearest whole second for
		// display.
		fmt.Printf("%8s %9d %11d %8.1f %8d %8d %8d %8d %20s\n",
			time.Duration(now.Sub(start).Seconds()+0.5)*time.Second,
			z.accountsLen(), ops, float64(ops-lastOps)/elapsed,
			atomic.LoadUint64(&z.stats.errors),
			atomic.LoadUint64(&z.stats.splits),
			atomic.LoadUint64(&z.stats.transfers),
			ranges, z.formatReplicas(replicas))
		lastTime = now
		lastOps = ops
	}
}
// AfterTest snapshots the currently-running goroutines and returns a
// function to be run at the end of tests to see whether any
// goroutines leaked.
func AfterTest(t testing.TB) func() {
	orig := interestingGoroutines()
	return func() {
		// Don't pile a leak report on top of an already-failed test.
		if t.Failed() {
			return
		}
		// If the test is panicking, propagate the panic rather than masking
		// it with a goroutine-leak report.
		if r := recover(); r != nil {
			panic(r)
		}
		// Loop, waiting for goroutines to shut down.
		// Wait up to 5 seconds, but finish as quickly as possible.
		deadline := timeutil.Now().Add(5 * time.Second)
		for {
			// Collect goroutines that were not present in the initial snapshot.
			var leaked []string
			for id, stack := range interestingGoroutines() {
				if _, ok := orig[id]; !ok {
					leaked = append(leaked, stack)
				}
			}
			if len(leaked) == 0 {
				return
			}
			if timeutil.Now().Before(deadline) {
				time.Sleep(50 * time.Millisecond)
				continue
			}
			// Deadline passed with goroutines still outstanding: report them
			// in sorted order for deterministic output.
			sort.Strings(leaked)
			for _, g := range leaked {
				t.Errorf("Leaked goroutine: %v", g)
			}
			return
		}
	}
}
// monitor periodically prints allocSim throughput plus per-node
// replica/lease counts every d. The header row is re-emitted every 20 ticks
// or whenever the number of replica columns changes. It loops forever; run
// it in its own goroutine.
func (a *allocSim) monitor(d time.Duration) {
	const padding = "__________"
	// formatHeader builds the header row with one right-aligned column per
	// node appended to the fixed prefix.
	formatHeader := func(numReplicas int) string {
		var buf bytes.Buffer
		_, _ = buf.WriteString("_elapsed__ops/sec___errors_replicas")
		for i := 1; i <= numReplicas; i++ {
			node := fmt.Sprintf("%d", i)
			fmt.Fprintf(&buf, "%s%s", padding[:len(padding)-len(node)], node)
		}
		return buf.String()
	}
	// formatNodes renders "replicas/leases" per node, wrapping dead nodes in
	// ANSI red escape codes.
	formatNodes := func(replicas, leases []int) string {
		var buf bytes.Buffer
		for i := range replicas {
			alive := a.Nodes[i].Alive()
			if !alive {
				_, _ = buf.WriteString("\033[0;31;49m")
			}
			fmt.Fprintf(&buf, "%*s", len(padding), fmt.Sprintf("%d/%d", replicas[i], leases[i]))
			if !alive {
				_, _ = buf.WriteString("\033[0m")
			}
		}
		return buf.String()
	}

	start := timeutil.Now()
	lastTime := start
	var numReplicas int
	var lastOps uint64
	for ticks := 0; true; ticks++ {
		time.Sleep(d)

		now := timeutil.Now()
		elapsed := now.Sub(lastTime).Seconds()
		ops := atomic.LoadUint64(&a.stats.ops)

		a.ranges.Lock()
		ranges := a.ranges.count
		replicas := a.ranges.replicas
		leases := a.ranges.leases
		a.ranges.Unlock()

		if ticks%20 == 0 || numReplicas != len(replicas) {
			numReplicas = len(replicas)
			fmt.Println(formatHeader(numReplicas))
		}

		// The +0.5 rounds the elapsed time to the nearest whole second for
		// display.
		fmt.Printf("%8s %8.1f %8d %8d%s\n",
			time.Duration(now.Sub(start).Seconds()+0.5)*time.Second,
			float64(ops-lastOps)/elapsed,
			atomic.LoadUint64(&a.stats.errors), ranges, formatNodes(replicas, leases))
		lastTime = now
		lastOps = ops
	}
}
// insertLoad add a very basic load that inserts into a unique table and checks
// that the inserted values are indeed correct.
func insertLoad(t *testing.T, dc *dynamicClient, ID int) {
	// Initialize the db.
	if _, err := dc.exec(`CREATE DATABASE IF NOT EXISTS Insert`); err != nil {
		t.Fatal(err)
	}

	// Each load instance gets its own table, keyed by ID, so concurrent
	// instances don't interfere.
	tableName := fmt.Sprintf("Insert.Table%d", ID)
	createTableStatement := fmt.Sprintf(`
CREATE TABLE %s (
	key INT PRIMARY KEY,
	value INT NOT NULL
)`, tableName)
	insertStatement := fmt.Sprintf(`INSERT INTO %s (key, value) VALUES ($1, $1)`, tableName)
	// key-value is 0 for every correctly inserted row (key == value).
	selectStatement := fmt.Sprintf(`SELECT key-value AS "total" FROM %s WHERE key = $1`, tableName)

	// Init the db for the basic insert.
	if _, err := dc.exec(createTableStatement); err != nil {
		t.Fatal(err)
	}

	var valueCheck, valueInsert int
	nextUpdate := timeutil.Now()

	// Perform inserts and selects
	for dc.isRunning() {
		// Insert some values.
		valueInsert++
		if _, err := dc.exec(insertStatement, valueInsert); err != nil {
			if err == errTestFinished {
				return
			}
			t.Fatal(err)
		}

		// Check that another value is still correct. valueCheck walks
		// backwards through previously inserted keys, wrapping around to the
		// most recent insert when it drops below 1.
		valueCheck--
		if valueCheck < 1 {
			valueCheck = valueInsert
		}
		var total int
		if err := dc.queryRowScan(selectStatement, []interface{}{valueCheck}, []interface{}{&total}); err != nil {
			if err == errTestFinished {
				return
			}
			t.Fatal(err)
		}
		if total != 0 {
			t.Fatalf("total expected to be 0, is %d", total)
		}

		// Log progress at most once per second.
		if timeutil.Now().After(nextUpdate) {
			log.Infof(context.TODO(), "Insert %d: inserted and checked %d values", ID, valueInsert)
			nextUpdate = timeutil.Now().Add(time.Second)
		}
	}
}
// Logs returns the log entries parsed from the log files stored on
// the server. Log entries are returned in reverse chronological order. The
// following options are available:
// * "starttime" query parameter filters the log entries to only ones that
//   occurred on or after the "starttime". Defaults to a day ago.
// * "endtime" query parameter filters the log entries to only ones that
//   occurred on or before the "endtime". Defaults to the current time.
// * "pattern" query parameter filters the log entries by the provided regexp
//   pattern if it exists. Defaults to nil.
// * "max" query parameter is the hard limit of the number of returned log
//   entries. Defaults to defaultMaxLogEntries.
// * "level" query parameter filters the log entries to be those of the
//   corresponding severity level or worse. Defaults to "info".
func (s *statusServer) Logs(
	_ context.Context, req *serverpb.LogsRequest,
) (*serverpb.LogEntriesResponse, error) {
	// Flush buffered log output so the on-disk files are current before
	// reading them back.
	log.Flush()

	// Resolve the severity filter, defaulting to INFO.
	var sev log.Severity
	if len(req.Level) == 0 {
		sev = log.Severity_INFO
	} else {
		var sevFound bool
		sev, sevFound = log.SeverityByName(req.Level)
		if !sevFound {
			return nil, fmt.Errorf("level could not be determined: %s", req.Level)
		}
	}

	// Time bounds default to [one day ago, now].
	startTimestamp, err := parseInt64WithDefault(
		req.StartTime,
		timeutil.Now().AddDate(0, 0, -1).UnixNano())
	if err != nil {
		return nil, grpc.Errorf(codes.InvalidArgument, "StartTime could not be parsed: %s", err)
	}

	endTimestamp, err := parseInt64WithDefault(req.EndTime, timeutil.Now().UnixNano())
	if err != nil {
		return nil, grpc.Errorf(codes.InvalidArgument, "EndTime could not be parsed: %s", err)
	}

	if startTimestamp > endTimestamp {
		return nil, grpc.Errorf(codes.InvalidArgument, "StartTime: %d should not be greater than endtime: %d", startTimestamp, endTimestamp)
	}

	maxEntries, err := parseInt64WithDefault(req.Max, defaultMaxLogEntries)
	if err != nil {
		return nil, grpc.Errorf(codes.InvalidArgument, "Max could not be parsed: %s", err)
	}
	if maxEntries < 1 {
		return nil, grpc.Errorf(codes.InvalidArgument, "Max: %d should be set to a value greater than 0", maxEntries)
	}

	// Optional regexp filter; nil means no pattern filtering.
	var regex *regexp.Regexp
	if len(req.Pattern) > 0 {
		if regex, err = regexp.Compile(req.Pattern); err != nil {
			return nil, grpc.Errorf(codes.InvalidArgument, "regex pattern could not be compiled: %s", err)
		}
	}

	entries, err := log.FetchEntriesFromFiles(sev, startTimestamp, endTimestamp, int(maxEntries), regex)
	if err != nil {
		return nil, err
	}

	return &serverpb.LogEntriesResponse{Entries: entries}, nil
}
// If the time is greater than the timestamp stored at `key`, run `f`. // Before running `f`, the timestamp is updated forward by a small amount via // a compare-and-swap to ensure at-most-one concurrent execution. After `f` // executes the timestamp is set to the next execution time. // Returns how long until `f` should be run next (i.e. when this method should // be called again). func (s *Server) maybeRunPeriodicCheck( op string, key roachpb.Key, f func(context.Context), ) time.Duration { ctx, span := s.AnnotateCtxWithSpan(context.Background(), "op") defer span.Finish() // Add the op name to the log context. ctx = log.WithLogTag(ctx, op, nil) resp, err := s.db.Get(ctx, key) if err != nil { log.Infof(ctx, "error reading time: %s", err) return updateCheckRetryFrequency } // We should early returned below if either the next check time is in the // future or if the atomic compare-and-set of that time failed (which // would happen if two nodes tried at the same time). if resp.Exists() { whenToCheck, pErr := resp.Value.GetTime() if pErr != nil { log.Warningf(ctx, "error decoding time: %s", err) return updateCheckRetryFrequency } else if delay := whenToCheck.Sub(timeutil.Now()); delay > 0 { return delay } nextRetry := whenToCheck.Add(updateCheckRetryFrequency) if err := s.db.CPut(ctx, key, nextRetry, whenToCheck); err != nil { if log.V(2) { log.Infof(ctx, "could not set next version check time (maybe another node checked?): %s", err) } return updateCheckRetryFrequency } } else { log.Infof(ctx, "No previous %s time.", op) nextRetry := timeutil.Now().Add(updateCheckRetryFrequency) // CPut with `nil` prev value to assert that no other node has checked. 
if err := s.db.CPut(ctx, key, nextRetry, nil); err != nil { if log.V(2) { log.Infof(ctx, "Could not set %s time (maybe another node checked?): %v", op, err) } return updateCheckRetryFrequency } } f(ctx) if err := s.db.Put(ctx, key, timeutil.Now().Add(updateCheckFrequency)); err != nil { log.Infof(ctx, "Error updating %s time: %v", op, err) } return updateCheckFrequency }
// testPutInner launches one writer goroutine per node, each putting
// random-length values under sequentially allocated keys until cfg.Duration
// elapses, while the main loop reports progress and checks cluster
// consistency once per second until every writer has finished.
func testPutInner(ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	db, err := c.NewClient(ctx, 0)
	if err != nil {
		t.Fatal(err)
	}

	errs := make(chan error, c.NumNodes())
	start := timeutil.Now()
	deadline := start.Add(cfg.Duration)
	var count int64
	for i := 0; i < c.NumNodes(); i++ {
		go func() {
			r, _ := randutil.NewPseudoRand()
			value := randutil.RandBytes(r, 8192)
			for timeutil.Now().Before(deadline) {
				// Keys come from an atomic counter so writers never collide.
				k := atomic.AddInt64(&count, 1)
				v := value[:r.Intn(len(value))]
				if err := db.Put(ctx, fmt.Sprintf("%08d", k), v); err != nil {
					errs <- err
					return
				}
			}
			// nil signals a clean writer exit.
			errs <- nil
		}()
	}

	for i := 0; i < c.NumNodes(); {
		baseCount := atomic.LoadInt64(&count)
		select {
		case <-stopper.ShouldStop():
			t.Fatalf("interrupted")
		case err := <-errs:
			if err != nil {
				t.Fatal(err)
			}
			i++
		case <-time.After(1 * time.Second):
			// Periodically print out progress so that we know the test is still
			// running.
			loadedCount := atomic.LoadInt64(&count)
			log.Infof(ctx, "%d (%d/s)", loadedCount, loadedCount-baseCount)
			c.Assert(ctx, t)
			if err := cluster.Consistent(ctx, c, 0); err != nil {
				t.Fatal(err)
			}
		}
	}

	elapsed := timeutil.Since(start)
	log.Infof(ctx, "%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
// Wait until all clients have stopped. func waitClientsStop(ctx context.Context, num int, state *testState, stallDuration time.Duration) { prevRound := atomic.LoadUint64(&state.monkeyIteration) stallTime := timeutil.Now().Add(stallDuration) var prevOutput string // Spin until all clients are shut. for numShutClients := 0; numShutClients < num; { select { case <-state.teardown: case <-stopper.ShouldStop(): state.t.Fatal("interrupted") case err := <-state.errChan: if err != nil { state.t.Error(err) } numShutClients++ case <-time.After(time.Second): var newOutput string if timeutil.Now().Before(state.deadline) { curRound := atomic.LoadUint64(&state.monkeyIteration) if curRound == prevRound { if timeutil.Now().After(stallTime) { atomic.StoreInt32(&state.stalled, 1) state.t.Fatalf("Stall detected at round %d, no forward progress for %s", curRound, stallDuration) } } else { prevRound = curRound stallTime = timeutil.Now().Add(stallDuration) } // Periodically print out progress so that we know the test is // still running and making progress. counts := state.counts() strCounts := make([]string, len(counts)) for i := range counts { strCounts[i] = strconv.FormatUint(counts[i], 10) } newOutput = fmt.Sprintf("round %d: client counts: (%s)", curRound, strings.Join(strCounts, ", ")) } else { newOutput = fmt.Sprintf("test finished, waiting for shutdown of %d clients", num-numShutClients) } // This just stops the logs from being a bit too spammy. if newOutput != prevOutput { log.Infof(ctx, newOutput) prevOutput = newOutput } } } }
// RetryForDuration will retry the given function until it either returns // without error, or the given duration has elapsed. The function is invoked // immediately at first and then successively with an exponential backoff // starting at 1ns and ending at the specified duration. func RetryForDuration(duration time.Duration, fn func() error) error { deadline := timeutil.Now().Add(duration) var lastErr error for wait := time.Duration(1); timeutil.Now().Before(deadline); wait *= 2 { lastErr = fn() if lastErr == nil { return nil } if wait > time.Second { wait = time.Second } time.Sleep(wait) } return lastErr }
// newTemplate returns a partially-filled template. // It should be further populated based on whether the cert is for a CA or node. func newTemplate(commonName string) (*x509.Certificate, error) { // Generate a random serial number. serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128) serialNumber, err := rand.Int(rand.Reader, serialNumberLimit) if err != nil { return nil, err } notBefore := timeutil.Now().Add(validFrom) notAfter := notBefore.Add(validFor) cert := &x509.Certificate{ SerialNumber: serialNumber, Subject: pkix.Name{ Organization: []string{"Cockroach"}, CommonName: commonName, }, NotBefore: notBefore, NotAfter: notAfter, KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, } return cert, nil }
// waitAndProcess waits for the pace interval and processes the replica
// if repl is not nil. The method returns true when the scanner needs
// to be stopped. The method also removes a replica from queues when it
// is signaled via the removed channel.
func (rs *replicaScanner) waitAndProcess(
	ctx context.Context, start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica,
) bool {
	// Pace the scan so a full pass completes in roughly the scan interval.
	waitInterval := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(waitInterval)
	if log.V(6) {
		log.Infof(ctx, "wait timer interval set to %s", waitInterval)
	}
	for {
		select {
		case <-rs.waitTimer.C:
			if log.V(6) {
				log.Infof(ctx, "wait timer fired")
			}
			// Mark the timer as read so the next Reset behaves correctly.
			rs.waitTimer.Read = true
			if repl == nil {
				return false
			}

			if log.V(2) {
				log.Infof(ctx, "replica scanner processing %s", repl)
			}
			// Offer the replica to every queue; each queue decides whether to
			// accept it.
			for _, q := range rs.queues {
				q.MaybeAdd(repl, clock.Now())
			}
			return false

		case repl := <-rs.removed:
			// A replica was removed from the store: purge it from the queues.
			rs.removeReplica(repl)

		case <-stopper.ShouldStop():
			return true
		}
	}
}
// backfillIndexes backfills entries for the added indexes in chunks of
// chunkSize rows, extending the schema change lease before each chunk.
// Checkpointing of progress is handled inside backfillIndexesChunk via
// lastCheckpoint.
func (sc *SchemaChanger) backfillIndexes(
	lease *sqlbase.TableDescriptor_SchemaChangeLease,
	version sqlbase.DescriptorVersion,
	added []sqlbase.IndexDescriptor,
	mutationIdx int,
) error {
	if len(added) == 0 {
		return nil
	}

	// Initialize a span of keys.
	sp, err := sc.getTableSpan(mutationIdx)
	if err != nil {
		return err
	}

	// Backfill the index entries for all the rows.
	chunkSize := sc.getChunkSize(indexBackfillChunkSize)
	lastCheckpoint := timeutil.Now()
	for row, done := int64(0), false; !done; row += chunkSize {
		// First extend the schema change lease.
		if err := sc.ExtendLease(lease); err != nil {
			return err
		}
		if log.V(2) {
			log.Infof(context.TODO(), "index add (%d, %d) at row: %d, span: %s",
				sc.tableID, sc.mutationID, row, sp)
		}
		// Each chunk advances sp.Key; done is set once the span is exhausted.
		sp.Key, done, err = sc.backfillIndexesChunk(version, added, sp, chunkSize, mutationIdx, &lastCheckpoint)
		if err != nil {
			return err
		}
	}
	return nil
}
// TestScannerPaceInterval tests that paceInterval returns the correct interval.
func TestScannerPaceInterval(t *testing.T) {
	defer leaktest.AfterTest(t)()

	const count = 3
	durations := []time.Duration{
		30 * time.Millisecond,
		60 * time.Millisecond,
		500 * time.Millisecond,
	}
	// function logs an error when the actual value is not close
	// to the expected value (within a 1ms tolerance).
	logErrorWhenNotCloseTo := func(expected, actual time.Duration) {
		delta := 1 * time.Millisecond
		if actual < expected-delta || actual > expected+delta {
			t.Errorf("Expected duration %s, got %s", expected, actual)
		}
	}
	for _, duration := range durations {
		startTime := timeutil.Now()
		ranges := newTestRangeSet(count, t)
		s := newReplicaScanner(log.AmbientContext{}, duration, 0, ranges)
		// With count replicas and no elapsed time, the pace interval is the
		// scan duration divided evenly among the replicas.
		interval := s.paceInterval(startTime, startTime)
		logErrorWhenNotCloseTo(duration/count, interval)

		// The range set is empty
		ranges = newTestRangeSet(0, t)
		s = newReplicaScanner(log.AmbientContext{}, duration, 0, ranges)
		interval = s.paceInterval(startTime, startTime)
		logErrorWhenNotCloseTo(duration, interval)

		ranges = newTestRangeSet(count, t)
		s = newReplicaScanner(log.AmbientContext{}, duration, 0, ranges)
		// Move the present to duration time into the future; the scan is
		// already overdue, so no further waiting is expected.
		interval = s.paceInterval(startTime, startTime.Add(duration))
		logErrorWhenNotCloseTo(0, interval)
	}
}
func (c *readTimeoutConn) Read(b []byte) (int, error) { // readTimeout is the amount of time ReadTimeoutConn should wait on a // read before checking for exit conditions. The tradeoff is between the // time it takes to react to session context cancellation and the overhead // of waking up and checking for exit conditions. const readTimeout = 150 * time.Millisecond // Remove the read deadline when returning from this function to avoid // unexpected behavior. defer func() { _ = c.SetReadDeadline(time.Time{}) }() for { if err := c.checkExitConds(); err != nil { return 0, err } if err := c.SetReadDeadline(timeutil.Now().Add(readTimeout)); err != nil { return 0, err } n, err := c.Conn.Read(b) // Continue if the error is due to timing out. if err, ok := err.(net.Error); ok && err.Timeout() { continue } return n, err } }
// transferLease attempts to transfer repl's range lease to a target replica
// chosen by the allocator from the replicas that are caught up on the raft
// log. It returns true if a transfer was performed.
func (rq *replicateQueue) transferLease(
	ctx context.Context,
	repl *Replica,
	desc *roachpb.RangeDescriptor,
	zone config.ZoneConfig,
	checkTransferLeaseSource bool,
) (bool, error) {
	// Only consider replicas that are not lagging the raft log; transferring
	// the lease to a behind replica is filtered out here.
	candidates := filterBehindReplicas(repl.RaftStatus(), desc.Replicas)
	if target := rq.allocator.TransferLeaseTarget(
		zone.Constraints,
		candidates,
		repl.store.StoreID(),
		desc.RangeID,
		checkTransferLeaseSource,
	); target != (roachpb.ReplicaDescriptor{}) {
		rq.metrics.TransferLeaseCount.Inc(1)
		log.VEventf(ctx, 1, "transferring lease to s%d", target.StoreID)
		if err := repl.AdminTransferLease(ctx, target.StoreID); err != nil {
			return false, errors.Wrapf(err, "%s: unable to transfer lease to s%d", repl, target.StoreID)
		}
		// Record the time of this transfer for readers of lastLeaseTransfer.
		rq.lastLeaseTransfer.Store(timeutil.Now())
		return true, nil
	}
	return false, nil
}
// generateUniqueBytes returns a DBytes unique across the cluster: an
// ascending-encoded timestamp (made strictly monotonic within this process)
// followed by the varint-encoded node ID.
func generateUniqueBytes(nodeID roachpb.NodeID) DBytes {
	// Unique bytes are composed of the current time in nanoseconds and the
	// node-id. If the nanosecond value is the same on two consecutive calls to
	// timeutil.Now() the nanoseconds value is incremented. The node-id is varint
	// encoded. Since node-ids are allocated consecutively starting at 1, the
	// node-id field will consume 1 or 2 bytes for any reasonably sized cluster.
	//
	// TODO(pmattis): Do we have to worry about persisting the milliseconds value
	// periodically to avoid the clock ever going backwards (e.g. due to NTP
	// adjustment)?
	nanos := uint64(timeutil.Now().UnixNano())
	uniqueBytesState.Lock()
	if nanos <= uniqueBytesState.nanos {
		nanos = uniqueBytesState.nanos + 1
	}
	uniqueBytesState.nanos = nanos
	uniqueBytesState.Unlock()

	b := make([]byte, 0, 8+binary.MaxVarintLen32)
	b = encoding.EncodeUint64Ascending(b, nanos)
	// We use binary.PutUvarint instead of encoding.EncodeUvarint because the
	// former uses less space for values < 128 which is a common occurrence for
	// node IDs.
	n := binary.PutUvarint(b[len(b):len(b)+binary.MaxVarintLen32], uint64(nodeID))
	return DBytes(b[:len(b)+n])
}
func (sp *StorePool) String() string { sp.mu.Lock() defer sp.mu.Unlock() ids := make(roachpb.StoreIDSlice, 0, len(sp.mu.storeDetails)) for id := range sp.mu.storeDetails { ids = append(ids, id) } sort.Sort(ids) var buf bytes.Buffer now := timeutil.Now() for _, id := range ids { detail := sp.mu.storeDetails[id] fmt.Fprintf(&buf, "%d", id) if detail.dead { _, _ = buf.WriteString("*") } fmt.Fprintf(&buf, ": range-count=%d fraction-used=%.2f", detail.desc.Capacity.RangeCount, detail.desc.Capacity.FractionUsed()) throttled := detail.throttledUntil.Sub(now) if throttled > 0 { fmt.Fprintf(&buf, " [throttled=%.1fs]", throttled.Seconds()) } _, _ = buf.WriteString("\n") } return buf.String() }
// maybeWriteResumeSpan checkpoints backfill progress by writing the resume
// span into the mutation of the table descriptor, at most once per
// checkpoint interval. It returns errVersionMismatch if the descriptor
// version has changed since the backfill started.
func (sc *SchemaChanger) maybeWriteResumeSpan(
	txn *client.Txn,
	version sqlbase.DescriptorVersion,
	resume roachpb.Span,
	mutationIdx int,
	lastCheckpoint *time.Time,
) error {
	// Shadow the package-level interval so the testing knob can override it
	// locally.
	checkpointInterval := checkpointInterval
	if sc.testingKnobs.WriteCheckpointInterval > 0 {
		checkpointInterval = sc.testingKnobs.WriteCheckpointInterval
	}
	// Rate-limit checkpoint writes.
	if timeutil.Since(*lastCheckpoint) < checkpointInterval {
		return nil
	}
	tableDesc, err := sqlbase.GetTableDescFromID(txn, sc.tableID)
	if err != nil {
		return err
	}
	if tableDesc.Version != version {
		return errVersionMismatch
	}
	tableDesc.Mutations[mutationIdx].ResumeSpan = resume
	txn.SetSystemConfigTrigger()
	if err := txn.Put(sqlbase.MakeDescMetadataKey(tableDesc.GetID()),
		sqlbase.WrapDescriptor(tableDesc)); err != nil {
		return err
	}
	*lastCheckpoint = timeutil.Now()
	return nil
}
// TestNodeStatus verifies the output of the `node status` CLI command, both
// when addressed to a single node and to all nodes.
func TestNodeStatus(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// Capture the test start time; it is passed to checkNodeStatus below.
	start := timeutil.Now()
	c, err := newCLITest(t, false)
	if err != nil {
		t.Fatal(err)
	}
	defer c.stop(true)

	// Refresh time series data, which is required to retrieve stats.
	if err := c.WriteSummaries(); err != nil {
		t.Fatalf("couldn't write stats summaries: %s", err)
	}

	out, err := c.RunWithCapture("node status 1 --pretty")
	if err != nil {
		t.Fatal(err)
	}
	checkNodeStatus(t, c, out, start)

	out, err = c.RunWithCapture("node status --pretty")
	if err != nil {
		t.Fatal(err)
	}
	checkNodeStatus(t, c, out, start)
}
// scanLoop loops endlessly, scanning through replicas available via
// the replica set, or until the scanner is stopped. The iteration
// is paced to complete a full scan in approximately the scan interval.
func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		ctx := rs.AnnotateCtx(context.Background())
		start := timeutil.Now()

		// waitTimer is reset in each call to waitAndProcess.
		defer rs.waitTimer.Stop()

		for {
			// While disabled, block until re-enabled (or stopped).
			if rs.GetDisabled() {
				if done := rs.waitEnabled(stopper); done {
					return
				}
				continue
			}
			var shouldStop bool
			count := 0
			rs.replicas.Visit(func(repl *Replica) bool {
				count++
				shouldStop = rs.waitAndProcess(ctx, start, clock, stopper, repl)
				// Returning false aborts the visit once a stop is requested.
				return !shouldStop
			})
			if count == 0 {
				// No replicas processed, just wait.
				shouldStop = rs.waitAndProcess(ctx, start, clock, stopper, nil)
			}

			// Stop either when waitAndProcess asked for it or when RunTask
			// fails (non-nil), which happens while the stopper is draining.
			shouldStop = shouldStop || nil != stopper.RunTask(func() {
				// Increment iteration count.
				rs.mu.Lock()
				defer rs.mu.Unlock()
				rs.mu.scanCount++
				rs.mu.total += timeutil.Since(start)
				if log.V(6) {
					log.Infof(ctx, "reset replica scan iteration")
				}

				// Reset iteration and start time.
				start = timeutil.Now()
			})
			if shouldStop {
				return
			}
		}
	})
}
func (t *logicTest) success(file string) { t.progress++ now := timeutil.Now() if now.Sub(t.lastProgress) >= 2*time.Second { t.lastProgress = now fmt.Printf("--- progress: %s: %d statements/queries\n", file, t.progress) } }
// TestEncodeDecodeRawSpan round-trips a populated RawSpan through
// EncodeRawSpan/DecodeRawSpan and compares the string representations of the
// original and decoded spans.
func TestEncodeDecodeRawSpan(t *testing.T) {
	s := basictracer.RawSpan{
		Context: basictracer.SpanContext{
			TraceID: 1,
			SpanID:  2,
			Sampled: true,
			Baggage: make(map[string]string),
		},
		ParentSpanID: 13,
		Operation:    "testop",
		Start:        timeutil.Now(),
		Duration:     15 * time.Millisecond,
		Tags:         make(map[string]interface{}),
		Logs: []opentracing.LogRecord{
			{
				Timestamp: timeutil.Now().Add(2 * time.Millisecond),
			},
			{
				Timestamp: timeutil.Now().Add(5 * time.Millisecond),
				Fields: []otlog.Field{
					otlog.Int("f1", 3),
					otlog.String("f2", "f2Val"),
				},
			},
		},
	}
	s.Context.Baggage["bag"] = "bagVal"
	s.Tags["tag"] = 5

	enc, err := EncodeRawSpan(&s, nil)
	if err != nil {
		t.Fatal(err)
	}

	var d basictracer.RawSpan
	if err = DecodeRawSpan(enc, &d); err != nil {
		t.Fatal(err)
	}

	// We cannot use DeepEqual because we encode all log fields as strings. So
	// e.g. the "f1" field above is now a string, not an int. The string
	// representations must match though.
	sStr := fmt.Sprint(s)
	dStr := fmt.Sprint(d)
	if sStr != dStr {
		t.Errorf("initial span: '%s', after encode/decode: '%s'", sStr, dStr)
	}
}
// processReplica processes a single replica. This should not be
// called externally to the queue. bq.mu.Lock must not be held
// while calling this method.
func (bq *baseQueue) processReplica(
	queueCtx context.Context, repl *Replica, clock *hlc.Clock,
) error {
	// Serialize processing: only one replica is processed at a time.
	bq.processMu.Lock()
	defer bq.processMu.Unlock()

	// Load the system config.
	cfg, ok := bq.gossip.GetSystemConfig()
	if !ok {
		log.VEventf(queueCtx, 1, "no system config available, skipping")
		return nil
	}

	if bq.requiresSplit(cfg, repl) {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		log.VEventf(queueCtx, 3, "%s: split needed; skipping", repl)
		return nil
	}

	// Putting a span in a context means that events will no longer go to the
	// event log. Use queueCtx for events that are intended for the event log.
	ctx, span := bq.AnnotateCtxWithSpan(queueCtx, bq.name)
	defer span.Finish()
	// Also add the Replica annotations to ctx.
	ctx = repl.AnnotateCtx(ctx)
	// Bound the processing time for this replica.
	ctx, cancel := context.WithTimeout(ctx, bq.processTimeout)
	defer cancel()
	log.Eventf(ctx, "processing replica")

	// If the queue requires a replica to have the range lease in
	// order to be processed, check whether this replica has range lease
	// and renew or acquire if necessary.
	if bq.needsLease {
		// Create a "fake" get request in order to invoke redirectOnOrAcquireLease.
		if err := repl.redirectOnOrAcquireLease(ctx); err != nil {
			// Not holding the lease is a normal condition, not a failure.
			if _, harmless := err.GetDetail().(*roachpb.NotLeaseHolderError); harmless {
				log.VEventf(queueCtx, 3, "not holding lease; skipping")
				return nil
			}
			return errors.Wrapf(err.GoError(), "%s: could not obtain lease", repl)
		}
		log.Event(ctx, "got range lease")
	}

	log.VEventf(queueCtx, 3, "processing")
	// Record processing latency in the queue's metrics.
	start := timeutil.Now()
	err := bq.impl.process(ctx, clock.Now(), repl, cfg)
	duration := timeutil.Since(start)
	bq.processingNanos.Inc(duration.Nanoseconds())
	if err != nil {
		return err
	}
	log.VEventf(queueCtx, 2, "done: %s", duration)
	log.Event(ctx, "done")
	bq.successes.Inc(1)
	return nil
}
// testDecimalSingleArgFunc runs a table of single-argument decimal test
// cases against f at scale s, checking three call patterns: an allocated
// return value, mutation of a provided destination, and in-place mutation of
// the argument. The first call is run in a goroutine so it can be bounded by
// the optional -duration-limit flag.
func testDecimalSingleArgFunc(
	t *testing.T,
	f func(*inf.Dec, *inf.Dec, inf.Scale) (*inf.Dec, error),
	s inf.Scale,
	tests []decimalOneArgTestCase,
) {
	for _, tc := range tests {
		t.Run(fmt.Sprintf("%s = %s", tc.input, tc.expected), func(t *testing.T) {
			x, exp := new(inf.Dec), new(inf.Dec)
			x.SetString(tc.input)
			exp.SetString(tc.expected)

			// Test allocated return value.
			var z *inf.Dec
			var err error
			done := make(chan struct{}, 1)
			start := timeutil.Now()
			// Run f off the test goroutine so the select below can time out.
			go func() {
				z, err = f(nil, x, s)
				done <- struct{}{}
			}()
			// after stays nil (blocks forever) when no duration limit is set.
			var after <-chan time.Time
			if *flagDurationLimit > 0 {
				after = time.After(*flagDurationLimit)
			}
			select {
			case <-done:
				t.Logf("execute duration: %s", timeutil.Since(start))
			case <-after:
				t.Fatalf("timedout after %s", *flagDurationLimit)
			}

			if err != nil {
				// Error cases encode the expected error text in tc.expected.
				if tc.expected != err.Error() {
					t.Errorf("expected error %s, got %s", tc.expected, err)
				}
				return
			}
			if exp.Cmp(z) != 0 {
				t.Errorf("expected %s, got %s", exp, z)
			}

			// Test provided decimal mutation.
			z.SetString("0.0")
			_, _ = f(z, x, s)
			if exp.Cmp(z) != 0 {
				t.Errorf("expected %s, got %s", exp, z)
			}

			// Test same arg mutation.
			_, _ = f(x, x, s)
			if exp.Cmp(x) != 0 {
				t.Errorf("expected %s, got %s", exp, x)
			}
			// Restore x, which the previous call mutated in place.
			x.SetString(tc.input)
		})
	}
}
// testAdminLossOfQuorumInner kills every node except the first and verifies
// that the surviving node's status and time-series endpoints still respond
// without a quorum.
func testAdminLossOfQuorumInner(
	ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig,
) {
	if c.NumNodes() < 2 {
		t.Logf("skipping test %s because given cluster has too few nodes", cfg.Name)
		return
	}

	// Get the ids for each node.
	nodeIDs := make([]roachpb.NodeID, c.NumNodes())
	for i := 0; i < c.NumNodes(); i++ {
		var details serverpb.DetailsResponse
		if err := httputil.GetJSON(cluster.HTTPClient, c.URL(ctx, i)+"/_status/details/local", &details); err != nil {
			t.Fatalf("failed to get local details from node %d: %s", i, err)
		}
		nodeIDs[i] = details.NodeID
	}

	// Leave only the first node alive.
	for i := 1; i < c.NumNodes(); i++ {
		if err := c.Kill(ctx, i); err != nil {
			t.Fatal(err)
		}
	}

	// Retrieve node statuses (all queried via the surviving node 0).
	var nodes serverpb.NodesResponse
	if err := httputil.GetJSON(cluster.HTTPClient, c.URL(ctx, 0)+"/_status/nodes", &nodes); err != nil {
		t.Fatal(err)
	}
	for _, nodeID := range nodeIDs {
		var nodeStatus status.NodeStatus
		if err := httputil.GetJSON(cluster.HTTPClient, c.URL(ctx, 0)+"/_status/nodes/"+strconv.Itoa(int(nodeID)), &nodeStatus); err != nil {
			t.Fatal(err)
		}
	}

	// Retrieve time-series data.
	nowNanos := timeutil.Now().UnixNano()
	queryRequest := tspb.TimeSeriesQueryRequest{
		StartNanos: nowNanos - 10*time.Second.Nanoseconds(),
		EndNanos:   nowNanos,
		Queries: []tspb.Query{
			{Name: "doesn't_matter", Sources: []string{}},
		},
	}
	var queryResponse tspb.TimeSeriesQueryResponse
	if err := httputil.PostJSON(cluster.HTTPClient, c.URL(ctx, 0)+"/ts/query",
		&queryRequest, &queryResponse); err != nil {
		t.Fatal(err)
	}

	// TODO(cdo): When we're able to issue SQL queries without a quorum, test all
	// admin endpoints that issue SQL queries here.
}
// TestLeaseManagerReacquire verifies that acquiring a lease whose remaining
// lifetime is below MinLeaseDuration yields a fresh lease rather than a
// reference to the existing one.
func TestLeaseManagerReacquire(testingT *testing.T) {
	defer leaktest.AfterTest(testingT)()
	params, _ := createTestServerParams()
	// Track lease removals so releases can be asserted on below.
	removalTracker := csql.NewLeaseRemovalTracker()
	params.Knobs = base.TestingKnobs{
		SQLLeaseManager: &csql.LeaseManagerTestingKnobs{
			LeaseStoreTestingKnobs: csql.LeaseStoreTestingKnobs{
				LeaseReleasedEvent: removalTracker.LeaseRemovedNotification,
			},
		},
	}
	t := newLeaseTest(testingT, params)
	defer t.cleanup()

	const descID = keys.LeaseTableID

	// Acquire 2 leases from the same node. They should point to the same lease
	// structure.
	l1 := t.mustAcquire(1, descID, 0)
	l2 := t.mustAcquire(1, descID, 0)
	if l1 != l2 {
		t.Fatalf("expected same lease, but found %p != %p", l1, l2)
	}
	if l1.Refcount() != 2 {
		t.Fatalf("expected refcount of 2, but found %d", l1.Refcount())
	}
	t.expectLeases(descID, "/1/1")

	// Set the minimum lease duration such that the next lease acquisition will
	// require the lease to be reacquired. The package-level knobs are restored
	// on exit.
	savedLeaseDuration, savedMinLeaseDuration := csql.LeaseDuration, csql.MinLeaseDuration
	defer func() {
		csql.LeaseDuration, csql.MinLeaseDuration = savedLeaseDuration, savedMinLeaseDuration
	}()
	csql.MinLeaseDuration = l1.Expiration().Sub(timeutil.Now())
	csql.LeaseDuration = 2 * csql.MinLeaseDuration

	// Another lease acquisition from the same node will result in a new lease.
	l3 := t.mustAcquire(1, descID, 0)
	if l1 == l3 {
		t.Fatalf("expected different leases, but found %p", l1)
	}
	if l3.Refcount() != 1 {
		t.Fatalf("expected refcount of 1, but found %d", l3.Refcount())
	}
	if l3.Expiration().Before(l1.Expiration()) {
		t.Fatalf("expected new lease expiration (%s) to be after old lease expiration (%s)",
			l3.Expiration(), l1.Expiration())
	}
	t.expectLeases(descID, "/1/1 /1/1")

	t.mustRelease(1, l1, nil)
	t.mustRelease(1, l2, removalTracker)
	t.mustRelease(1, l3, nil)
}
// monotonicUnixNano returns a monotonically increasing value for // nanoseconds in Unix time. Since equal times are ignored with // updates to infos, we're careful to avoid incorrectly ignoring a // newly created value in the event one is created within the same // nanosecond. Really unlikely except for the case of unittests, but // better safe than sorry. func monotonicUnixNano() int64 { monoTime.Lock() defer monoTime.Unlock() now := timeutil.Now().UnixNano() if now <= monoTime.last { now = monoTime.last + 1 } monoTime.last = now return now }