Example 1
func runMVCCConditionalPut(emk engineMaker, valueSize int, createFirst bool, b *testing.B) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("cput_%d", valueSize))
	defer eng.Close()

	b.SetBytes(int64(valueSize))
	var expected *roachpb.Value
	if createFirst {
		for i := 0; i < b.N; i++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
			ts := makeTS(timeutil.Now().UnixNano(), 0)
			if err := MVCCPut(context.Background(), eng, nil, key, ts, value, nil); err != nil {
				b.Fatalf("failed put: %s", err)
			}
		}
		expected = &value
	}

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := makeTS(timeutil.Now().UnixNano(), 0)
		if err := MVCCConditionalPut(context.Background(), eng, nil, key, ts, value, expected, nil); err != nil {
			b.Fatalf("failed put: %s", err)
		}
	}

	b.StopTimer()
}
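
A helper like this is typically invoked from thin named wrappers that pin the parameters; a minimal sketch, where the engine maker setupMVCCInMemRocksDB is a hypothetical name rather than something defined above:

// Hypothetical wrappers around runMVCCConditionalPut; the engine maker
// is an assumption for illustration only.
func BenchmarkMVCCConditionalPutCreate(b *testing.B) {
	// createFirst=false: every conditional put creates its key.
	runMVCCConditionalPut(setupMVCCInMemRocksDB, 1024, false, b)
}

func BenchmarkMVCCConditionalPutReplace(b *testing.B) {
	// createFirst=true: every conditional put replaces an existing value.
	runMVCCConditionalPut(setupMVCCInMemRocksDB, 1024, true, b)
}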
Example 2
// Heartbeat is called to update a node's expiration timestamp. This
// method does a conditional put on the node liveness record, and if
// successful, stores the updated liveness record in the nodes map.
func (nl *NodeLiveness) Heartbeat(ctx context.Context, liveness *Liveness) error {
	defer func(start time.Time) {
		if dur := timeutil.Now().Sub(start); dur > time.Second {
			log.Warningf(ctx, "slow heartbeat took %0.1fs", dur.Seconds())
		}
	}(timeutil.Now())

	// Allow only one heartbeat at a time.
	select {
	case nl.heartbeatSem <- struct{}{}:
	case <-ctx.Done():
		return ctx.Err()
	}
	defer func() {
		<-nl.heartbeatSem
	}()

	nodeID := nl.gossip.NodeID.Get()
	var newLiveness Liveness
	if liveness == nil {
		newLiveness = Liveness{
			NodeID: nodeID,
			Epoch:  1,
		}
	} else {
		newLiveness = *liveness
	}
	// We need to add the maximum clock offset to the expiration because it's
	// used when determining liveness for a node.
	newLiveness.Expiration = nl.clock.Now().Add(
		(nl.livenessThreshold + nl.clock.MaxOffset()).Nanoseconds(), 0)
	if err := nl.updateLiveness(ctx, &newLiveness, liveness, func(actual Liveness) error {
		// Update liveness to actual value on mismatch.
		nl.mu.Lock()
		nl.mu.self = actual
		nl.mu.Unlock()
		// If the actual liveness is different than expected, but is
		// considered live, treat the heartbeat as a success. This can
		// happen when the periodic heartbeater races with a concurrent
		// lease acquisition.
		if actual.isLive(nl.clock.Now(), nl.clock.MaxOffset()) {
			return errNodeAlreadyLive
		}
		// Otherwise, return error.
		return errSkippedHeartbeat
	}); err != nil {
		if err == errNodeAlreadyLive {
			return nil
		}
		nl.metrics.HeartbeatFailures.Inc(1)
		return err
	}

	log.VEventf(ctx, 1, "heartbeat %+v", newLiveness.Expiration)
	nl.mu.Lock()
	nl.mu.self = newLiveness
	nl.mu.Unlock()
	nl.metrics.HeartbeatSuccesses.Inc(1)
	return nil
}
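
Heartbeat compares the two sentinel errors by identity; a minimal sketch of how they might be declared (the error messages are assumptions):

var (
	// errNodeAlreadyLive: the heartbeat lost a race with another update,
	// but the node is live anyway, so Heartbeat reports success.
	errNodeAlreadyLive = errors.New("node already live")
	// errSkippedHeartbeat: the persisted liveness record did not match
	// the expected value, so the conditional put was abandoned.
	errSkippedHeartbeat = errors.New("heartbeat failed on liveness record mismatch")
)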
Example 3
func testGossipPeeringsInner(
	ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig,
) {
	num := c.NumNodes()

	deadline := timeutil.Now().Add(cfg.Duration)

	waitTime := longWaitTime
	if cfg.Duration < waitTime {
		waitTime = shortWaitTime
	}

	for timeutil.Now().Before(deadline) {
		CheckGossip(ctx, t, c, waitTime, HasPeers(num))

		// Restart the first node.
		log.Infof(ctx, "restarting node 0")
		if err := c.Restart(ctx, 0); err != nil {
			t.Fatal(err)
		}
		CheckGossip(ctx, t, c, waitTime, HasPeers(num))

		// Restart another node (if there is one).
		var pickedNode int
		if num > 1 {
			pickedNode = rand.Intn(num-1) + 1
		}
		log.Infof(ctx, "restarting node %d", pickedNode)
		if err := c.Restart(ctx, pickedNode); err != nil {
			t.Fatal(err)
		}
		CheckGossip(ctx, t, c, waitTime, HasPeers(num))
	}
}
Example 4
func (z *zeroSum) monitor(d time.Duration) {
	start := timeutil.Now()
	lastTime := start
	var lastOps uint64

	for ticks := 0; true; ticks++ {
		time.Sleep(d)

		if ticks%20 == 0 {
			fmt.Printf("_elapsed__accounts_________ops__ops/sec___errors___splits____xfers___ranges_____________replicas\n")
		}

		now := timeutil.Now()
		elapsed := now.Sub(lastTime).Seconds()
		ops := atomic.LoadUint64(&z.stats.ops)

		z.ranges.Lock()
		ranges, replicas := z.ranges.count, z.ranges.replicas
		z.ranges.Unlock()

		fmt.Printf("%8s %9d %11d %8.1f %8d %8d %8d %8d %20s\n",
			time.Duration(now.Sub(start).Seconds()+0.5)*time.Second,
			z.accountsLen(), ops, float64(ops-lastOps)/elapsed,
			atomic.LoadUint64(&z.stats.errors),
			atomic.LoadUint64(&z.stats.splits),
			atomic.LoadUint64(&z.stats.transfers),
			ranges, z.formatReplicas(replicas))
		lastTime = now
		lastOps = ops
	}
}
Example 5
// AfterTest snapshots the currently-running goroutines and returns a
// function to be run at the end of tests to see whether any
// goroutines leaked.
func AfterTest(t testing.TB) func() {
	orig := interestingGoroutines()
	return func() {
		if t.Failed() {
			return
		}
		if r := recover(); r != nil {
			panic(r)
		}
		// Loop, waiting for goroutines to shut down.
		// Wait up to 5 seconds, but finish as quickly as possible.
		deadline := timeutil.Now().Add(5 * time.Second)
		for {
			var leaked []string
			for id, stack := range interestingGoroutines() {
				if _, ok := orig[id]; !ok {
					leaked = append(leaked, stack)
				}
			}
			if len(leaked) == 0 {
				return
			}
			if timeutil.Now().Before(deadline) {
				time.Sleep(50 * time.Millisecond)
				continue
			}
			sort.Strings(leaked)
			for _, g := range leaked {
				t.Errorf("Leaked goroutine: %v", g)
			}
			return
		}
	}
}
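
Typical usage, consistent with the defer leaktest.AfterTest(t)() calls seen elsewhere in this listing, is to defer the returned closure at the top of the test:

func TestNoGoroutineLeaks(t *testing.T) {
	defer AfterTest(t)()
	// Test body goes here. Any interesting goroutine still running up to
	// five seconds after the test ends is reported via t.Errorf.
}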
Example 6
func (a *allocSim) monitor(d time.Duration) {
	const padding = "__________"

	formatHeader := func(numReplicas int) string {
		var buf bytes.Buffer
		_, _ = buf.WriteString("_elapsed__ops/sec___errors_replicas")
		for i := 1; i <= numReplicas; i++ {
			node := fmt.Sprintf("%d", i)
			fmt.Fprintf(&buf, "%s%s", padding[:len(padding)-len(node)], node)
		}
		return buf.String()
	}

	formatNodes := func(replicas, leases []int) string {
		var buf bytes.Buffer
		for i := range replicas {
			alive := a.Nodes[i].Alive()
			if !alive {
				_, _ = buf.WriteString("\033[0;31;49m")
			}
			fmt.Fprintf(&buf, "%*s", len(padding), fmt.Sprintf("%d/%d", replicas[i], leases[i]))
			if !alive {
				_, _ = buf.WriteString("\033[0m")
			}
		}
		return buf.String()
	}

	start := timeutil.Now()
	lastTime := start
	var numReplicas int
	var lastOps uint64

	for ticks := 0; true; ticks++ {
		time.Sleep(d)

		now := timeutil.Now()
		elapsed := now.Sub(lastTime).Seconds()
		ops := atomic.LoadUint64(&a.stats.ops)

		a.ranges.Lock()
		ranges := a.ranges.count
		replicas := a.ranges.replicas
		leases := a.ranges.leases
		a.ranges.Unlock()

		if ticks%20 == 0 || numReplicas != len(replicas) {
			numReplicas = len(replicas)
			fmt.Println(formatHeader(numReplicas))
		}

		fmt.Printf("%8s %8.1f %8d %8d%s\n",
			time.Duration(now.Sub(start).Seconds()+0.5)*time.Second,
			float64(ops-lastOps)/elapsed, atomic.LoadUint64(&a.stats.errors),
			ranges, formatNodes(replicas, leases))
		lastTime = now
		lastOps = ops
	}
}
Example 7
// insertLoad adds a very basic load that inserts into a unique table and checks
// that the inserted values are indeed correct.
func insertLoad(t *testing.T, dc *dynamicClient, ID int) {
	// Initialize the db.
	if _, err := dc.exec(`CREATE DATABASE IF NOT EXISTS Insert`); err != nil {
		t.Fatal(err)
	}

	tableName := fmt.Sprintf("Insert.Table%d", ID)
	createTableStatement := fmt.Sprintf(`
CREATE TABLE %s (
	key INT PRIMARY KEY,
	value INT NOT NULL
)`, tableName)
	insertStatement := fmt.Sprintf(`INSERT INTO %s (key, value) VALUES ($1, $1)`, tableName)
	selectStatement := fmt.Sprintf(`SELECT key-value AS "total" FROM %s WHERE key = $1`, tableName)

	// Init the db for the basic insert.
	if _, err := dc.exec(createTableStatement); err != nil {
		t.Fatal(err)
	}

	var valueCheck, valueInsert int
	nextUpdate := timeutil.Now()

	// Perform inserts and selects.
	for dc.isRunning() {

		// Insert some values.
		valueInsert++
		if _, err := dc.exec(insertStatement, valueInsert); err != nil {
			if err == errTestFinished {
				return
			}
			t.Fatal(err)
		}

		// Check that another value is still correct.
		valueCheck--
		if valueCheck < 1 {
			valueCheck = valueInsert
		}

		var total int
		if err := dc.queryRowScan(selectStatement, []interface{}{valueCheck}, []interface{}{&total}); err != nil {
			if err == errTestFinished {
				return
			}
			t.Fatal(err)
		}
		if total != 0 {
			t.Fatalf("total expected to be 0, is %d", total)
		}

		if timeutil.Now().After(nextUpdate) {
			log.Infof(context.TODO(), "Insert %d: inserted and checked %d values", ID, valueInsert)
			nextUpdate = timeutil.Now().Add(time.Second)
		}
	}
}
Example 8
// Logs returns the log entries parsed from the log files stored on
// the server. Log entries are returned in reverse chronological order. The
// following options are available:
// * "starttime" query parameter filters the log entries to only ones that
//   occurred on or after the "starttime". Defaults to a day ago.
// * "endtime" query parameter filters the log entries to only ones that
//   occurred on or before the "endtime". Defaults to the current time.
// * "pattern" query parameter filters the log entries by the provided regexp
//   pattern if it exists. Defaults to nil.
// * "max" query parameter is the hard limit of the number of returned log
//   entries. Defaults to defaultMaxLogEntries.
// * "level" query parameter filters the log entries to be those of the
//   corresponding severity level or worse. Defaults to "info".
func (s *statusServer) Logs(
	_ context.Context, req *serverpb.LogsRequest,
) (*serverpb.LogEntriesResponse, error) {
	log.Flush()

	var sev log.Severity
	if len(req.Level) == 0 {
		sev = log.Severity_INFO
	} else {
		var sevFound bool
		sev, sevFound = log.SeverityByName(req.Level)
		if !sevFound {
			return nil, fmt.Errorf("level could not be determined: %s", req.Level)
		}
	}

	startTimestamp, err := parseInt64WithDefault(
		req.StartTime,
		timeutil.Now().AddDate(0, 0, -1).UnixNano())
	if err != nil {
		return nil, grpc.Errorf(codes.InvalidArgument, "StartTime could not be parsed: %s", err)
	}

	endTimestamp, err := parseInt64WithDefault(req.EndTime, timeutil.Now().UnixNano())
	if err != nil {
		return nil, grpc.Errorf(codes.InvalidArgument, "EndTime could not be parsed: %s", err)
	}

	if startTimestamp > endTimestamp {
		return nil, grpc.Errorf(codes.InvalidArgument, "StartTime: %d should not be greater than endtime: %d", startTimestamp, endTimestamp)
	}

	maxEntries, err := parseInt64WithDefault(req.Max, defaultMaxLogEntries)
	if err != nil {
		return nil, grpc.Errorf(codes.InvalidArgument, "Max could not be parsed: %s", err)
	}
	if maxEntries < 1 {
		return nil, grpc.Errorf(codes.InvalidArgument, "Max: %d should be set to a value greater than 0", maxEntries)
	}

	var regex *regexp.Regexp
	if len(req.Pattern) > 0 {
		if regex, err = regexp.Compile(req.Pattern); err != nil {
			return nil, grpc.Errorf(codes.InvalidArgument, "regex pattern could not be compiled: %s", err)
		}
	}

	entries, err := log.FetchEntriesFromFiles(sev, startTimestamp, endTimestamp, int(maxEntries), regex)
	if err != nil {
		return nil, err
	}

	return &serverpb.LogEntriesResponse{Entries: entries}, nil
}
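
A sketch of a request that exercises the parsing above; the field names come from the handler (each arrives as a string, hence parseInt64WithDefault), while the concrete values are illustrative:

// Ask for up to 500 log entries of severity "warning" or worse whose
// text matches "gossip". Sketch only: assumes a statusServer in scope.
req := &serverpb.LogsRequest{
	Level:   "warning",
	Max:     "500",
	Pattern: "gossip",
}
resp, err := statusServer.Logs(context.Background(), req)
if err != nil {
	return err
}
fmt.Printf("fetched %d log entries\n", len(resp.Entries))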
Example 9
// If the current time is greater than the timestamp stored at `key`, run `f`.
// Before running `f`, the timestamp is updated forward by a small amount via
// a compare-and-swap to ensure at-most-one concurrent execution. After `f`
// executes the timestamp is set to the next execution time.
// Returns how long until `f` should be run next (i.e. when this method should
// be called again).
func (s *Server) maybeRunPeriodicCheck(
	op string, key roachpb.Key, f func(context.Context),
) time.Duration {
	ctx, span := s.AnnotateCtxWithSpan(context.Background(), "op")
	defer span.Finish()

	// Add the op name to the log context.
	ctx = log.WithLogTag(ctx, op, nil)

	resp, err := s.db.Get(ctx, key)
	if err != nil {
		log.Infof(ctx, "error reading time: %s", err)
		return updateCheckRetryFrequency
	}

	// We return early below if either the next check time is in the
	// future or the atomic compare-and-swap of that time fails (which
	// happens if two nodes try at the same time).
	if resp.Exists() {
		whenToCheck, pErr := resp.Value.GetTime()
		if pErr != nil {
			log.Warningf(ctx, "error decoding time: %s", err)
			return updateCheckRetryFrequency
		} else if delay := whenToCheck.Sub(timeutil.Now()); delay > 0 {
			return delay
		}

		nextRetry := whenToCheck.Add(updateCheckRetryFrequency)
		if err := s.db.CPut(ctx, key, nextRetry, whenToCheck); err != nil {
			if log.V(2) {
				log.Infof(ctx, "could not set next version check time (maybe another node checked?): %s", err)
			}
			return updateCheckRetryFrequency
		}
	} else {
		log.Infof(ctx, "No previous %s time.", op)
		nextRetry := timeutil.Now().Add(updateCheckRetryFrequency)
		// CPut with `nil` prev value to assert that no other node has checked.
		if err := s.db.CPut(ctx, key, nextRetry, nil); err != nil {
			if log.V(2) {
				log.Infof(ctx, "Could not set %s time (maybe another node checked?): %v", op, err)
			}
			return updateCheckRetryFrequency
		}
	}

	f(ctx)

	if err := s.db.Put(ctx, key, timeutil.Now().Add(updateCheckFrequency)); err != nil {
		log.Infof(ctx, "Error updating %s time: %v", op, err)
	}
	return updateCheckFrequency
}
Example 10
func testPutInner(ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	db, err := c.NewClient(ctx, 0)
	if err != nil {
		t.Fatal(err)
	}

	errs := make(chan error, c.NumNodes())
	start := timeutil.Now()
	deadline := start.Add(cfg.Duration)
	var count int64
	for i := 0; i < c.NumNodes(); i++ {
		go func() {
			r, _ := randutil.NewPseudoRand()
			value := randutil.RandBytes(r, 8192)

			for timeutil.Now().Before(deadline) {
				k := atomic.AddInt64(&count, 1)
				v := value[:r.Intn(len(value))]
				if err := db.Put(ctx, fmt.Sprintf("%08d", k), v); err != nil {
					errs <- err
					return
				}
			}
			errs <- nil
		}()
	}

	for i := 0; i < c.NumNodes(); {
		baseCount := atomic.LoadInt64(&count)
		select {
		case <-stopper.ShouldStop():
			t.Fatalf("interrupted")
		case err := <-errs:
			if err != nil {
				t.Fatal(err)
			}
			i++
		case <-time.After(1 * time.Second):
			// Periodically print out progress so that we know the test is still
			// running.
			loadedCount := atomic.LoadInt64(&count)
			log.Infof(ctx, "%d (%d/s)", loadedCount, loadedCount-baseCount)
			c.Assert(ctx, t)
			if err := cluster.Consistent(ctx, c, 0); err != nil {
				t.Fatal(err)
			}
		}
	}

	elapsed := timeutil.Since(start)
	log.Infof(ctx, "%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
Example 11
// Wait until all clients have stopped.
func waitClientsStop(ctx context.Context, num int, state *testState, stallDuration time.Duration) {
	prevRound := atomic.LoadUint64(&state.monkeyIteration)
	stallTime := timeutil.Now().Add(stallDuration)
	var prevOutput string
	// Spin until all clients have shut down.
	for numShutClients := 0; numShutClients < num; {
		select {
		case <-state.teardown:
		case <-stopper.ShouldStop():
			state.t.Fatal("interrupted")

		case err := <-state.errChan:
			if err != nil {
				state.t.Error(err)
			}
			numShutClients++

		case <-time.After(time.Second):
			var newOutput string
			if timeutil.Now().Before(state.deadline) {
				curRound := atomic.LoadUint64(&state.monkeyIteration)
				if curRound == prevRound {
					if timeutil.Now().After(stallTime) {
						atomic.StoreInt32(&state.stalled, 1)
						state.t.Fatalf("Stall detected at round %d, no forward progress for %s", curRound, stallDuration)
					}
				} else {
					prevRound = curRound
					stallTime = timeutil.Now().Add(stallDuration)
				}
				// Periodically print out progress so that we know the test is
				// still running and making progress.
				counts := state.counts()
				strCounts := make([]string, len(counts))
				for i := range counts {
					strCounts[i] = strconv.FormatUint(counts[i], 10)
				}
				newOutput = fmt.Sprintf("round %d: client counts: (%s)", curRound, strings.Join(strCounts, ", "))
			} else {
				newOutput = fmt.Sprintf("test finished, waiting for shutdown of %d clients", num-numShutClients)
			}
			// This just stops the logs from being a bit too spammy.
			if newOutput != prevOutput {
				log.Infof(ctx, newOutput)
				prevOutput = newOutput
			}
		}
	}
}
Example 12
// RetryForDuration will retry the given function until it either returns
// without error, or the given duration has elapsed. The function is invoked
// immediately at first and then successively with an exponential backoff
// starting at 1ns and capped at 1s.
func RetryForDuration(duration time.Duration, fn func() error) error {
	deadline := timeutil.Now().Add(duration)
	var lastErr error
	for wait := time.Duration(1); timeutil.Now().Before(deadline); wait *= 2 {
		lastErr = fn()
		if lastErr == nil {
			return nil
		}
		if wait > time.Second {
			wait = time.Second
		}
		time.Sleep(wait)
	}
	return lastErr
}
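
A usage sketch from inside a test, with a hypothetical serverIsReady predicate standing in for the retried operation:

if err := RetryForDuration(5*time.Second, func() error {
	// Any returned error triggers another sleep-and-retry (1ns, 2ns, ...,
	// capped at 1s) until the five-second deadline passes.
	if !serverIsReady() {
		return errors.New("server not ready")
	}
	return nil
}); err != nil {
	t.Fatalf("server never became ready: %v", err)
}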
Example 13
// newTemplate returns a partially-filled template.
// It should be further populated based on whether the cert is for a CA or node.
func newTemplate(commonName string) (*x509.Certificate, error) {
	// Generate a random serial number.
	serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128)
	serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
	if err != nil {
		return nil, err
	}

	notBefore := timeutil.Now().Add(validFrom)
	notAfter := notBefore.Add(validFor)

	cert := &x509.Certificate{
		SerialNumber: serialNumber,
		Subject: pkix.Name{
			Organization: []string{"Cockroach"},
			CommonName:   commonName,
		},
		NotBefore: notBefore,
		NotAfter:  notAfter,

		KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
	}

	return cert, nil
}
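
Callers then specialize the returned template; a minimal sketch of the CA branch using standard crypto/x509 fields (the specialization itself is not shown in this listing):

template, err := newTemplate("Cockroach CA")
if err != nil {
	return nil, err
}
// Mark the certificate as a CA and allow it to sign other certificates.
template.IsCA = true
template.KeyUsage |= x509.KeyUsageCertSign
template.BasicConstraintsValid = true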
Example 14
// waitAndProcess waits for the pace interval and processes the replica
// if repl is not nil. The method returns true when the scanner needs
// to be stopped. The method also removes a replica from queues when it
// is signaled via the removed channel.
func (rs *replicaScanner) waitAndProcess(
	ctx context.Context, start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica,
) bool {
	waitInterval := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(waitInterval)
	if log.V(6) {
		log.Infof(ctx, "wait timer interval set to %s", waitInterval)
	}
	for {
		select {
		case <-rs.waitTimer.C:
			if log.V(6) {
				log.Infof(ctx, "wait timer fired")
			}
			rs.waitTimer.Read = true
			if repl == nil {
				return false
			}

			if log.V(2) {
				log.Infof(ctx, "replica scanner processing %s", repl)
			}
			for _, q := range rs.queues {
				q.MaybeAdd(repl, clock.Now())
			}
			return false

		case repl := <-rs.removed:
			rs.removeReplica(repl)

		case <-stopper.ShouldStop():
			return true
		}
	}
}
Example 15
func (sc *SchemaChanger) backfillIndexes(
	lease *sqlbase.TableDescriptor_SchemaChangeLease,
	version sqlbase.DescriptorVersion,
	added []sqlbase.IndexDescriptor,
	mutationIdx int,
) error {
	if len(added) == 0 {
		return nil
	}

	// Initialize a span of keys.
	sp, err := sc.getTableSpan(mutationIdx)
	if err != nil {
		return err
	}

	// Backfill the index entries for all the rows.
	chunkSize := sc.getChunkSize(indexBackfillChunkSize)
	lastCheckpoint := timeutil.Now()
	for row, done := int64(0), false; !done; row += chunkSize {
		// First extend the schema change lease.
		if err := sc.ExtendLease(lease); err != nil {
			return err
		}
		if log.V(2) {
			log.Infof(context.TODO(), "index add (%d, %d) at row: %d, span: %s",
				sc.tableID, sc.mutationID, row, sp)
		}
		sp.Key, done, err = sc.backfillIndexesChunk(version, added, sp, chunkSize, mutationIdx, &lastCheckpoint)
		if err != nil {
			return err
		}
	}
	return nil
}
Example 16
// TestScannerPaceInterval tests that paceInterval returns the correct interval.
func TestScannerPaceInterval(t *testing.T) {
	defer leaktest.AfterTest(t)()
	const count = 3
	durations := []time.Duration{
		30 * time.Millisecond,
		60 * time.Millisecond,
		500 * time.Millisecond,
	}
	// logErrorWhenNotCloseTo reports an error when the actual duration is
	// not within a small delta of the expected duration.
	logErrorWhenNotCloseTo := func(expected, actual time.Duration) {
		delta := 1 * time.Millisecond
		if actual < expected-delta || actual > expected+delta {
			t.Errorf("Expected duration %s, got %s", expected, actual)
		}
	}
	for _, duration := range durations {
		startTime := timeutil.Now()
		ranges := newTestRangeSet(count, t)
		s := newReplicaScanner(log.AmbientContext{}, duration, 0, ranges)
		interval := s.paceInterval(startTime, startTime)
		logErrorWhenNotCloseTo(duration/count, interval)
		// The range set is empty.
		ranges = newTestRangeSet(0, t)
		s = newReplicaScanner(log.AmbientContext{}, duration, 0, ranges)
		interval = s.paceInterval(startTime, startTime)
		logErrorWhenNotCloseTo(duration, interval)
		ranges = newTestRangeSet(count, t)
		s = newReplicaScanner(log.AmbientContext{}, duration, 0, ranges)
		// Move the present time forward by `duration` into the future.
		interval = s.paceInterval(startTime, startTime.Add(duration))
		logErrorWhenNotCloseTo(0, interval)
	}
}
Example 17
func (c *readTimeoutConn) Read(b []byte) (int, error) {
	// readTimeout is the amount of time ReadTimeoutConn should wait on a
	// read before checking for exit conditions. The tradeoff is between the
	// time it takes to react to session context cancellation and the overhead
	// of waking up and checking for exit conditions.
	const readTimeout = 150 * time.Millisecond

	// Remove the read deadline when returning from this function to avoid
	// unexpected behavior.
	defer func() { _ = c.SetReadDeadline(time.Time{}) }()
	for {
		if err := c.checkExitConds(); err != nil {
			return 0, err
		}
		if err := c.SetReadDeadline(timeutil.Now().Add(readTimeout)); err != nil {
			return 0, err
		}
		n, err := c.Conn.Read(b)
		// Continue if the error is due to timing out.
		if err, ok := err.(net.Error); ok && err.Timeout() {
			continue
		}
		return n, err
	}
}
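
A minimal sketch of the wrapper type this method hangs off of, with field names inferred from the calls above (an assumption, not the actual declaration):

// readTimeoutConn wraps a net.Conn so that a blocked Read wakes up at
// least every readTimeout to re-check exit conditions.
type readTimeoutConn struct {
	net.Conn
	// checkExitConds is polled on every wakeup; a non-nil error aborts
	// the pending Read with that error.
	checkExitConds func() error
}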
Example 18
func (rq *replicateQueue) transferLease(
	ctx context.Context,
	repl *Replica,
	desc *roachpb.RangeDescriptor,
	zone config.ZoneConfig,
	checkTransferLeaseSource bool,
) (bool, error) {
	candidates := filterBehindReplicas(repl.RaftStatus(), desc.Replicas)
	if target := rq.allocator.TransferLeaseTarget(
		zone.Constraints,
		candidates,
		repl.store.StoreID(),
		desc.RangeID,
		checkTransferLeaseSource,
	); target != (roachpb.ReplicaDescriptor{}) {
		rq.metrics.TransferLeaseCount.Inc(1)
		log.VEventf(ctx, 1, "transferring lease to s%d", target.StoreID)
		if err := repl.AdminTransferLease(ctx, target.StoreID); err != nil {
			return false, errors.Wrapf(err, "%s: unable to transfer lease to s%d", repl, target.StoreID)
		}
		rq.lastLeaseTransfer.Store(timeutil.Now())
		return true, nil
	}
	return false, nil
}
Example 19
func generateUniqueBytes(nodeID roachpb.NodeID) DBytes {
	// Unique bytes are composed of the current time in nanoseconds and the
	// node-id. If the nanosecond value is the same on two consecutive calls to
	// timeutil.Now() the nanoseconds value is incremented. The node-id is varint
	// encoded. Since node-ids are allocated consecutively starting at 1, the
	// node-id field will consume 1 or 2 bytes for any reasonably sized cluster.
	//
	// TODO(pmattis): Do we have to worry about persisting the nanoseconds value
	// periodically to avoid the clock ever going backwards (e.g. due to NTP
	// adjustment)?
	nanos := uint64(timeutil.Now().UnixNano())
	uniqueBytesState.Lock()
	if nanos <= uniqueBytesState.nanos {
		nanos = uniqueBytesState.nanos + 1
	}
	uniqueBytesState.nanos = nanos
	uniqueBytesState.Unlock()

	b := make([]byte, 0, 8+binary.MaxVarintLen32)
	b = encoding.EncodeUint64Ascending(b, nanos)
	// We use binary.PutUvarint instead of encoding.EncodeUvarint because the
	// former uses less space for values < 128 which is a common occurrence for
	// node IDs.
	n := binary.PutUvarint(b[len(b):len(b)+binary.MaxVarintLen32], uint64(nodeID))
	return DBytes(b[:len(b)+n])
}
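
The guard state referenced above can be as small as a mutex plus the last value handed out; a sketch of its assumed declaration:

var uniqueBytesState struct {
	sync.Mutex
	nanos uint64 // last nanosecond timestamp returned, bumped if reused
}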
Example 20
func (sp *StorePool) String() string {
	sp.mu.Lock()
	defer sp.mu.Unlock()

	ids := make(roachpb.StoreIDSlice, 0, len(sp.mu.storeDetails))
	for id := range sp.mu.storeDetails {
		ids = append(ids, id)
	}
	sort.Sort(ids)

	var buf bytes.Buffer
	now := timeutil.Now()

	for _, id := range ids {
		detail := sp.mu.storeDetails[id]
		fmt.Fprintf(&buf, "%d", id)
		if detail.dead {
			_, _ = buf.WriteString("*")
		}
		fmt.Fprintf(&buf, ": range-count=%d fraction-used=%.2f",
			detail.desc.Capacity.RangeCount, detail.desc.Capacity.FractionUsed())
		throttled := detail.throttledUntil.Sub(now)
		if throttled > 0 {
			fmt.Fprintf(&buf, " [throttled=%.1fs]", throttled.Seconds())
		}
		_, _ = buf.WriteString("\n")
	}
	return buf.String()
}
Example 21
func (sc *SchemaChanger) maybeWriteResumeSpan(
	txn *client.Txn,
	version sqlbase.DescriptorVersion,
	resume roachpb.Span,
	mutationIdx int,
	lastCheckpoint *time.Time,
) error {
	checkpointInterval := checkpointInterval
	if sc.testingKnobs.WriteCheckpointInterval > 0 {
		checkpointInterval = sc.testingKnobs.WriteCheckpointInterval
	}
	if timeutil.Since(*lastCheckpoint) < checkpointInterval {
		return nil
	}
	tableDesc, err := sqlbase.GetTableDescFromID(txn, sc.tableID)
	if err != nil {
		return err
	}
	if tableDesc.Version != version {
		return errVersionMismatch
	}
	tableDesc.Mutations[mutationIdx].ResumeSpan = resume
	txn.SetSystemConfigTrigger()
	if err := txn.Put(sqlbase.MakeDescMetadataKey(tableDesc.GetID()),
		sqlbase.WrapDescriptor(tableDesc)); err != nil {
		return err
	}
	*lastCheckpoint = timeutil.Now()
	return nil
}
Example 22
func TestNodeStatus(t *testing.T) {
	defer leaktest.AfterTest(t)()

	start := timeutil.Now()
	c, err := newCLITest(t, false)
	if err != nil {
		t.Fatal(err)
	}
	defer c.stop(true)

	// Refresh time series data, which is required to retrieve stats.
	if err := c.WriteSummaries(); err != nil {
		t.Fatalf("couldn't write stats summaries: %s", err)
	}

	out, err := c.RunWithCapture("node status 1 --pretty")
	if err != nil {
		t.Fatal(err)
	}
	checkNodeStatus(t, c, out, start)

	out, err = c.RunWithCapture("node status --pretty")
	if err != nil {
		t.Fatal(err)
	}
	checkNodeStatus(t, c, out, start)
}
Example 23
// scanLoop loops endlessly, scanning through replicas available via
// the replica set, or until the scanner is stopped. The iteration
// is paced to complete a full scan in approximately the scan interval.
func (rs *replicaScanner) scanLoop(clock *hlc.Clock, stopper *stop.Stopper) {
	stopper.RunWorker(func() {
		ctx := rs.AnnotateCtx(context.Background())
		start := timeutil.Now()

		// waitTimer is reset in each call to waitAndProcess.
		defer rs.waitTimer.Stop()

		for {
			if rs.GetDisabled() {
				if done := rs.waitEnabled(stopper); done {
					return
				}
				continue
			}
			var shouldStop bool
			count := 0
			rs.replicas.Visit(func(repl *Replica) bool {
				count++
				shouldStop = rs.waitAndProcess(ctx, start, clock, stopper, repl)
				return !shouldStop
			})
			if count == 0 {
				// No replicas processed, just wait.
				shouldStop = rs.waitAndProcess(ctx, start, clock, stopper, nil)
			}

			shouldStop = shouldStop || nil != stopper.RunTask(func() {
				// Increment iteration count.
				rs.mu.Lock()
				defer rs.mu.Unlock()
				rs.mu.scanCount++
				rs.mu.total += timeutil.Since(start)
				if log.V(6) {
					log.Infof(ctx, "reset replica scan iteration")
				}

				// Reset iteration and start time.
				start = timeutil.Now()
			})
			if shouldStop {
				return
			}
		}
	})
}
Example 24
func (t *logicTest) success(file string) {
	t.progress++
	now := timeutil.Now()
	if now.Sub(t.lastProgress) >= 2*time.Second {
		t.lastProgress = now
		fmt.Printf("--- progress: %s: %d statements/queries\n", file, t.progress)
	}
}
Example 25
func TestEncodeDecodeRawSpan(t *testing.T) {
	s := basictracer.RawSpan{
		Context: basictracer.SpanContext{
			TraceID: 1,
			SpanID:  2,
			Sampled: true,
			Baggage: make(map[string]string),
		},
		ParentSpanID: 13,
		Operation:    "testop",
		Start:        timeutil.Now(),
		Duration:     15 * time.Millisecond,
		Tags:         make(map[string]interface{}),
		Logs: []opentracing.LogRecord{
			{
				Timestamp: timeutil.Now().Add(2 * time.Millisecond),
			},
			{
				Timestamp: timeutil.Now().Add(5 * time.Millisecond),
				Fields: []otlog.Field{
					otlog.Int("f1", 3),
					otlog.String("f2", "f2Val"),
				},
			},
		},
	}
	s.Context.Baggage["bag"] = "bagVal"
	s.Tags["tag"] = 5

	enc, err := EncodeRawSpan(&s, nil)
	if err != nil {
		t.Fatal(err)
	}
	var d basictracer.RawSpan
	if err = DecodeRawSpan(enc, &d); err != nil {
		t.Fatal(err)
	}
	// We cannot use DeepEqual because we encode all log fields as strings. So
	// e.g. the "f1" field above is now a string, not an int. The string
	// representations must match though.
	sStr := fmt.Sprint(s)
	dStr := fmt.Sprint(d)
	if sStr != dStr {
		t.Errorf("initial span: '%s', after encode/decode: '%s'", sStr, dStr)
	}
}
Example 26
// processReplica processes a single replica. This should not be
// called externally to the queue. bq.mu.Lock must not be held
// while calling this method.
func (bq *baseQueue) processReplica(
	queueCtx context.Context, repl *Replica, clock *hlc.Clock,
) error {
	bq.processMu.Lock()
	defer bq.processMu.Unlock()

	// Load the system config.
	cfg, ok := bq.gossip.GetSystemConfig()
	if !ok {
		log.VEventf(queueCtx, 1, "no system config available, skipping")
		return nil
	}

	if bq.requiresSplit(cfg, repl) {
		// Range needs to be split due to zone configs, but queue does
		// not accept unsplit ranges.
		log.VEventf(queueCtx, 3, "%s: split needed; skipping", repl)
		return nil
	}

	// Putting a span in a context means that events will no longer go to the
	// event log. Use queueCtx for events that are intended for the event log.
	ctx, span := bq.AnnotateCtxWithSpan(queueCtx, bq.name)
	defer span.Finish()
	// Also add the Replica annotations to ctx.
	ctx = repl.AnnotateCtx(ctx)
	ctx, cancel := context.WithTimeout(ctx, bq.processTimeout)
	defer cancel()
	log.Eventf(ctx, "processing replica")

	// If the queue requires a replica to have the range lease in
	// order to be processed, check whether this replica has range lease
	// and renew or acquire if necessary.
	if bq.needsLease {
		// Create a "fake" get request in order to invoke redirectOnOrAcquireLease.
		if err := repl.redirectOnOrAcquireLease(ctx); err != nil {
			if _, harmless := err.GetDetail().(*roachpb.NotLeaseHolderError); harmless {
				log.VEventf(queueCtx, 3, "not holding lease; skipping")
				return nil
			}
			return errors.Wrapf(err.GoError(), "%s: could not obtain lease", repl)
		}
		log.Event(ctx, "got range lease")
	}

	log.VEventf(queueCtx, 3, "processing")
	start := timeutil.Now()
	err := bq.impl.process(ctx, clock.Now(), repl, cfg)
	duration := timeutil.Since(start)
	bq.processingNanos.Inc(duration.Nanoseconds())
	if err != nil {
		return err
	}
	log.VEventf(queueCtx, 2, "done: %s", duration)
	log.Event(ctx, "done")
	bq.successes.Inc(1)
	return nil
}
Example 27
func testDecimalSingleArgFunc(
	t *testing.T,
	f func(*inf.Dec, *inf.Dec, inf.Scale) (*inf.Dec, error),
	s inf.Scale,
	tests []decimalOneArgTestCase,
) {
	for _, tc := range tests {
		t.Run(fmt.Sprintf("%s = %s", tc.input, tc.expected), func(t *testing.T) {
			x, exp := new(inf.Dec), new(inf.Dec)
			x.SetString(tc.input)
			exp.SetString(tc.expected)

			// Test allocated return value.
			var z *inf.Dec
			var err error
			done := make(chan struct{}, 1)
			start := timeutil.Now()
			go func() {
				z, err = f(nil, x, s)
				done <- struct{}{}
			}()
			var after <-chan time.Time
			if *flagDurationLimit > 0 {
				after = time.After(*flagDurationLimit)
			}
			select {
			case <-done:
				t.Logf("execute duration: %s", timeutil.Since(start))
			case <-after:
				t.Fatalf("timedout after %s", *flagDurationLimit)
			}
			if err != nil {
				if tc.expected != err.Error() {
					t.Errorf("expected error %s, got %s", tc.expected, err)
				}
				return
			}
			if exp.Cmp(z) != 0 {
				t.Errorf("expected %s, got %s", exp, z)
			}

			// Test provided decimal mutation.
			z.SetString("0.0")
			_, _ = f(z, x, s)
			if exp.Cmp(z) != 0 {
				t.Errorf("expected %s, got %s", exp, z)
			}

			// Test same arg mutation.
			_, _ = f(x, x, s)
			if exp.Cmp(x) != 0 {
				t.Errorf("expected %s, got %s", exp, x)
			}
			x.SetString(tc.input)
		})
	}
}
Example 28
func testAdminLossOfQuorumInner(
	ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig,
) {
	if c.NumNodes() < 2 {
		t.Logf("skipping test %s because given cluster has too few nodes", cfg.Name)
		return
	}

	// Get the ids for each node.
	nodeIDs := make([]roachpb.NodeID, c.NumNodes())
	for i := 0; i < c.NumNodes(); i++ {
		var details serverpb.DetailsResponse
		if err := httputil.GetJSON(cluster.HTTPClient, c.URL(ctx, i)+"/_status/details/local", &details); err != nil {
			t.Fatalf("failed to get local details from node %d: %s", i, err)
		}
		nodeIDs[i] = details.NodeID
	}

	// Leave only the first node alive.
	for i := 1; i < c.NumNodes(); i++ {
		if err := c.Kill(ctx, i); err != nil {
			t.Fatal(err)
		}
	}

	// Retrieve node statuses.
	var nodes serverpb.NodesResponse
	if err := httputil.GetJSON(cluster.HTTPClient, c.URL(ctx, 0)+"/_status/nodes", &nodes); err != nil {
		t.Fatal(err)
	}

	for _, nodeID := range nodeIDs {
		var nodeStatus status.NodeStatus
		if err := httputil.GetJSON(cluster.HTTPClient, c.URL(ctx, 0)+"/_status/nodes/"+strconv.Itoa(int(nodeID)), &nodeStatus); err != nil {
			t.Fatal(err)
		}
	}

	// Retrieve time-series data.
	nowNanos := timeutil.Now().UnixNano()
	queryRequest := tspb.TimeSeriesQueryRequest{
		StartNanos: nowNanos - 10*time.Second.Nanoseconds(),
		EndNanos:   nowNanos,
		Queries: []tspb.Query{
			{Name: "doesn't_matter", Sources: []string{}},
		},
	}
	var queryResponse tspb.TimeSeriesQueryResponse
	if err := httputil.PostJSON(cluster.HTTPClient, c.URL(ctx, 0)+"/ts/query",
		&queryRequest, &queryResponse); err != nil {
		t.Fatal(err)
	}

	// TODO(cdo): When we're able to issue SQL queries without a quorum, test all
	// admin endpoints that issue SQL queries here.
}
Example 29
func TestLeaseManagerReacquire(testingT *testing.T) {
	defer leaktest.AfterTest(testingT)()
	params, _ := createTestServerParams()
	removalTracker := csql.NewLeaseRemovalTracker()
	params.Knobs = base.TestingKnobs{
		SQLLeaseManager: &csql.LeaseManagerTestingKnobs{
			LeaseStoreTestingKnobs: csql.LeaseStoreTestingKnobs{
				LeaseReleasedEvent: removalTracker.LeaseRemovedNotification,
			},
		},
	}
	t := newLeaseTest(testingT, params)
	defer t.cleanup()

	const descID = keys.LeaseTableID

	// Acquire 2 leases from the same node. They should point to the same lease
	// structure.
	l1 := t.mustAcquire(1, descID, 0)
	l2 := t.mustAcquire(1, descID, 0)
	if l1 != l2 {
		t.Fatalf("expected same lease, but found %p != %p", l1, l2)
	}
	if l1.Refcount() != 2 {
		t.Fatalf("expected refcount of 2, but found %d", l1.Refcount())
	}
	t.expectLeases(descID, "/1/1")

	// Set the minimum lease duration such that the next lease acquisition will
	// require the lease to be reacquired.
	savedLeaseDuration, savedMinLeaseDuration := csql.LeaseDuration, csql.MinLeaseDuration
	defer func() {
		csql.LeaseDuration, csql.MinLeaseDuration = savedLeaseDuration, savedMinLeaseDuration
	}()
	csql.MinLeaseDuration = l1.Expiration().Sub(timeutil.Now())
	csql.LeaseDuration = 2 * csql.MinLeaseDuration

	// Another lease acquisition from the same node will result in a new lease.
	l3 := t.mustAcquire(1, descID, 0)
	if l1 == l3 {
		t.Fatalf("expected different leases, but found %p", l1)
	}
	if l3.Refcount() != 1 {
		t.Fatalf("expected refcount of 1, but found %d", l3.Refcount())
	}
	if l3.Expiration().Before(l1.Expiration()) {
		t.Fatalf("expected new lease expiration (%s) to be after old lease expiration (%s)",
			l3.Expiration(), l1.Expiration())
	}
	t.expectLeases(descID, "/1/1 /1/1")

	t.mustRelease(1, l1, nil)
	t.mustRelease(1, l2, removalTracker)
	t.mustRelease(1, l3, nil)
}
Example 30
// monotonicUnixNano returns a monotonically increasing value for
// nanoseconds in Unix time. Since equal times are ignored with
// updates to infos, we're careful to avoid incorrectly ignoring a
// newly created value in the event one is created within the same
// nanosecond. Really unlikely except in unit tests, but
// better safe than sorry.
func monotonicUnixNano() int64 {
	monoTime.Lock()
	defer monoTime.Unlock()

	now := timeutil.Now().UnixNano()
	if now <= monoTime.last {
		now = monoTime.last + 1
	}
	monoTime.last = now
	return now
}
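
The monoTime guard follows the same pattern as uniqueBytesState in the earlier example; a sketch of its assumed declaration:

var monoTime struct {
	sync.Mutex
	last int64 // last nanosecond value returned by monotonicUnixNano
}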