Example #1
0
// waitAndProcess arms the scanner's wait timer with the pace interval computed
// from start and then blocks until one of three things happens:
//
//   - the timer fires: repl, when non-nil, is offered to every queue via
//     MaybeAdd and the method returns false;
//   - a replica arrives on the removed channel: it is handed to removeReplica
//     and the wait continues;
//   - the stopper signals shutdown: the method returns true so the caller can
//     stop the scanner.
func (rs *replicaScanner) waitAndProcess(
	ctx context.Context, start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica,
) bool {
	pace := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(pace)
	if log.V(6) {
		log.Infof(ctx, "wait timer interval set to %s", pace)
	}
	for {
		select {
		case <-stopper.ShouldStop():
			return true

		case removedRepl := <-rs.removed:
			rs.removeReplica(removedRepl)

		case <-rs.waitTimer.C:
			if log.V(6) {
				log.Infof(ctx, "wait timer fired")
			}
			// Record that the timer's channel has been drained.
			rs.waitTimer.Read = true
			if repl != nil {
				if log.V(2) {
					log.Infof(ctx, "replica scanner processing %s", repl)
				}
				for _, queue := range rs.queues {
					queue.MaybeAdd(repl, clock.Now())
				}
			}
			return false
		}
	}
}
Example #2
0
// runHistoryWithRetry runs the given history and intercepts retry errors.
// Any other error is returned unchanged. On a retry error, alternate
// histories are enumerated from the failing history, a 5% random sample of
// them is taken, and each sampled history is run through this method again,
// so further retries are handled recursively.
//
// A transaction index is only retried once per verifier: if the same
// transaction asks for a retry a second time, the history is skipped and nil
// is returned.
func (hv *historyVerifier) runHistoryWithRetry(
	priorities []int32, isolations []enginepb.IsolationType, cmds []*cmd, db *client.DB, t *testing.T,
) error {
	err := hv.runHistory(priorities, isolations, cmds, db, t)
	if err == nil {
		return nil
	}
	if log.V(1) {
		log.Infof(context.Background(), "got an error running history %s: %s", historyString(cmds), err)
	}

	retry, isRetry := err.(*retryError)
	if !isRetry {
		return err
	}

	if _, seen := hv.retriedTxns[retry.txnIdx]; seen {
		if log.V(1) {
			log.Infof(context.Background(), "retried txn %d twice; skipping history", retry.txnIdx+1)
		}
		return nil
	}
	hv.retriedTxns[retry.txnIdx] = struct{}{}

	// Randomly subsample 5% of histories for reduced execution time.
	alternates := sampleHistories(enumerateHistoriesAfterRetry(retry, cmds), 0.05)
	total := len(alternates)
	for idx, alt := range alternates {
		if log.V(1) {
			log.Infof(context.Background(), "after retry, running alternate history %d of %d", idx, total)
		}
		if altErr := hv.runHistoryWithRetry(priorities, isolations, alt, db, t); altErr != nil {
			return altErr
		}
	}
	return nil
}
Example #3
0
// improve picks a store to rebalance a replica onto. It narrows the supplied
// list with selectRandom, asks selectBest for the best of those, and accepts
// that store only if rebalanceToConvergesOnMean reports that adding a replica
// to it converges on the mean range count. In every other case nil is
// returned.
func (rcb rangeCountBalancer) improve(sl StoreList, excluded nodeIDSet) *roachpb.StoreDescriptor {
	// Work on a random subset of the supplied stores.
	sl.stores = selectRandom(rcb.rand, allocatorRandomCount, sl, excluded)
	best := rcb.selectBest(sl)

	switch {
	case best == nil:
		if log.V(2) {
			log.Infof(context.TODO(), "not rebalancing: no valid candidate targets: %s",
				formatCandidates(nil, sl.stores))
		}
		return nil

	case !rebalanceToConvergesOnMean(sl, *best):
		// Adding a replica to the chosen store must move its range count
		// towards the mean.
		if log.V(2) {
			log.Infof(context.TODO(), "not rebalancing: %s wouldn't converge on the mean %.1f",
				formatCandidates(best, sl.stores), sl.candidateCount.mean)
		}
		return nil
	}

	if log.V(2) {
		log.Infof(context.TODO(), "rebalancing: mean=%.1f %s",
			sl.candidateCount.mean, formatCandidates(best, sl.stores))
	}
	return best
}
Example #4
0
// flush forms a message from the encoder's accumulated rows and sends it on
// m.stream, or on m.syncFlowStream when m.stream is nil. Nothing is sent when
// there are no buffered rows and last is false. On a successful send the row
// counter is reset; a send error is returned to the caller and the counter
// is left untouched.
func (m *outbox) flush(last bool, err error) error {
	if m.numRows == 0 && !last {
		return nil
	}
	msg := m.encoder.FormMessage(last, err)

	if log.V(3) {
		log.Infof(m.flowCtx.Context, "flushing outbox")
	}

	var sendErr error
	if m.stream == nil {
		sendErr = m.syncFlowStream.Send(msg)
	} else {
		sendErr = m.stream.Send(msg)
	}

	if sendErr != nil {
		if log.V(1) {
			log.Errorf(m.flowCtx.Context, "outbox flush error: %s", sendErr)
		}
		return sendErr
	}

	if log.V(3) {
		log.Infof(m.flowCtx.Context, "outbox flushed")
	}
	m.numRows = 0
	return nil
}
Example #5
0
// AddMetricStruct walks the fields of metricStruct (dereferencing it first if
// it is a pointer) and registers what it finds: fields implementing Iterable
// are passed to AddMetric, and fields implementing Struct are processed
// recursively. Unexported fields and fields of any other type are skipped.
func (r *Registry) AddMetricStruct(metricStruct interface{}) {
	structVal := reflect.ValueOf(metricStruct)
	if structVal.Kind() == reflect.Ptr {
		structVal = structVal.Elem()
	}
	structType := structVal.Type()

	numFields := structVal.NumField()
	for idx := 0; idx < numFields; idx++ {
		fieldVal := structVal.Field(idx)
		fieldName := structType.Field(idx).Name

		// Values of unexported fields cannot be extracted via reflection.
		if !fieldVal.CanInterface() {
			if log.V(2) {
				log.Infof(context.TODO(), "Skipping unexported field %s", fieldName)
			}
			continue
		}

		switch metric := fieldVal.Interface().(type) {
		case Iterable:
			r.AddMetric(metric)
		case Struct:
			r.AddMetricStruct(metric)
		default:
			if log.V(2) {
				log.Infof(context.TODO(), "Skipping non-metric field %s", fieldName)
			}
		}
	}
}
Example #6
0
// Run is part of the processor interface. It repeatedly pulls batches of row
// pairs from the stream merger, renders each pair and pushes every non-nil
// rendered row to the output. The output is closed (with the error, if any)
// when the merger fails or returns an empty batch, when rendering fails, or
// when the output reports that it needs no more rows.
func (m *mergeJoiner) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(m.ctx, "merge joiner")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting merge joiner run")
		defer log.Infof(ctx, "exiting merge joiner run")
	}

	for {
		batch, err := m.streamMerger.NextBatch()
		if err != nil || len(batch) == 0 {
			m.output.Close(err)
			return
		}
		for _, pair := range batch {
			rendered, _, renderErr := m.render(pair[0], pair[1])
			if renderErr != nil {
				m.output.Close(renderErr)
				return
			}
			// A nil row means there is nothing to emit for this pair.
			if rendered == nil {
				continue
			}
			if m.output.PushRow(rendered) {
				continue
			}
			if log.V(2) {
				log.Infof(ctx, "no more rows required")
			}
			m.output.Close(nil)
			return
		}
	}
}
Example #7
0
// wrap decides whether plan needs to be wrapped by this sortNode.
//
// It returns (true, plan) when the receiver is nil or when plan's existing
// ordering already satisfies the requested ordering and no extra columns have
// to be stripped. Otherwise it returns (false, n) with n.plan set to plan;
// n.needSort is additionally set when the existing ordering does not cover
// the whole requested ordering.
func (n *sortNode) wrap(plan planNode) (bool, planNode) {
	if n == nil {
		return true, plan
	}

	// Compare the requested ordering against what the plan already provides.
	existing := plan.Ordering()
	if log.V(2) {
		log.Infof(n.ctx, "Sort: existing=%+v desired=%+v", existing, n.ordering)
	}
	if computeOrderingMatch(n.ordering, existing, false) < len(n.ordering) {
		n.plan = plan
		n.needSort = true
		return false, n
	}

	if len(plan.Columns()) > len(n.columns) {
		// No sorting required, but we have to strip off the extra render
		// expressions we added.
		n.plan = plan
		return false, n
	}

	if log.V(2) {
		log.Infof(n.ctx, "Sort: no sorting required")
	}
	return true, plan
}
Example #8
0
// throttle records on the store pool that the store toStoreID should be
// throttled. The reason selects which timeout is applied:
// throttleDeclined uses declinedReservationsTimeout and throttleFailed uses
// failedReservationsTimeout. The store detail's throttledUntil field is set
// to the current clock time plus that timeout. Any other reason leaves the
// store detail unchanged.
func (sp *StorePool) throttle(reason throttleReason, toStoreID roachpb.StoreID) {
	sp.mu.Lock()
	defer sp.mu.Unlock()
	storeDetail := sp.getStoreDetailLocked(toStoreID)
	logCtx := sp.AnnotateCtx(context.TODO())

	// Whether the snapshot was explicitly declined or failed to apply, the
	// store detail is stamped with a deadline before which it is throttled.
	switch reason {
	case throttleFailed:
		timeout := sp.failedReservationsTimeout
		storeDetail.throttledUntil = sp.clock.Now().GoTime().Add(timeout)
		if log.V(2) {
			log.Infof(logCtx, "snapshot failed, store:%s will be throttled for %s until %s",
				toStoreID, timeout, storeDetail.throttledUntil)
		}
	case throttleDeclined:
		timeout := sp.declinedReservationsTimeout
		storeDetail.throttledUntil = sp.clock.Now().GoTime().Add(timeout)
		if log.V(2) {
			log.Infof(logCtx, "snapshot declined, store:%s will be throttled for %s until %s",
				toStoreID, timeout, storeDetail.throttledUntil)
		}
	}
}
Example #9
0
// deleteRow queues on b the KV operations that remove the table row described
// by values: one Del per secondary index entry followed by a DelRange over
// the primary index key span. Foreign key checks run first, and their error
// (or an index encoding error) is returned without touching the batch.
func (rd *rowDeleter) deleteRow(ctx context.Context, b *client.Batch, values []parser.Datum) error {
	if err := rd.fks.checkAll(values); err != nil {
		return err
	}

	primaryKey, secondaryEntries, err := rd.helper.encodeIndexes(rd.fetchColIDtoRowIndex, values)
	if err != nil {
		return err
	}

	// Remove every secondary index entry pointing at the row.
	for i := range secondaryEntries {
		entryKey := secondaryEntries[i].Key
		if log.V(2) {
			log.Infof(ctx, "Del %s", entryKey)
		}
		b.Del(entryKey)
	}

	// Remove the row itself. The start/end keys are staged in the receiver's
	// fields for the DelRange call and cleared again right afterwards.
	rd.startKey = roachpb.Key(primaryKey)
	rd.endKey = roachpb.Key(encoding.EncodeNotNullDescending(primaryKey))
	if log.V(2) {
		log.Infof(ctx, "DelRange %s - %s", rd.startKey, rd.endKey)
	}
	b.DelRange(&rd.startKey, &rd.endKey, false)
	rd.startKey, rd.endKey = nil, nil

	return nil
}
Example #10
0
// Run is part of the processor interface. It reads rows from the input and
// forwards each row whose encoding has not yet been seen within the current
// group to the output. The output is closed when the input is exhausted, on
// any error, or when the output needs no more rows.
func (d *distinct) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(d.ctx, "distinct")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting distinct process")
		defer log.Infof(ctx, "exiting distinct")
	}

	// scratch is handed to encode on every iteration so the encoding buffer
	// can be reused across rows.
	var scratch []byte
	for {
		row, err := d.input.NextRow()
		if err != nil || row == nil {
			d.output.Close(err)
			return
		}

		// If we are processing DISTINCT(x, y) and the input stream is ordered
		// by x, x is the group key. The 'seen' set only ever holds the rows
		// sharing the current group key, and the row's encoding is the key
		// used in that set.
		encoded, err := d.encode(scratch, row)
		if err != nil {
			d.output.Close(err)
			return
		}

		// A change of group key between consecutive rows resets the 'seen'
		// set, so encodings of all rows never have to be kept around.
		sameGroup, err := d.matchLastGroupKey(row)
		if err != nil {
			d.output.Close(err)
			return
		}
		if !sameGroup {
			d.lastGroupKey = row
			d.seen = make(map[string]struct{})
		}

		key := string(encoded)
		_, alreadySeen := d.seen[key]
		if !alreadySeen {
			d.seen[key] = struct{}{}
			if wantsMore := d.output.PushRow(row); !wantsMore {
				if log.V(2) {
					log.Infof(ctx, "no more rows required")
				}
				d.output.Close(nil)
				return
			}
		}
		scratch = encoded[:0]
	}
}
Example #11
0
// Run is part of the processor interface. For every input row it evaluates
// the processor's expressions and pushes the result to the output. The
// expressions are initialized lazily from the column types of the first row
// received. The output is closed when the input is exhausted, on any error,
// or when the output needs no more rows.
func (ev *evaluator) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(ev.ctx, "evaluator")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting evaluator process")
		defer log.Infof(ctx, "exiting evaluator")
	}

	initialized := false
	for {
		row, err := ev.input.NextRow()
		if err != nil || row == nil {
			ev.output.Close(err)
			return
		}

		if !initialized {
			initialized = true

			// Derive the input column types from the first row and use them
			// to initialize each expression.
			colTypes := make([]sqlbase.ColumnType_Kind, len(row))
			for col := range colTypes {
				colTypes[col] = row[col].Type
			}
			for idx, spec := range ev.specExprs {
				if initErr := ev.exprs[idx].init(spec, colTypes, ev.flowCtx.evalCtx); initErr != nil {
					ev.output.Close(initErr)
					return
				}
				ev.exprTypes[idx] = sqlbase.DatumTypeToColumnKind(ev.exprs[idx].expr.ResolvedType())
			}
		}

		result, err := ev.eval(row)
		if err != nil {
			ev.output.Close(err)
			return
		}

		if log.V(3) {
			log.Infof(ctx, "pushing %s\n", result)
		}
		// Hand the row to the output RowReceiver and stop once it signals
		// that no further rows are needed.
		if wantsMore := ev.output.PushRow(result); !wantsMore {
			if log.V(2) {
				log.Infof(ctx, "no more rows required")
			}
			ev.output.Close(nil)
			return
		}
	}
}
Example #12
0
// open opens the RocksDB instance backing r. When r.dir is empty an in-memory
// instance is opened; otherwise the on-disk storage version is read and
// validated against [versionMinimum, versionCurrent] before the database is
// opened. After a successful open, the version file is written if the on-disk
// version is older than versionCurrent.
func (r *RocksDB) open() error {
	var ver storageVersion
	if len(r.dir) == 0 {
		if log.V(2) {
			log.Infof(context.TODO(), "opening in memory rocksdb instance")
		}

		// In memory dbs are always current.
		ver = versionCurrent
	} else {
		log.Infof(context.TODO(), "opening rocksdb instance at %q", r.dir)

		// Check the version number.
		onDisk, err := getVersion(r.dir)
		if err != nil {
			return err
		}
		ver = onDisk
		if ver < versionMinimum || ver > versionCurrent {
			// Instead of an error, we should call a migration if possible when
			// one is needed immediately following the DBOpen call.
			return fmt.Errorf("incompatible rocksdb data version, current:%d, on disk:%d, minimum:%d",
				versionCurrent, ver, versionMinimum)
		}
	}

	// Tunables that can be overridden through the environment.
	blockSize := envutil.EnvOrDefaultBytes("COCKROACH_ROCKSDB_BLOCK_SIZE", defaultBlockSize)
	walTTL := envutil.EnvOrDefaultDuration("COCKROACH_ROCKSDB_WAL_TTL", 0).Seconds()

	opts := C.DBOptions{
		cache:           r.cache.cache,
		block_size:      C.uint64_t(blockSize),
		wal_ttl_seconds: C.uint64_t(walTTL),
		allow_os_buffer: C.bool(true),
		logging_enabled: C.bool(log.V(3)),
		num_cpu:         C.int(runtime.NumCPU()),
		max_open_files:  C.int(r.maxOpenFiles),
	}
	if err := statusToError(C.DBOpen(&r.rdb, goToCSlice([]byte(r.dir)), opts)); err != nil {
		return errors.Errorf("could not open rocksdb instance: %s", err)
	}

	// Update or add the version file if needed.
	if ver < versionCurrent {
		if err := writeVersionFile(r.dir); err != nil {
			return err
		}
	}

	// Start a goroutine that will finish when the underlying handle
	// is deallocated. This is used to check a leak in tests.
	go func() {
		<-r.deallocated
	}()
	return nil
}
Example #13
0
// setup prepares the test cluster described by spec: it starts a cluster of
// spec.ClusterSize nodes (defaulting to 1), opens one SQL connection per node,
// optionally rewrites the root zone config so that ranges split at
// spec.RangeSplitSize, creates the "test" database and selects it on every
// connection.
func (t *parallelTest) setup(spec *parTestSpec) {
	// verbose reports whether progress messages should be logged.
	verbose := func() bool {
		return testing.Verbose() || log.V(1)
	}

	if spec.ClusterSize == 0 {
		spec.ClusterSize = 1
	}
	if verbose() {
		log.Infof(t.ctx, "Cluster Size: %d", spec.ClusterSize)
	}

	sqlKnobs := &sql.ExecutorTestingKnobs{
		WaitForGossipUpdate:   true,
		CheckStmtStringChange: true,
	}
	args := base.TestClusterArgs{
		ServerArgs: base.TestServerArgs{
			Knobs: base.TestingKnobs{SQLExecutor: sqlKnobs},
		},
	}
	t.cluster = serverutils.StartTestCluster(t, spec.ClusterSize, args)

	// Each node starts out with a single client connection.
	t.clients = make([][]*gosql.DB, spec.ClusterSize)
	for node := range t.clients {
		t.clients[node] = append(t.clients[node], t.cluster.ServerConn(node))
	}
	r0 := sqlutils.MakeSQLRunner(t, t.clients[0][0])

	if spec.RangeSplitSize != 0 {
		if verbose() {
			log.Infof(t.ctx, "Setting range split size: %d", spec.RangeSplitSize)
		}
		zoneCfg := config.DefaultZoneConfig()
		zoneCfg.RangeMaxBytes = int64(spec.RangeSplitSize)
		zoneCfg.RangeMinBytes = zoneCfg.RangeMaxBytes / 2
		encodedCfg, err := protoutil.Marshal(&zoneCfg)
		if err != nil {
			t.Fatal(err)
		}
		zoneID := keys.RootNamespaceID
		r0.Exec(`UPDATE system.zones SET config = $2 WHERE id = $1`, zoneID, encodedCfg)
	}

	if verbose() {
		log.Infof(t.ctx, "Creating database")
	}

	r0.Exec("CREATE DATABASE test")
	for node := range t.clients {
		runner := sqlutils.MakeSQLRunner(t, t.clients[node][0])
		runner.Exec("SET DATABASE = test")
	}

	if verbose() {
		log.Infof(t.ctx, "Test setup done")
	}
}
Example #14
0
// maybeRunPeriodicCheck runs `f` if the current time is past the timestamp
// stored at `key`, or if no timestamp is stored there yet.
// Before running `f`, the timestamp is updated forward by a small amount via
// a compare-and-swap to ensure at-most-one concurrent execution. After `f`
// executes the timestamp is set to the next execution time.
//
// `op` names the check; it is added to the log context and used in log
// messages.
//
// Returns how long until `f` should be run next (i.e. when this method should
// be called again): the remaining delay when the stored time is still in the
// future, updateCheckRetryFrequency when reading, decoding or the
// compare-and-swap fails, and updateCheckFrequency after `f` has run.
func (s *Server) maybeRunPeriodicCheck(
	op string, key roachpb.Key, f func(context.Context),
) time.Duration {
	// NOTE(review): the span name is the literal "op" rather than the value of
	// the op parameter; this looks unintentional — confirm before changing.
	ctx, span := s.AnnotateCtxWithSpan(context.Background(), "op")
	defer span.Finish()

	// Add the op name to the log context.
	ctx = log.WithLogTag(ctx, op, nil)

	resp, err := s.db.Get(ctx, key)
	if err != nil {
		log.Infof(ctx, "error reading time: %s", err)
		return updateCheckRetryFrequency
	}

	// We return early below if either the next check time is in the future or
	// the atomic compare-and-set of that time failed (which would happen if
	// two nodes tried at the same time).
	if resp.Exists() {
		whenToCheck, pErr := resp.Value.GetTime()
		if pErr != nil {
			// Log the decoding error itself; the outer err is nil at this point.
			log.Warningf(ctx, "error decoding time: %s", pErr)
			return updateCheckRetryFrequency
		} else if delay := whenToCheck.Sub(timeutil.Now()); delay > 0 {
			return delay
		}

		nextRetry := whenToCheck.Add(updateCheckRetryFrequency)
		if err := s.db.CPut(ctx, key, nextRetry, whenToCheck); err != nil {
			if log.V(2) {
				log.Infof(ctx, "could not set next version check time (maybe another node checked?): %s", err)
			}
			return updateCheckRetryFrequency
		}
	} else {
		log.Infof(ctx, "No previous %s time.", op)
		nextRetry := timeutil.Now().Add(updateCheckRetryFrequency)
		// CPut with `nil` prev value to assert that no other node has checked.
		if err := s.db.CPut(ctx, key, nextRetry, nil); err != nil {
			if log.V(2) {
				log.Infof(ctx, "Could not set %s time (maybe another node checked?): %v", op, err)
			}
			return updateCheckRetryFrequency
		}
	}

	f(ctx)

	// A failure to record the next execution time is only logged; the regular
	// check frequency is returned either way.
	if err := s.db.Put(ctx, key, timeutil.Now().Add(updateCheckFrequency)); err != nil {
		log.Infof(ctx, "Error updating %s time: %v", op, err)
	}
	return updateCheckFrequency
}
Example #15
0
// clearOverlappingCachedRangeDescriptors evicts cache entries that overlap
// desc. Two kinds of entries are removed: a cached descriptor that covers
// desc.EndKey, and every cached descriptor whose meta key lies in the range
// handed to DoRange (from just after the meta key of desc.StartKey up to the
// meta key of desc.EndKey). An error is returned only if a meta key cannot be
// computed.
func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors(
	desc *roachpb.RangeDescriptor,
) error {
	endKey := desc.EndKey
	endKeyMeta, err := meta(endKey)
	if err != nil {
		return err
	}

	// Drop a descriptor that subsumes the key we are about to cache. For
	// example, an existing KeyMin->KeyMax descriptor should be cleared out in
	// favor of a KeyMin->"m" descriptor.
	if k, v, found := rdc.rangeCache.cache.Ceil(rangeCacheKey(endKeyMeta)); found {
		cached := v.(*roachpb.RangeDescriptor)
		if cached.StartKey.Less(endKey) && !cached.EndKey.Less(endKey) {
			if log.V(2) {
				log.Infof(rdc.ctx, "clearing overlapping descriptor: key=%s desc=%s", k, cached)
			}
			rdc.rangeCache.cache.Del(k.(rangeCacheKey))
		}
	}

	startMeta, err := meta(desc.StartKey)
	if err != nil {
		return err
	}
	endMeta, err := meta(desc.EndKey)
	if err != nil {
		return err
	}

	// Also drop descriptors that are subsumed by the one we are about to
	// cache. This could happen on a merge (and also happens when there's a lot
	// of concurrency). The keys are collected first and deleted once the
	// range iteration has finished.
	var subsumed []rangeCacheKey
	collect := func(k, v interface{}) bool {
		if log.V(2) {
			log.Infof(rdc.ctx, "clearing subsumed descriptor: key=%s desc=%s",
				k, v.(*roachpb.RangeDescriptor))
		}
		subsumed = append(subsumed, k.(rangeCacheKey))

		// NOTE(review): presumably false means "keep iterating" — confirm
		// against the cache's DoRange contract.
		return false
	}
	rdc.rangeCache.cache.DoRange(collect, rangeCacheKey(startMeta.Next()), rangeCacheKey(endMeta))

	for _, cacheKey := range subsumed {
		rdc.rangeCache.cache.Del(cacheKey)
	}
	return nil
}
Example #16
0
// manage manages outgoing clients. Periodically, the infostore is
// scanned for infos with hop count exceeding the MaxHops
// threshold. If the number of outgoing clients doesn't exceed
// maxPeers(), a new gossip client is connected to a randomly selected
// peer beyond MaxHops threshold. Otherwise, the least useful peer
// node is cut off to make room for a replacement. Disconnected
// clients are processed via the disconnected channel and taken out of
// the outgoing address set. If there are no longer any outgoing
// connections or the sentinel gossip is unavailable, the bootstrapper
// is notified via the stalled conditional variable.
//
// The work happens in a worker registered with the server's stopper; the
// worker loops until the stopper signals that it should stop.
func (g *Gossip) manage() {
	g.server.stopper.RunWorker(func() {
		ctx := g.AnnotateCtx(context.Background())
		// Two independent tickers drive the periodic work: one for culling the
		// least useful client and one for checking for a stalled network.
		cullTicker := time.NewTicker(g.jitteredInterval(g.cullInterval))
		stallTicker := time.NewTicker(g.jitteredInterval(g.stallInterval))
		defer cullTicker.Stop()
		defer stallTicker.Stop()
		for {
			select {
			case <-g.server.stopper.ShouldStop():
				return
			case c := <-g.disconnected:
				g.doDisconnected(c)
			case nodeID := <-g.tighten:
				g.tightenNetwork(nodeID)
			case <-cullTicker.C:
				// The culling logic lives in its own closure so that the defer
				// registered below fires at the end of this tick (after the
				// explicit g.mu.Unlock()) rather than when the worker exits.
				func() {
					g.mu.Lock()
					// Only cull when there is no room left for outgoing clients.
					if !g.outgoing.hasSpace() {
						leastUsefulID := g.mu.is.leastUseful(g.outgoing)

						if c := g.findClient(func(c *client) bool {
							return c.peerID == leastUsefulID
						}); c != nil {
							if log.V(1) {
								log.Infof(ctx, "closing least useful client %+v to tighten network graph", c)
							}
							log.Eventf(ctx, "culling %s", c.addr)
							c.close()

							// After releasing the lock, block until the client disconnects.
							defer func() {
								g.doDisconnected(<-g.disconnected)
							}()
						} else {
							if log.V(1) {
								// clientsMu guards the clients list read for the log message.
								g.clientsMu.Lock()
								log.Infof(ctx, "couldn't find least useful client among %+v", g.clientsMu.clients)
								g.clientsMu.Unlock()
							}
						}
					}
					g.mu.Unlock()
				}()
			case <-stallTicker.C:
				g.mu.Lock()
				g.maybeSignalStatusChangeLocked()
				g.mu.Unlock()
			}
		}
	})
}
Example #17
0
// removeConnLocked closes the client connection held by meta, if there is
// one, and removes the entry for key from the connection cache. A close
// error is only logged (at verbosity 1), and not at all when it merely
// reports an already-closed connection.
// NOTE(review): the "Locked" suffix suggests the caller must hold the lock
// protecting ctx.conns — confirm against callers.
func (ctx *Context) removeConnLocked(key string, meta *connMeta) {
	if log.V(1) {
		log.Infof(ctx.masterCtx, "closing %s", key)
	}
	if meta.conn != nil {
		closeErr := meta.conn.Close()
		if closeErr != nil && !grpcutil.IsClosedConnection(closeErr) && log.V(1) {
			log.Errorf(ctx.masterCtx, "failed to close client connection: %s", closeErr)
		}
	}
	delete(ctx.conns.cache, key)
}
Example #18
0
// processValueSingle handles a value that belongs to the family's default
// column (family.DefaultColumnID) and stores the decoded datum in rf.row.
// The key in kv is only used for logging.
//
// When debugStrings is set, the returned prettyKey is prettyKeyPrefix
// extended with the column name and prettyValue is the string form of the
// decoded value. The value is not decoded when the column is unknown to the
// fetcher, or when it is not needed and debugStrings is off. A family with
// no default column yields empty strings: without error for the sentinel
// family, with an error otherwise.
func (rf *RowFetcher) processValueSingle(
	family *ColumnFamilyDescriptor, kv client.KeyValue, debugStrings bool, prettyKeyPrefix string,
) (prettyKey string, prettyValue string, err error) {
	prettyKey = prettyKeyPrefix

	colID := family.DefaultColumnID
	if colID == 0 {
		// The sentinel family carries no value, so there is nothing to do.
		// For any other family this indicates broken TableDescriptor
		// bookkeeping.
		if family.ID != keys.SentinelFamilyID {
			return "", "", errors.Errorf("single entry value with no default column id")
		}
		return "", "", nil
	}

	idx, known := rf.colIdxMap[colID]
	if !known || (!debugStrings && !rf.valNeededForCol[idx]) {
		// No need to unmarshal the column value. Either the column was part of
		// the index key or it isn't needed.
		if log.V(3) {
			log.Infof(context.TODO(), "Scan %s -> [%d] (skipped)", kv.Key, colID)
		}
		return prettyKey, prettyValue, nil
	}

	if debugStrings {
		prettyKey = fmt.Sprintf("%s/%s", prettyKey, rf.desc.Columns[idx].Name)
	}
	colType := rf.cols[idx].Type
	// TODO(arjun): The value is a directly marshaled single value, so we
	// unmarshal it eagerly here. This can potentially be optimized out,
	// although that would require changing UnmarshalColumnValue to operate
	// on bytes, and for Encode/DecodeTableValue to operate on marshaled
	// single values.
	datum, unmarshalErr := UnmarshalColumnValue(&rf.alloc, colType, kv.Value)
	if unmarshalErr != nil {
		return "", "", unmarshalErr
	}
	if debugStrings {
		prettyValue = datum.String()
	}
	// Each column may be filled in at most once per row.
	if !rf.row[idx].IsUnset() {
		panic(fmt.Sprintf("duplicate value for column %d", idx))
	}
	rf.row[idx] = DatumToEncDatum(colType, datum)
	if log.V(3) {
		log.Infof(context.TODO(), "Scan %s -> %v", kv.Key, datum)
	}

	return prettyKey, prettyValue, nil
}
Example #19
0
// mainLoop drives the outbox. Unless a synchronous flow stream is already
// set, it first dials m.addr and opens a FlowStream. It then consumes
// m.RowChannel.C, buffering rows via addRow, and flushes on every tick of the
// flush ticker. It returns when the channel is closed (after a final flush),
// when a row carries or causes an error (after attempting to send that error
// to the consumer), or when a periodic flush fails.
func (m *outbox) mainLoop() error {
	if m.syncFlowStream == nil {
		conn, err := m.flowCtx.rpcCtx.GRPCDial(m.addr)
		if err != nil {
			return err
		}
		distSQLClient := NewDistSQLClient(conn)
		if log.V(2) {
			log.Infof(m.flowCtx.Context, "outbox: calling FlowStream")
		}
		if m.stream, err = distSQLClient.FlowStream(context.TODO()); err != nil {
			if log.V(1) {
				log.Infof(m.flowCtx.Context, "FlowStream error: %s", err)
			}
			return err
		}
		if log.V(2) {
			log.Infof(m.flowCtx.Context, "outbox: FlowStream returned")
		}
	}

	flushTicker := time.NewTicker(outboxFlushPeriod)
	defer flushTicker.Stop()

	for {
		select {
		case <-flushTicker.C:
			if flushErr := m.flush(false, nil); flushErr != nil {
				return flushErr
			}

		case d, open := <-m.RowChannel.C:
			if !open {
				// No more data.
				return m.flush(true, nil)
			}
			rowErr := d.Err
			if rowErr == nil {
				rowErr = m.addRow(d.Row)
			}
			if rowErr != nil {
				// Best effort: try to flush so the error reaches the other
				// side, but ignore any send error.
				_ = m.flush(true, rowErr)
				return rowErr
			}
		}
	}
}
Example #20
0
// shouldQueue reports whether repl should be added to the replicate queue and
// with which priority. The replica is queued when the allocator computes an
// action other than AllocatorNoop (using the allocator's priority), or when
// the allocator finds a rebalance target (with priority 0). It is not queued
// while it still needs a size-based split and the split queue is enabled, or
// when its zone config cannot be looked up.
func (rq *replicateQueue) shouldQueue(
	ctx context.Context, now hlc.Timestamp, repl *Replica, sysCfg config.SystemConfig,
) (shouldQ bool, priority float64) {
	if !repl.store.splitQueue.Disabled() && repl.needsSplitBySize() {
		// If the range exceeds the split threshold, let that finish first.
		// Ranges must fit in memory on both sender and receiver nodes while
		// being replicated. This supplements the check provided by
		// acceptsUnsplitRanges, which looks at zone config boundaries rather
		// than data size.
		//
		// This check is ignored if the split queue is disabled, since in that
		// case, the split will never come.
		return false, 0
	}

	// Find the zone config for this range.
	desc := repl.Desc()
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		log.Error(ctx, err)
		return false, 0
	}

	// A pending repair action takes precedence over rebalancing.
	action, actionPriority := rq.allocator.ComputeAction(zone, desc)
	if action != AllocatorNoop {
		if log.V(2) {
			log.Infof(ctx, "%s repair needed (%s), enqueuing", repl, action)
		}
		return true, actionPriority
	}

	// See if there is a rebalancing opportunity present. The lease holder's
	// store is used when a lease is known, this replica's store otherwise.
	leaseStoreID := repl.store.StoreID()
	if lease, _ := repl.getLease(); lease != nil {
		leaseStoreID = lease.Replica.StoreID
	}
	target := rq.allocator.RebalanceTarget(
		zone.Constraints,
		desc.Replicas,
		leaseStoreID,
		desc.RangeID,
	)
	haveTarget := target != nil
	if log.V(2) {
		if haveTarget {
			log.Infof(ctx, "%s rebalance target found, enqueuing", repl)
		} else {
			log.Infof(ctx, "%s no rebalance target found, not enqueuing", repl)
		}
	}
	return haveTarget, 0
}
Example #21
0
// processValueSingle handles a value that belongs to the family's default
// column (family.DefaultColumnID) and stores the decoded datum in rf.row.
// The key in kv is only used for logging.
//
// When debugStrings is set, the returned prettyKey is prettyKeyPrefix
// extended with the column name and prettyValue is the string form of the
// decoded value. The value is not decoded when the column is unknown to the
// fetcher, or when it is not needed and debugStrings is off. A family with
// no default column yields empty strings: without error for the sentinel
// family, with an error otherwise.
func (rf *RowFetcher) processValueSingle(
	family *ColumnFamilyDescriptor, kv client.KeyValue, debugStrings bool, prettyKeyPrefix string,
) (prettyKey string, prettyValue string, err error) {
	prettyKey = prettyKeyPrefix

	colID := family.DefaultColumnID
	if colID == 0 {
		// The sentinel family carries no value, so there is nothing to do.
		// For any other family this indicates broken TableDescriptor
		// bookkeeping.
		if family.ID != keys.SentinelFamilyID {
			return "", "", errors.Errorf("single entry value with no default column id")
		}
		return "", "", nil
	}

	idx, known := rf.colIdxMap[colID]
	if !known || (!debugStrings && !rf.valNeededForCol[idx]) {
		// No need to unmarshal the column value. Either the column was part of
		// the index key or it isn't needed.
		if log.V(3) {
			log.Infof(context.TODO(), "Scan %s -> [%d] (skipped)", kv.Key, colID)
		}
		return prettyKey, prettyValue, nil
	}

	if debugStrings {
		prettyKey = fmt.Sprintf("%s/%s", prettyKey, rf.desc.Columns[idx].Name)
	}
	colKind := rf.cols[idx].Type.Kind
	// TODO(dan): Once we decide if we're changing the tuple encoding, see if we
	// can get rid of UnmarshalColumnValue in favor of DecodeTableValue.
	datum, unmarshalErr := UnmarshalColumnValue(&rf.alloc, colKind, kv.Value)
	if unmarshalErr != nil {
		return "", "", unmarshalErr
	}
	if debugStrings {
		prettyValue = datum.String()
	}
	// Each column may be filled in at most once per row.
	if rf.row[idx] != nil {
		panic(fmt.Sprintf("duplicate value for column %d", idx))
	}
	rf.row[idx] = datum
	if log.V(3) {
		log.Infof(context.TODO(), "Scan %s -> %v", kv.Key, datum)
	}

	return prettyKey, prettyValue, nil
}
Example #22
0
// run executes the parallel test stored in dir. It loads and parses
// dir/test.yaml, skips the test when the spec carries a skip reason, sets up
// the cluster, and then processes the spec's run lists one after another.
// Within a run list every entry is processed concurrently in its own
// goroutine, and the next run list only starts once all of them have
// reported completion.
func (t *parallelTest) run(dir string) {
	// Process the spec file.
	mainFile := filepath.Join(dir, "test.yaml")
	yamlData, err := ioutil.ReadFile(mainFile)
	if err != nil {
		t.Fatalf("%s: %s", mainFile, err)
	}
	var spec parTestSpec
	if err = yaml.Unmarshal(yamlData, &spec); err != nil {
		t.Fatalf("%s: %s", mainFile, err)
	}

	if spec.SkipReason != "" {
		t.Skip(spec.SkipReason)
	}

	log.Infof(t.ctx, "Running test %s", dir)
	if testing.Verbose() || log.V(1) {
		log.Infof(t.ctx, "spec: %+v", spec)
	}

	t.setup(&spec)
	defer t.close()

	for listIdx, entries := range spec.Run {
		if testing.Verbose() || log.V(1) {
			descr := make([]string, 0, len(entries))
			for _, entry := range entries {
				descr = append(descr, fmt.Sprintf("%d:%s", entry.Node, entry.File))
			}
			log.Infof(t.ctx, "%s: run list %d: %s", mainFile, listIdx,
				strings.Join(descr, ", "))
		}

		// Track how many clients have been handed out on each node so far.
		clientsPerNode := make([]int, spec.ClusterSize)
		done := make(chan bool)
		for _, entry := range entries {
			client := t.getClient(entry.Node, clientsPerNode[entry.Node])
			clientsPerNode[entry.Node]++
			go t.processTestFile(filepath.Join(dir, entry.File), entry.Node, client, done)
		}

		// Wait for all clients to complete: one receive per started goroutine.
		for pending := len(entries); pending > 0; pending-- {
			<-done
		}
	}
}
Example #23
0
// getTableLeaseByID is a by-ID variant of getTableLease (i.e. uses same
// cache). It returns the table descriptor for tableID, reusing a lease the
// planner already holds when possible and acquiring a new one otherwise.
// When table leases are disabled for testing, the descriptor is read
// directly through the transaction instead.
func (p *planner) getTableLeaseByID(tableID sqlbase.ID) (*sqlbase.TableDescriptor, error) {
	if log.V(2) {
		log.Infof(p.ctx(), "planner acquiring lease on table ID %d", tableID)
	}

	if testDisableTableLeases {
		table, err := sqlbase.GetTableDescFromID(p.txn, tableID)
		if err == nil {
			err = filterTableState(table)
		}
		if err != nil {
			return nil, err
		}
		return table, nil
	}

	// First, look to see if we already have a lease for this table -- including
	// leases acquired via `getTableLease`.
	var lease *LeaseState
	for _, held := range p.leases {
		if held.ID != tableID {
			continue
		}
		lease = held
		if log.V(2) {
			log.Infof(p.ctx(), "found lease in planner cache for table %d", tableID)
		}
		break
	}

	// A held lease that is not about to expire can be used as is.
	if lease != nil && !p.removeLeaseIfExpiring(lease) {
		return &lease.TableDescriptor, nil
	}

	// Otherwise acquire a fresh lease.
	acquired, err := p.leaseMgr.Acquire(p.txn, tableID, 0)
	if err != nil {
		if err == sqlbase.ErrDescriptorNotFound {
			// Transform the descriptor error into an error that references the
			// table's ID.
			return nil, sqlbase.NewUndefinedTableError(fmt.Sprintf("<id=%d>", tableID))
		}
		return nil, err
	}
	lease = acquired
	p.leases = append(p.leases, lease)
	// If the lease we just acquired expires before the txn's deadline, reduce
	// the deadline.
	p.txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: lease.Expiration().UnixNano()})
	return &lease.TableDescriptor, nil
}
Example #24
0
// GetTotalMemory returns the amount of memory available to the process. This
// is the total system memory as reported by gosigar, except on Linux, where
// the value read from defaultCGroupMemPath is used instead if it can be read
// and parsed, fits in an int64 and does not exceed the system memory. An
// error is returned only when the system memory cannot be determined or does
// not fit in an int64.
func GetTotalMemory() (int64, error) {
	mem := gosigar.Mem{}
	if err := mem.Get(); err != nil {
		return 0, err
	}
	if mem.Total > math.MaxInt64 {
		return 0, fmt.Errorf("inferred memory size %s exceeds maximum supported memory size %s",
			humanize.IBytes(mem.Total), humanize.Bytes(math.MaxInt64))
	}
	totalMem := int64(mem.Total)

	// The cgroup limit is only consulted on Linux.
	if runtime.GOOS != "linux" {
		return totalMem, nil
	}

	buf, err := ioutil.ReadFile(defaultCGroupMemPath)
	if err != nil {
		if log.V(1) {
			log.Infof(context.TODO(), "can't read available memory from cgroups (%s), using system memory %s instead", err,
				humanizeutil.IBytes(totalMem))
		}
		return totalMem, nil
	}

	cgAvlMem, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
	if err != nil {
		if log.V(1) {
			log.Infof(context.TODO(), "can't parse available memory from cgroups (%s), using system memory %s instead", err,
				humanizeutil.IBytes(totalMem))
		}
		return totalMem, nil
	}

	// Fall back to the system memory whenever the cgroup value is implausible.
	switch {
	case cgAvlMem > math.MaxInt64:
		if log.V(1) {
			log.Infof(context.TODO(), "available memory from cgroups is too large and unsupported %s using system memory %s instead",
				humanize.IBytes(cgAvlMem), humanizeutil.IBytes(totalMem))
		}
		return totalMem, nil
	case cgAvlMem > mem.Total:
		if log.V(1) {
			log.Infof(context.TODO(), "available memory from cgroups %s exceeds system memory %s, using system memory",
				humanize.IBytes(cgAvlMem), humanizeutil.IBytes(totalMem))
		}
		return totalMem, nil
	}

	return int64(cgAvlMem), nil
}
Example #25
0
// TestDockerCLI runs each file matched by testGlob as its own subtest inside
// a one-shot container. It first runs `stat` on the binary in the container
// and skips the whole test if that probe fails. When the test run is verbose
// (or logging verbosity is at least 1), "-d" is added to the command line.
func TestDockerCLI(t *testing.T) {
	cfg := container.Config{
		Image: postgresTestImage,
		Cmd:   []string{"stat", cluster.CockroachBinaryInContainer},
	}
	// Probe run: make sure the binary exists in the container at all.
	if err := testDockerOneShot(t, "cli_test", cfg); err != nil {
		t.Skipf(`TODO(dt): No binary in one-shot container, see #6086: %s`, err)
	}

	matches, err := filepath.Glob(testGlob)
	if err != nil {
		t.Fatal(err)
	}
	if len(matches) == 0 {
		t.Fatalf("no testfiles found (%v)", testGlob)
	}

	debug := testing.Verbose() || log.V(1)
	for _, match := range matches {
		name := filepath.Base(match)
		inContainer := filepath.Join(containerPath, name)
		t.Run(name, func(t *testing.T) {
			// Build the command line for this file on top of cmdBase and
			// reuse the probe's container config with the new command.
			args := cmdBase
			if debug {
				args = append(args, "-d")
			}
			cfg.Cmd = append(args, "-f", inContainer, cluster.CockroachBinaryInContainer)
			if err := testDockerOneShot(t, "cli_test", cfg); err != nil {
				t.Error(err)
			}
		})
	}
}
Example #26
0
// getDatabaseID implements the DatabaseAccessor interface.
//
// The ID is resolved in order from: the session's virtual schemas, the
// planner's name-to-ID database cache, the cached database descriptor, and
// finally mustGetDatabaseDesc. An ID obtained from a descriptor is stored in
// the name-to-ID cache before being returned.
func (p *planner) getDatabaseID(name string) (sqlbase.ID, error) {
	if vdb := p.session.virtualSchemas.getVirtualDatabaseDesc(name); vdb != nil {
		return vdb.GetID(), nil
	}

	if cachedID := p.databaseCache.getID(name); cachedID != 0 {
		return cachedID, nil
	}

	// Try the cached descriptor first and only fall back to
	// mustGetDatabaseDesc when that fails. The cache might cause the usage of
	// a recently renamed database, but that's a race that could occur anyways.
	dbDesc, cacheErr := p.getCachedDatabaseDesc(name)
	if cacheErr != nil {
		if log.V(3) {
			log.Infof(p.ctx(), "%v", cacheErr)
		}
		var lookupErr error
		if dbDesc, lookupErr = p.mustGetDatabaseDesc(name); lookupErr != nil {
			return 0, lookupErr
		}
	}

	p.databaseCache.setID(name, dbDesc.ID)
	return dbDesc.ID, nil
}
Example #27
0
// createDescriptorWithID assigns id to descriptor and, in a single batch run
// on the planner's transaction, conditionally writes two entries: idKey ->
// the descriptor's ID, and the descriptor metadata key -> the wrapped
// descriptor. Both CPuts pass nil as the expected value. It also registers a
// testing hook that checks both entries against the system config.
func (p *planner) createDescriptorWithID(
	idKey roachpb.Key, id sqlbase.ID, descriptor sqlbase.DescriptorProto,
) error {
	descriptor.SetID(id)
	// TODO(pmattis): The error currently returned below is likely going to be
	// difficult to interpret.
	//
	// TODO(pmattis): Need to handle if-not-exists here as well.
	//
	// TODO(pmattis): This is writing the namespace and descriptor table entries,
	// but not going through the normal INSERT logic and not performing a precise
	// mimicry. In particular, we're only writing a single key per table, while
	// perfect mimicry would involve writing a sentinel key for each row as well.
	descID := descriptor.GetID()
	metadataKey := sqlbase.MakeDescMetadataKey(descID)
	wrapped := sqlbase.WrapDescriptor(descriptor)

	if log.V(2) {
		log.Infof(p.ctx(), "CPut %s -> %d", idKey, descID)
		log.Infof(p.ctx(), "CPut %s -> %s", metadataKey, wrapped)
	}

	batch := new(client.Batch)
	batch.CPut(idKey, descID, nil)
	batch.CPut(metadataKey, wrapped, nil)

	verify := func(systemConfig config.SystemConfig) error {
		if err := expectDescriptorID(systemConfig, idKey, descID); err != nil {
			return err
		}
		return expectDescriptor(systemConfig, metadataKey, wrapped)
	}
	p.setTestingVerifyMetadata(verify)

	return p.txn.Run(batch)
}
Example #28
0
// reportUsage POSTs the JSON-encoded result of s.getReportingInfo() to
// reportingURL, with the build tag ("version") and the cluster ID ("uuid")
// set as query parameters. It is best-effort: every failure is logged and
// nothing is returned to the caller.
func (s *Server) reportUsage(ctx context.Context) {
	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(s.getReportingInfo()); err != nil {
		log.Warning(ctx, err)
		return
	}

	// NOTE(review): this rewrites RawQuery on reportingURL, which is declared
	// outside this function; if it is shared state, concurrent callers would
	// race on it — TODO confirm.
	q := reportingURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	reportingURL.RawQuery = q.Encode()

	res, err := http.Post(reportingURL.String(), "application/json", b)
	if err != nil {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed, so it is
		// only logged at higher verbosity. We must return regardless of the
		// verbosity level: res cannot be used when err is non-nil.
		if log.V(2) {
			log.Warning(ctx, "Failed to report node usage metrics: ", err)
		}
		return
	}
	// The body must always be closed so the underlying connection is released.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(ctx, "Failed to report node usage metrics: status: %s, body: %s, "+
			"error: %v", res.Status, b, err)
	}
}
Example #29
0
// AllocateTarget picks a store on which to place a new replica of the given
// range, subject to the supplied constraints. Nodes that already hold one of
// the existing replicas are not eligible. The range ID is forwarded to the
// store pool so that an existing dead replica on a store is not replaced.
//
// When relaxConstraints is true and the rule solver is not in use, the
// constraints are dropped one at a time, from the last to the first, until a
// target is found or none remain. If matching stores exist but are throttled,
// a plain error is returned instead of an allocatorError.
func (a *Allocator) AllocateTarget(
	constraints config.Constraints,
	existing []roachpb.ReplicaDescriptor,
	rangeID roachpb.RangeID,
	relaxConstraints bool,
) (*roachpb.StoreDescriptor, error) {
	stores, numAlive, numThrottled := a.storePool.getStoreList(rangeID)

	if a.options.UseRuleSolver {
		localities := a.storePool.getNodeLocalities(existing)
		ranked := allocateCandidates(
			stores, constraints, existing, localities, a.storePool.deterministic,
		)
		if log.V(3) {
			log.Infof(context.TODO(), "allocate candidates: %s", ranked)
		}
		if best := ranked.selectGood(a.randGen); best != nil {
			return best, nil
		}

		// When there are throttled stores that do match, we shouldn't send
		// the replica to purgatory.
		if numThrottled > 0 {
			return nil, errors.Errorf("%d matching stores are currently throttled", numThrottled)
		}
		return nil, &allocatorError{required: constraints.Constraints}
	}

	// Collect the nodes that already carry a replica so they can be excluded.
	occupied := make(nodeIDSet, len(existing))
	for i := range existing {
		occupied[existing[i].NodeID] = struct{}{}
	}

	// Because more redundancy is better than less, if relaxConstraints, the
	// matching here is lenient, and tries to find a target by relaxing an
	// attribute constraint, from last attribute to first.
	remaining := constraints.Constraints
	for {
		candidates := stores.filter(config.Constraints{Constraints: remaining})
		if best := a.selectGood(candidates, occupied); best != nil {
			return best, nil
		}

		// When there are throttled stores that do match, we shouldn't send
		// the replica to purgatory or even consider relaxing the constraints.
		if numThrottled > 0 {
			return nil, errors.Errorf("%d matching stores are currently throttled", numThrottled)
		}
		if len(remaining) == 0 || !relaxConstraints {
			return nil, &allocatorError{
				required:         constraints.Constraints,
				relaxConstraints: relaxConstraints,
				aliveStoreCount:  numAlive,
			}
		}

		// Drop the last constraint and try again.
		remaining = remaining[:len(remaining)-1]
	}
}
Example #30
0
// get issues a GET for base+rel through cluster.HTTPClient and returns the
// response body. Attempts that fail to connect, fail while reading the body,
// or return a status other than 200 OK are logged and retried according to
// retryOptions. If the retry loop ends without a successful response, the
// test is failed via t.Fatalf.
func get(ctx context.Context, t *testing.T, base, rel string) []byte {
	// TODO(bram) #2059: Remove retry logic.
	url := base + rel
	for r := retry.Start(retryOptions); r.Next(); {
		resp, err := cluster.HTTPClient.Get(url)
		if err != nil {
			log.Infof(ctx, "could not GET %s - %s", url, err)
			continue
		}
		body, err := ioutil.ReadAll(resp.Body)
		// Close the body as soon as it has been consumed. A defer here would
		// only run when get returns, so every retried attempt would keep its
		// response body (and connection) open for the rest of the loop.
		resp.Body.Close()
		if err != nil {
			log.Infof(ctx, "could not read body for %s - %s", url, err)
			continue
		}
		if resp.StatusCode != http.StatusOK {
			log.Infof(ctx, "could not GET %s - statuscode: %d - body: %s", url, resp.StatusCode, body)
			continue
		}
		if log.V(1) {
			log.Infof(ctx, "OK response from %s", url)
		}
		return body
	}
	t.Fatalf("There was an error retrieving %s", url)
	return []byte("")
}