// waitAndProcess waits for the pace interval and processes the replica
// if repl is not nil. The method returns true when the scanner needs
// to be stopped. The method also removes a replica from queues when it
// is signaled via the removed channel.
func (rs *replicaScanner) waitAndProcess(
	ctx context.Context, start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica,
) bool {
	waitInterval := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(waitInterval)
	if log.V(6) {
		log.Infof(ctx, "wait timer interval set to %s", waitInterval)
	}
	for {
		select {
		case <-rs.waitTimer.C:
			if log.V(6) {
				log.Infof(ctx, "wait timer fired")
			}
			rs.waitTimer.Read = true
			if repl == nil {
				return false
			}

			if log.V(2) {
				log.Infof(ctx, "replica scanner processing %s", repl)
			}
			for _, q := range rs.queues {
				q.MaybeAdd(repl, clock.Now())
			}
			return false

		case repl := <-rs.removed:
			rs.removeReplica(repl)

		case <-stopper.ShouldStop():
			return true
		}
	}
}
// runHistoryWithRetry intercepts retry errors. If one is encountered,
// alternate histories are generated which all contain the exact
// history prefix which encountered the error, but which recombine the
// remaining commands with all of the commands from the retrying
// history.
//
// This process continues recursively if there are further retries.
func (hv *historyVerifier) runHistoryWithRetry(
	priorities []int32, isolations []enginepb.IsolationType, cmds []*cmd, db *client.DB, t *testing.T,
) error {
	if err := hv.runHistory(priorities, isolations, cmds, db, t); err != nil {
		if log.V(1) {
			log.Infof(context.Background(), "got an error running history %s: %s", historyString(cmds), err)
		}
		retry, ok := err.(*retryError)
		if !ok {
			return err
		}

		if _, hasRetried := hv.retriedTxns[retry.txnIdx]; hasRetried {
			if log.V(1) {
				log.Infof(context.Background(), "retried txn %d twice; skipping history", retry.txnIdx+1)
			}
			return nil
		}
		hv.retriedTxns[retry.txnIdx] = struct{}{}

		// Randomly subsample 5% of histories for reduced execution time.
		enumHis := sampleHistories(enumerateHistoriesAfterRetry(retry, cmds), 0.05)
		for i, h := range enumHis {
			if log.V(1) {
				log.Infof(context.Background(), "after retry, running alternate history %d of %d", i, len(enumHis))
			}
			if err := hv.runHistoryWithRetry(priorities, isolations, h, db, t); err != nil {
				return err
			}
		}
	}
	return nil
}
// improve returns a candidate StoreDescriptor to rebalance a replica to. The
// strategy is to always converge on the mean range count. If that isn't
// possible, we don't return any candidate.
func (rcb rangeCountBalancer) improve(sl StoreList, excluded nodeIDSet) *roachpb.StoreDescriptor {
	// Attempt to select a better candidate from the supplied list.
	sl.stores = selectRandom(rcb.rand, allocatorRandomCount, sl, excluded)
	candidate := rcb.selectBest(sl)
	if candidate == nil {
		if log.V(2) {
			log.Infof(context.TODO(), "not rebalancing: no valid candidate targets: %s",
				formatCandidates(nil, sl.stores))
		}
		return nil
	}

	// Adding a replica to the candidate must make its range count converge on the
	// mean range count.
	rebalanceConvergesOnMean := rebalanceToConvergesOnMean(sl, *candidate)
	if !rebalanceConvergesOnMean {
		if log.V(2) {
			log.Infof(context.TODO(), "not rebalancing: %s wouldn't converge on the mean %.1f",
				formatCandidates(candidate, sl.stores), sl.candidateCount.mean)
		}
		return nil
	}

	if log.V(2) {
		log.Infof(context.TODO(), "rebalancing: mean=%.1f %s",
			sl.candidateCount.mean, formatCandidates(candidate, sl.stores))
	}
	return candidate
}
// flush sends the rows accumulated so far in a StreamMessage.
func (m *outbox) flush(last bool, err error) error {
	if !last && m.numRows == 0 {
		return nil
	}
	msg := m.encoder.FormMessage(last, err)

	if log.V(3) {
		log.Infof(m.flowCtx.Context, "flushing outbox")
	}
	var sendErr error
	if m.stream != nil {
		sendErr = m.stream.Send(msg)
	} else {
		sendErr = m.syncFlowStream.Send(msg)
	}
	if sendErr != nil {
		if log.V(1) {
			log.Errorf(m.flowCtx.Context, "outbox flush error: %s", sendErr)
		}
	} else if log.V(3) {
		log.Infof(m.flowCtx.Context, "outbox flushed")
	}
	if sendErr != nil {
		return sendErr
	}

	m.numRows = 0
	return nil
}
// AddMetricStruct examines all fields of metricStruct and adds
// all Iterable or Struct objects to the registry.
func (r *Registry) AddMetricStruct(metricStruct interface{}) {
	v := reflect.ValueOf(metricStruct)
	if v.Kind() == reflect.Ptr {
		v = v.Elem()
	}
	t := v.Type()

	for i := 0; i < v.NumField(); i++ {
		vfield, tfield := v.Field(i), t.Field(i)
		if !vfield.CanInterface() {
			if log.V(2) {
				log.Infof(context.TODO(), "Skipping unexported field %s", tfield.Name)
			}
			continue
		}
		val := vfield.Interface()
		switch typ := val.(type) {
		case Iterable:
			r.AddMetric(typ)
		case Struct:
			r.AddMetricStruct(typ)
		default:
			if log.V(2) {
				log.Infof(context.TODO(), "Skipping non-metric field %s", tfield.Name)
			}
		}
	}
}
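// Illustrative sketch (not part of the registry package): the same reflection
// pattern as AddMetricStruct above, reduced to the standard library. The Named
// interface and the counter/sampleMetrics types are hypothetical stand-ins for
// Iterable and the metric structs; only "reflect" is assumed to be imported.
type Named interface {
	Name() string
}

type counter struct{ name string }

func (c counter) Name() string { return c.name }

type sampleMetrics struct {
	Requests counter
	Errors   counter
	note     string // unexported: CanInterface() is false, so the walk skips it
}

// collectNames walks the exported fields of a struct (or pointer to struct)
// and gathers the names of all fields implementing Named.
func collectNames(metricStruct interface{}) []string {
	v := reflect.ValueOf(metricStruct)
	if v.Kind() == reflect.Ptr {
		v = v.Elem()
	}
	var names []string
	for i := 0; i < v.NumField(); i++ {
		if !v.Field(i).CanInterface() {
			continue
		}
		if n, ok := v.Field(i).Interface().(Named); ok {
			names = append(names, n.Name())
		}
	}
	return names
}

// Example: collectNames(sampleMetrics{Requests: counter{"requests"}, Errors: counter{"errors"}})
// returns []string{"requests", "errors"}.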
// Run is part of the processor interface.
func (m *mergeJoiner) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(m.ctx, "merge joiner")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting merge joiner run")
		defer log.Infof(ctx, "exiting merge joiner run")
	}

	for {
		batch, err := m.streamMerger.NextBatch()
		if err != nil || len(batch) == 0 {
			m.output.Close(err)
			return
		}
		for _, rowPair := range batch {
			row, _, err := m.render(rowPair[0], rowPair[1])
			if err != nil {
				m.output.Close(err)
				return
			}
			if row != nil && !m.output.PushRow(row) {
				if log.V(2) {
					log.Infof(ctx, "no more rows required")
				}
				m.output.Close(nil)
				return
			}
		}
	}
}
// wrap the supplied planNode with the sortNode if sorting is required.
// The first returned value is "true" if the sort node can be squashed
// in the selectTopNode (sorting unneeded).
func (n *sortNode) wrap(plan planNode) (bool, planNode) {
	if n != nil {
		// Check to see if the requested ordering is compatible with the existing
		// ordering.
		existingOrdering := plan.Ordering()
		if log.V(2) {
			log.Infof(n.ctx, "Sort: existing=%+v desired=%+v", existingOrdering, n.ordering)
		}
		match := computeOrderingMatch(n.ordering, existingOrdering, false)
		if match < len(n.ordering) {
			n.plan = plan
			n.needSort = true
			return false, n
		}

		if len(n.columns) < len(plan.Columns()) {
			// No sorting required, but we have to strip off the extra render
			// expressions we added.
			n.plan = plan
			return false, n
		}

		if log.V(2) {
			log.Infof(n.ctx, "Sort: no sorting required")
		}
	}

	return true, plan
}
// throttle informs the store pool that the given remote store declined a
// snapshot or failed to apply one, ensuring that it will not be considered
// for up-replication or rebalancing until after the configured timeout period
// has elapsed. Declined being true indicates that the remote store explicitly
// declined a snapshot.
func (sp *StorePool) throttle(reason throttleReason, toStoreID roachpb.StoreID) {
	sp.mu.Lock()
	defer sp.mu.Unlock()
	detail := sp.getStoreDetailLocked(toStoreID)
	ctx := sp.AnnotateCtx(context.TODO())

	// If a snapshot is declined, be it due to an error or because it was
	// rejected, we mark the store detail as having been declined so it won't
	// be considered as a candidate for new replicas until after the configured
	// timeout period has passed.
	switch reason {
	case throttleDeclined:
		detail.throttledUntil = sp.clock.Now().GoTime().Add(sp.declinedReservationsTimeout)
		if log.V(2) {
			log.Infof(ctx, "snapshot declined, store:%s will be throttled for %s until %s",
				toStoreID, sp.declinedReservationsTimeout, detail.throttledUntil)
		}
	case throttleFailed:
		detail.throttledUntil = sp.clock.Now().GoTime().Add(sp.failedReservationsTimeout)
		if log.V(2) {
			log.Infof(ctx, "snapshot failed, store:%s will be throttled for %s until %s",
				toStoreID, sp.failedReservationsTimeout, detail.throttledUntil)
		}
	}
}
// deleteRow adds to the batch the kv operations necessary to delete a table row
// with the given values.
func (rd *rowDeleter) deleteRow(ctx context.Context, b *client.Batch, values []parser.Datum) error {
	if err := rd.fks.checkAll(values); err != nil {
		return err
	}

	primaryIndexKey, secondaryIndexEntries, err := rd.helper.encodeIndexes(rd.fetchColIDtoRowIndex, values)
	if err != nil {
		return err
	}

	for _, secondaryIndexEntry := range secondaryIndexEntries {
		if log.V(2) {
			log.Infof(ctx, "Del %s", secondaryIndexEntry.Key)
		}
		b.Del(secondaryIndexEntry.Key)
	}

	// Delete the row.
	rd.startKey = roachpb.Key(primaryIndexKey)
	rd.endKey = roachpb.Key(encoding.EncodeNotNullDescending(primaryIndexKey))
	if log.V(2) {
		log.Infof(ctx, "DelRange %s - %s", rd.startKey, rd.endKey)
	}
	b.DelRange(&rd.startKey, &rd.endKey, false)
	rd.startKey, rd.endKey = nil, nil

	return nil
}
// Run is part of the processor interface.
func (d *distinct) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(d.ctx, "distinct")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting distinct process")
		defer log.Infof(ctx, "exiting distinct")
	}

	var scratch []byte
	for {
		row, err := d.input.NextRow()
		if err != nil || row == nil {
			d.output.Close(err)
			return
		}

		// If we are processing DISTINCT(x, y) and the input stream is ordered
		// by x, we define x to be our group key. Our seen set at any given time
		// is only the set of all rows with the same group key. The encoding of
		// the row is the key we use in our 'seen' set.
		encoding, err := d.encode(scratch, row)
		if err != nil {
			d.output.Close(err)
			return
		}

		// The 'seen' set is reset whenever we find consecutive rows differing on
		// the group key, thus avoiding the need to store encodings of all rows.
		matched, err := d.matchLastGroupKey(row)
		if err != nil {
			d.output.Close(err)
			return
		}

		if !matched {
			d.lastGroupKey = row
			d.seen = make(map[string]struct{})
		}

		key := string(encoding)
		if _, ok := d.seen[key]; !ok {
			d.seen[key] = struct{}{}
			if !d.output.PushRow(row) {
				if log.V(2) {
					log.Infof(ctx, "no more rows required")
				}
				d.output.Close(nil)
				return
			}
		}
		scratch = encoding[:0]
	}
}
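// Illustrative sketch (hypothetical helper, not part of the distsql package) of
// the group-key optimization described in the comments above: when the input is
// sorted on the leading DISTINCT column, the 'seen' set only has to hold rows of
// the current group and can be reset whenever the group key changes. Rows are
// modeled as [2]string pairs of (group key, remaining columns).
func distinctSorted(rows [][2]string) [][2]string {
	var out [][2]string
	seen := make(map[string]struct{})
	var lastGroup string
	first := true
	for _, r := range rows {
		group := r[0]
		if first || group != lastGroup {
			// New group: encodings from previous groups can never repeat, so drop them.
			seen = make(map[string]struct{})
			lastGroup = group
			first = false
		}
		key := r[0] + "\x00" + r[1]
		if _, ok := seen[key]; !ok {
			seen[key] = struct{}{}
			out = append(out, r)
		}
	}
	return out
}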
// Run is part of the processor interface.
func (ev *evaluator) Run(wg *sync.WaitGroup) {
	if wg != nil {
		defer wg.Done()
	}

	ctx, span := tracing.ChildSpan(ev.ctx, "evaluator")
	defer tracing.FinishSpan(span)

	if log.V(2) {
		log.Infof(ctx, "starting evaluator process")
		defer log.Infof(ctx, "exiting evaluator")
	}

	first := true
	for {
		row, err := ev.input.NextRow()
		if err != nil || row == nil {
			ev.output.Close(err)
			return
		}

		if first {
			first = false

			types := make([]sqlbase.ColumnType_Kind, len(row))
			for i := range types {
				types[i] = row[i].Type
			}
			for i, expr := range ev.specExprs {
				err := ev.exprs[i].init(expr, types, ev.flowCtx.evalCtx)
				if err != nil {
					ev.output.Close(err)
					return
				}
				ev.exprTypes[i] = sqlbase.DatumTypeToColumnKind(ev.exprs[i].expr.ResolvedType())
			}
		}

		outRow, err := ev.eval(row)
		if err != nil {
			ev.output.Close(err)
			return
		}

		if log.V(3) {
			log.Infof(ctx, "pushing %s\n", outRow)
		}

		// Push the row to the output RowReceiver; stop if they don't need more
		// rows.
		if !ev.output.PushRow(outRow) {
			if log.V(2) {
				log.Infof(ctx, "no more rows required")
			}
			ev.output.Close(nil)
			return
		}
	}
}
func (r *RocksDB) open() error {
	var ver storageVersion
	if len(r.dir) != 0 {
		log.Infof(context.TODO(), "opening rocksdb instance at %q", r.dir)

		// Check the version number.
		var err error
		if ver, err = getVersion(r.dir); err != nil {
			return err
		}
		if ver < versionMinimum || ver > versionCurrent {
			// Instead of an error, we should call a migration if possible when
			// one is needed immediately following the DBOpen call.
			return fmt.Errorf("incompatible rocksdb data version, current:%d, on disk:%d, minimum:%d",
				versionCurrent, ver, versionMinimum)
		}
	} else {
		if log.V(2) {
			log.Infof(context.TODO(), "opening in memory rocksdb instance")
		}

		// In memory dbs are always current.
		ver = versionCurrent
	}

	blockSize := envutil.EnvOrDefaultBytes("COCKROACH_ROCKSDB_BLOCK_SIZE", defaultBlockSize)
	walTTL := envutil.EnvOrDefaultDuration("COCKROACH_ROCKSDB_WAL_TTL", 0).Seconds()

	status := C.DBOpen(&r.rdb, goToCSlice([]byte(r.dir)),
		C.DBOptions{
			cache:           r.cache.cache,
			block_size:      C.uint64_t(blockSize),
			wal_ttl_seconds: C.uint64_t(walTTL),
			allow_os_buffer: C.bool(true),
			logging_enabled: C.bool(log.V(3)),
			num_cpu:         C.int(runtime.NumCPU()),
			max_open_files:  C.int(r.maxOpenFiles),
		})
	if err := statusToError(status); err != nil {
		return errors.Errorf("could not open rocksdb instance: %s", err)
	}

	// Update or add the version file if needed.
	if ver < versionCurrent {
		if err := writeVersionFile(r.dir); err != nil {
			return err
		}
	}

	// Start a goroutine that will finish when the underlying handle
	// is deallocated. This is used to check for leaks in tests.
	go func() {
		<-r.deallocated
	}()

	return nil
}
func (t *parallelTest) setup(spec *parTestSpec) {
	if spec.ClusterSize == 0 {
		spec.ClusterSize = 1
	}

	if testing.Verbose() || log.V(1) {
		log.Infof(t.ctx, "Cluster Size: %d", spec.ClusterSize)
	}

	args := base.TestClusterArgs{
		ServerArgs: base.TestServerArgs{
			Knobs: base.TestingKnobs{
				SQLExecutor: &sql.ExecutorTestingKnobs{
					WaitForGossipUpdate:   true,
					CheckStmtStringChange: true,
				},
			},
		},
	}
	t.cluster = serverutils.StartTestCluster(t, spec.ClusterSize, args)
	t.clients = make([][]*gosql.DB, spec.ClusterSize)
	for i := range t.clients {
		t.clients[i] = append(t.clients[i], t.cluster.ServerConn(i))
	}
	r0 := sqlutils.MakeSQLRunner(t, t.clients[0][0])

	if spec.RangeSplitSize != 0 {
		if testing.Verbose() || log.V(1) {
			log.Infof(t.ctx, "Setting range split size: %d", spec.RangeSplitSize)
		}
		zoneCfg := config.DefaultZoneConfig()
		zoneCfg.RangeMaxBytes = int64(spec.RangeSplitSize)
		zoneCfg.RangeMinBytes = zoneCfg.RangeMaxBytes / 2
		buf, err := protoutil.Marshal(&zoneCfg)
		if err != nil {
			t.Fatal(err)
		}
		objID := keys.RootNamespaceID
		r0.Exec(`UPDATE system.zones SET config = $2 WHERE id = $1`, objID, buf)
	}

	if testing.Verbose() || log.V(1) {
		log.Infof(t.ctx, "Creating database")
	}

	r0.Exec("CREATE DATABASE test")
	for i := range t.clients {
		sqlutils.MakeSQLRunner(t, t.clients[i][0]).Exec("SET DATABASE = test")
	}

	if testing.Verbose() || log.V(1) {
		log.Infof(t.ctx, "Test setup done")
	}
}
// If the time is greater than the timestamp stored at `key`, run `f`.
// Before running `f`, the timestamp is updated forward by a small amount via
// a compare-and-swap to ensure at-most-one concurrent execution. After `f`
// executes the timestamp is set to the next execution time.
// Returns how long until `f` should be run next (i.e. when this method should
// be called again).
func (s *Server) maybeRunPeriodicCheck(
	op string, key roachpb.Key, f func(context.Context),
) time.Duration {
	ctx, span := s.AnnotateCtxWithSpan(context.Background(), "op")
	defer span.Finish()

	// Add the op name to the log context.
	ctx = log.WithLogTag(ctx, op, nil)

	resp, err := s.db.Get(ctx, key)
	if err != nil {
		log.Infof(ctx, "error reading time: %s", err)
		return updateCheckRetryFrequency
	}

	// We return early below if either the next check time is in the
	// future or the atomic compare-and-set of that time failed (which
	// would happen if two nodes tried at the same time).
	if resp.Exists() {
		whenToCheck, pErr := resp.Value.GetTime()
		if pErr != nil {
			log.Warningf(ctx, "error decoding time: %s", pErr)
			return updateCheckRetryFrequency
		} else if delay := whenToCheck.Sub(timeutil.Now()); delay > 0 {
			return delay
		}

		nextRetry := whenToCheck.Add(updateCheckRetryFrequency)
		if err := s.db.CPut(ctx, key, nextRetry, whenToCheck); err != nil {
			if log.V(2) {
				log.Infof(ctx, "could not set next version check time (maybe another node checked?): %s", err)
			}
			return updateCheckRetryFrequency
		}
	} else {
		log.Infof(ctx, "No previous %s time.", op)
		nextRetry := timeutil.Now().Add(updateCheckRetryFrequency)
		// CPut with `nil` prev value to assert that no other node has checked.
		if err := s.db.CPut(ctx, key, nextRetry, nil); err != nil {
			if log.V(2) {
				log.Infof(ctx, "Could not set %s time (maybe another node checked?): %v", op, err)
			}
			return updateCheckRetryFrequency
		}
	}

	f(ctx)

	if err := s.db.Put(ctx, key, timeutil.Now().Add(updateCheckFrequency)); err != nil {
		log.Infof(ctx, "Error updating %s time: %v", op, err)
	}
	return updateCheckFrequency
}
// clearOverlappingCachedRangeDescriptors looks up and clears any
// cache entries which overlap the specified descriptor.
func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors(
	desc *roachpb.RangeDescriptor,
) error {
	key := desc.EndKey
	metaKey, err := meta(key)
	if err != nil {
		return err
	}

	// Clear out any descriptors which subsume the key which we're going
	// to cache. For example, an existing KeyMin->KeyMax descriptor should
	// be cleared out in favor of a KeyMin->"m" descriptor.
	k, v, ok := rdc.rangeCache.cache.Ceil(rangeCacheKey(metaKey))
	if ok {
		descriptor := v.(*roachpb.RangeDescriptor)
		if descriptor.StartKey.Less(key) && !descriptor.EndKey.Less(key) {
			if log.V(2) {
				log.Infof(rdc.ctx, "clearing overlapping descriptor: key=%s desc=%s", k, descriptor)
			}
			rdc.rangeCache.cache.Del(k.(rangeCacheKey))
		}
	}

	startMeta, err := meta(desc.StartKey)
	if err != nil {
		return err
	}
	endMeta, err := meta(desc.EndKey)
	if err != nil {
		return err
	}

	// Also clear any descriptors which are subsumed by the one we're
	// going to cache. This could happen on a merge (and also happens
	// when there's a lot of concurrency). Iterate from the range meta key
	// after RangeMetaKey(desc.StartKey) to the range meta key for desc.EndKey.
	var keys []rangeCacheKey
	rdc.rangeCache.cache.DoRange(func(k, v interface{}) bool {
		if log.V(2) {
			log.Infof(rdc.ctx, "clearing subsumed descriptor: key=%s desc=%s",
				k, v.(*roachpb.RangeDescriptor))
		}
		keys = append(keys, k.(rangeCacheKey))
		return false
	}, rangeCacheKey(startMeta.Next()), rangeCacheKey(endMeta))

	for _, key := range keys {
		rdc.rangeCache.cache.Del(key)
	}
	return nil
}
// manage manages outgoing clients. Periodically, the infostore is
// scanned for infos with hop count exceeding the MaxHops
// threshold. If the number of outgoing clients doesn't exceed
// maxPeers(), a new gossip client is connected to a randomly selected
// peer beyond MaxHops threshold. Otherwise, the least useful peer
// node is cut off to make room for a replacement. Disconnected
// clients are processed via the disconnected channel and taken out of
// the outgoing address set. If there are no longer any outgoing
// connections or the sentinel gossip is unavailable, the bootstrapper
// is notified via the stalled conditional variable.
func (g *Gossip) manage() {
	g.server.stopper.RunWorker(func() {
		ctx := g.AnnotateCtx(context.Background())
		cullTicker := time.NewTicker(g.jitteredInterval(g.cullInterval))
		stallTicker := time.NewTicker(g.jitteredInterval(g.stallInterval))
		defer cullTicker.Stop()
		defer stallTicker.Stop()
		for {
			select {
			case <-g.server.stopper.ShouldStop():
				return
			case c := <-g.disconnected:
				g.doDisconnected(c)
			case nodeID := <-g.tighten:
				g.tightenNetwork(nodeID)
			case <-cullTicker.C:
				func() {
					g.mu.Lock()
					if !g.outgoing.hasSpace() {
						leastUsefulID := g.mu.is.leastUseful(g.outgoing)

						if c := g.findClient(func(c *client) bool {
							return c.peerID == leastUsefulID
						}); c != nil {
							if log.V(1) {
								log.Infof(ctx, "closing least useful client %+v to tighten network graph", c)
							}
							log.Eventf(ctx, "culling %s", c.addr)
							c.close()

							// After releasing the lock, block until the client disconnects.
							defer func() {
								g.doDisconnected(<-g.disconnected)
							}()
						} else {
							if log.V(1) {
								g.clientsMu.Lock()
								log.Infof(ctx, "couldn't find least useful client among %+v", g.clientsMu.clients)
								g.clientsMu.Unlock()
							}
						}
					}
					g.mu.Unlock()
				}()
			case <-stallTicker.C:
				g.mu.Lock()
				g.maybeSignalStatusChangeLocked()
				g.mu.Unlock()
			}
		}
	})
}
func (ctx *Context) removeConnLocked(key string, meta *connMeta) {
	if log.V(1) {
		log.Infof(ctx.masterCtx, "closing %s", key)
	}
	if conn := meta.conn; conn != nil {
		if err := conn.Close(); err != nil && !grpcutil.IsClosedConnection(err) {
			if log.V(1) {
				log.Errorf(ctx.masterCtx, "failed to close client connection: %s", err)
			}
		}
	}
	delete(ctx.conns.cache, key)
}
// processValueSingle processes the given value (of column
// family.DefaultColumnID), setting values in the rf.row accordingly. The key is
// only used for logging.
func (rf *RowFetcher) processValueSingle(
	family *ColumnFamilyDescriptor, kv client.KeyValue, debugStrings bool, prettyKeyPrefix string,
) (prettyKey string, prettyValue string, err error) {
	prettyKey = prettyKeyPrefix

	colID := family.DefaultColumnID
	if colID == 0 {
		// If this is the sentinel family, a value is not expected, so we're done.
		// Otherwise, this means something went wrong in the TableDescriptor
		// bookkeeping.
		if family.ID == keys.SentinelFamilyID {
			return "", "", nil
		}
		return "", "", errors.Errorf("single entry value with no default column id")
	}

	idx, ok := rf.colIdxMap[colID]
	if ok && (debugStrings || rf.valNeededForCol[idx]) {
		if debugStrings {
			prettyKey = fmt.Sprintf("%s/%s", prettyKey, rf.desc.Columns[idx].Name)
		}
		typ := rf.cols[idx].Type
		// TODO(arjun): The value is a directly marshaled single value, so we
		// unmarshal it eagerly here. This can potentially be optimized out,
		// although that would require changing UnmarshalColumnValue to operate
		// on bytes, and for Encode/DecodeTableValue to operate on marshaled
		// single values.
		value, err := UnmarshalColumnValue(&rf.alloc, typ, kv.Value)
		if err != nil {
			return "", "", err
		}
		if debugStrings {
			prettyValue = value.String()
		}
		if !rf.row[idx].IsUnset() {
			panic(fmt.Sprintf("duplicate value for column %d", idx))
		}
		rf.row[idx] = DatumToEncDatum(typ, value)
		if log.V(3) {
			log.Infof(context.TODO(), "Scan %s -> %v", kv.Key, value)
		}
	} else {
		// No need to unmarshal the column value. Either the column was part of
		// the index key or it isn't needed.
		if log.V(3) {
			log.Infof(context.TODO(), "Scan %s -> [%d] (skipped)", kv.Key, colID)
		}
	}

	return prettyKey, prettyValue, nil
}
func (m *outbox) mainLoop() error {
	if m.syncFlowStream == nil {
		conn, err := m.flowCtx.rpcCtx.GRPCDial(m.addr)
		if err != nil {
			return err
		}
		client := NewDistSQLClient(conn)
		if log.V(2) {
			log.Infof(m.flowCtx.Context, "outbox: calling FlowStream")
		}
		m.stream, err = client.FlowStream(context.TODO())
		if err != nil {
			if log.V(1) {
				log.Infof(m.flowCtx.Context, "FlowStream error: %s", err)
			}
			return err
		}
		if log.V(2) {
			log.Infof(m.flowCtx.Context, "outbox: FlowStream returned")
		}
	}

	flushTicker := time.NewTicker(outboxFlushPeriod)
	defer flushTicker.Stop()

	for {
		select {
		case d, ok := <-m.RowChannel.C:
			if !ok {
				// No more data.
				return m.flush(true, nil)
			}
			err := d.Err
			if err == nil {
				err = m.addRow(d.Row)
			}
			if err != nil {
				// Try to flush to send out the error, but ignore any
				// send error.
				_ = m.flush(true, err)
				return err
			}
		case <-flushTicker.C:
			err := m.flush(false, nil)
			if err != nil {
				return err
			}
		}
	}
}
func (rq *replicateQueue) shouldQueue(
	ctx context.Context, now hlc.Timestamp, repl *Replica, sysCfg config.SystemConfig,
) (shouldQ bool, priority float64) {
	if !repl.store.splitQueue.Disabled() && repl.needsSplitBySize() {
		// If the range exceeds the split threshold, let that finish first.
		// Ranges must fit in memory on both sender and receiver nodes while
		// being replicated. This supplements the check provided by
		// acceptsUnsplitRanges, which looks at zone config boundaries rather
		// than data size.
		//
		// This check is ignored if the split queue is disabled, since in that
		// case, the split will never come.
		return
	}

	// Find the zone config for this range.
	desc := repl.Desc()
	zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey)
	if err != nil {
		log.Error(ctx, err)
		return
	}

	action, priority := rq.allocator.ComputeAction(zone, desc)
	if action != AllocatorNoop {
		if log.V(2) {
			log.Infof(ctx, "%s repair needed (%s), enqueuing", repl, action)
		}
		return true, priority
	}

	// See if there is a rebalancing opportunity present.
	leaseStoreID := repl.store.StoreID()
	if lease, _ := repl.getLease(); lease != nil {
		leaseStoreID = lease.Replica.StoreID
	}
	target := rq.allocator.RebalanceTarget(
		zone.Constraints, desc.Replicas, leaseStoreID, desc.RangeID)
	if log.V(2) {
		if target != nil {
			log.Infof(ctx, "%s rebalance target found, enqueuing", repl)
		} else {
			log.Infof(ctx, "%s no rebalance target found, not enqueuing", repl)
		}
	}
	return target != nil, 0
}
// processValueSingle processes the given value (of column
// family.DefaultColumnID), setting values in the rf.row accordingly. The key is
// only used for logging.
func (rf *RowFetcher) processValueSingle(
	family *ColumnFamilyDescriptor, kv client.KeyValue, debugStrings bool, prettyKeyPrefix string,
) (prettyKey string, prettyValue string, err error) {
	prettyKey = prettyKeyPrefix

	colID := family.DefaultColumnID
	if colID == 0 {
		// If this is the sentinel family, a value is not expected, so we're done.
		// Otherwise, this means something went wrong in the TableDescriptor
		// bookkeeping.
		if family.ID == keys.SentinelFamilyID {
			return "", "", nil
		}
		return "", "", errors.Errorf("single entry value with no default column id")
	}

	idx, ok := rf.colIdxMap[colID]
	if ok && (debugStrings || rf.valNeededForCol[idx]) {
		if debugStrings {
			prettyKey = fmt.Sprintf("%s/%s", prettyKey, rf.desc.Columns[idx].Name)
		}
		kind := rf.cols[idx].Type.Kind
		// TODO(dan): Once we decide if we're changing the tuple encoding, see if we
		// can get rid of UnmarshalColumnValue in favor of DecodeTableValue.
		value, err := UnmarshalColumnValue(&rf.alloc, kind, kv.Value)
		if err != nil {
			return "", "", err
		}
		if debugStrings {
			prettyValue = value.String()
		}
		if rf.row[idx] != nil {
			panic(fmt.Sprintf("duplicate value for column %d", idx))
		}
		rf.row[idx] = value
		if log.V(3) {
			log.Infof(context.TODO(), "Scan %s -> %v", kv.Key, value)
		}
	} else {
		// No need to unmarshal the column value. Either the column was part of
		// the index key or it isn't needed.
		if log.V(3) {
			log.Infof(context.TODO(), "Scan %s -> [%d] (skipped)", kv.Key, colID)
		}
	}

	return prettyKey, prettyValue, nil
}
func (t *parallelTest) run(dir string) {
	// Process the spec file.
	mainFile := filepath.Join(dir, "test.yaml")
	yamlData, err := ioutil.ReadFile(mainFile)
	if err != nil {
		t.Fatalf("%s: %s", mainFile, err)
	}
	var spec parTestSpec
	err = yaml.Unmarshal(yamlData, &spec)
	if err != nil {
		t.Fatalf("%s: %s", mainFile, err)
	}

	if spec.SkipReason != "" {
		t.Skip(spec.SkipReason)
	}

	log.Infof(t.ctx, "Running test %s", dir)
	if testing.Verbose() || log.V(1) {
		log.Infof(t.ctx, "spec: %+v", spec)
	}

	t.setup(&spec)
	defer t.close()

	for runListIdx, runList := range spec.Run {
		if testing.Verbose() || log.V(1) {
			var descr []string
			for _, re := range runList {
				descr = append(descr, fmt.Sprintf("%d:%s", re.Node, re.File))
			}
			log.Infof(t.ctx, "%s: run list %d: %s", mainFile, runListIdx, strings.Join(descr, ", "))
		}
		// Store the number of clients used so far (per node).
		numClients := make([]int, spec.ClusterSize)
		ch := make(chan bool)
		for _, re := range runList {
			client := t.getClient(re.Node, numClients[re.Node])
			numClients[re.Node]++
			go t.processTestFile(filepath.Join(dir, re.File), re.Node, client, ch)
		}
		// Wait for all clients to complete.
		for range runList {
			<-ch
		}
	}
}
// getTableLeaseByID is a by-ID variant of getTableLease (i.e. uses same cache).
func (p *planner) getTableLeaseByID(tableID sqlbase.ID) (*sqlbase.TableDescriptor, error) {
	if log.V(2) {
		log.Infof(p.ctx(), "planner acquiring lease on table ID %d", tableID)
	}

	if testDisableTableLeases {
		table, err := sqlbase.GetTableDescFromID(p.txn, tableID)
		if err != nil {
			return nil, err
		}
		if err := filterTableState(table); err != nil {
			return nil, err
		}
		return table, nil
	}

	// First, look to see if we already have a lease for this table -- including
	// leases acquired via `getTableLease`.
	var lease *LeaseState
	for _, l := range p.leases {
		if l.ID == tableID {
			lease = l
			if log.V(2) {
				log.Infof(p.ctx(), "found lease in planner cache for table %d", tableID)
			}
			break
		}
	}

	// If we didn't find a lease or the lease is about to expire, acquire one.
	if lease == nil || p.removeLeaseIfExpiring(lease) {
		var err error
		lease, err = p.leaseMgr.Acquire(p.txn, tableID, 0)
		if err != nil {
			if err == sqlbase.ErrDescriptorNotFound {
				// Transform the descriptor error into an error that references the
				// table's ID.
				return nil, sqlbase.NewUndefinedTableError(fmt.Sprintf("<id=%d>", tableID))
			}
			return nil, err
		}
		p.leases = append(p.leases, lease)
		// If the lease we just acquired expires before the txn's deadline, reduce
		// the deadline.
		p.txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: lease.Expiration().UnixNano()})
	}
	return &lease.TableDescriptor, nil
}
// GetTotalMemory returns either the total system memory or if possible the
// cgroups available memory.
func GetTotalMemory() (int64, error) {
	mem := gosigar.Mem{}
	if err := mem.Get(); err != nil {
		return 0, err
	}
	if mem.Total > math.MaxInt64 {
		return 0, fmt.Errorf("inferred memory size %s exceeds maximum supported memory size %s",
			humanize.IBytes(mem.Total), humanize.Bytes(math.MaxInt64))
	}
	totalMem := int64(mem.Total)
	if runtime.GOOS == "linux" {
		var err error
		var buf []byte
		if buf, err = ioutil.ReadFile(defaultCGroupMemPath); err != nil {
			if log.V(1) {
				log.Infof(context.TODO(), "can't read available memory from cgroups (%s), using system memory %s instead",
					err, humanizeutil.IBytes(totalMem))
			}
			return totalMem, nil
		}
		var cgAvlMem uint64
		if cgAvlMem, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64); err != nil {
			if log.V(1) {
				log.Infof(context.TODO(), "can't parse available memory from cgroups (%s), using system memory %s instead",
					err, humanizeutil.IBytes(totalMem))
			}
			return totalMem, nil
		}
		if cgAvlMem > math.MaxInt64 {
			if log.V(1) {
				log.Infof(context.TODO(), "available memory from cgroups is too large and unsupported %s using system memory %s instead",
					humanize.IBytes(cgAvlMem), humanizeutil.IBytes(totalMem))
			}
			return totalMem, nil
		}
		if cgAvlMem > mem.Total {
			if log.V(1) {
				log.Infof(context.TODO(), "available memory from cgroups %s exceeds system memory %s, using system memory",
					humanize.IBytes(cgAvlMem), humanizeutil.IBytes(totalMem))
			}
			return totalMem, nil
		}

		return int64(cgAvlMem), nil
	}
	return totalMem, nil
}
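// Illustrative sketch of the cgroup read above, using only the standard library
// ("io/ioutil", "strconv", "strings" assumed imported). The path constant is a
// hypothetical stand-in for defaultCGroupMemPath; callers fall back to total
// system memory when the limit file is missing or unparsable, mirroring the
// fallbacks in GetTotalMemory.
const cgroupV1MemLimitPath = "/sys/fs/cgroup/memory/memory.limit_in_bytes"

func cgroupMemLimit() (uint64, bool) {
	buf, err := ioutil.ReadFile(cgroupV1MemLimitPath)
	if err != nil {
		return 0, false // no cgroup limit available
	}
	n, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
	if err != nil {
		return 0, false // unparsable contents
	}
	return n, true
}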
func TestDockerCLI(t *testing.T) {
	containerConfig := container.Config{
		Image: postgresTestImage,
		Cmd:   []string{"stat", cluster.CockroachBinaryInContainer},
	}
	if err := testDockerOneShot(t, "cli_test", containerConfig); err != nil {
		t.Skipf(`TODO(dt): No binary in one-shot container, see #6086: %s`, err)
	}

	paths, err := filepath.Glob(testGlob)
	if err != nil {
		t.Fatal(err)
	}
	if len(paths) == 0 {
		t.Fatalf("no testfiles found (%v)", testGlob)
	}

	verbose := testing.Verbose() || log.V(1)
	for _, p := range paths {
		testFile := filepath.Base(p)
		testPath := filepath.Join(containerPath, testFile)
		t.Run(testFile, func(t *testing.T) {
			cmd := cmdBase
			if verbose {
				cmd = append(cmd, "-d")
			}
			cmd = append(cmd, "-f", testPath, cluster.CockroachBinaryInContainer)
			containerConfig.Cmd = cmd
			if err := testDockerOneShot(t, "cli_test", containerConfig); err != nil {
				t.Error(err)
			}
		})
	}
}
// getDatabaseID implements the DatabaseAccessor interface.
func (p *planner) getDatabaseID(name string) (sqlbase.ID, error) {
	if virtual := p.session.virtualSchemas.getVirtualDatabaseDesc(name); virtual != nil {
		return virtual.GetID(), nil
	}

	if id := p.databaseCache.getID(name); id != 0 {
		return id, nil
	}

	// Lookup the database in the cache first, falling back to the KV store if it
	// isn't present. The cache might cause the usage of a recently renamed
	// database, but that's a race that could occur anyways.
	desc, err := p.getCachedDatabaseDesc(name)
	if err != nil {
		if log.V(3) {
			log.Infof(p.ctx(), "%v", err)
		}
		var err error
		desc, err = p.mustGetDatabaseDesc(name)
		if err != nil {
			return 0, err
		}
	}

	p.databaseCache.setID(name, desc.ID)
	return desc.ID, nil
}
func (p *planner) createDescriptorWithID(
	idKey roachpb.Key, id sqlbase.ID, descriptor sqlbase.DescriptorProto,
) error {
	descriptor.SetID(id)
	// TODO(pmattis): The error currently returned below is likely going to be
	// difficult to interpret.
	//
	// TODO(pmattis): Need to handle if-not-exists here as well.
	//
	// TODO(pmattis): This is writing the namespace and descriptor table entries,
	// but not going through the normal INSERT logic and not performing a precise
	// mimicry. In particular, we're only writing a single key per table, while
	// perfect mimicry would involve writing a sentinel key for each row as well.
	descKey := sqlbase.MakeDescMetadataKey(descriptor.GetID())

	b := &client.Batch{}
	descID := descriptor.GetID()
	descDesc := sqlbase.WrapDescriptor(descriptor)
	if log.V(2) {
		log.Infof(p.ctx(), "CPut %s -> %d", idKey, descID)
		log.Infof(p.ctx(), "CPut %s -> %s", descKey, descDesc)
	}
	b.CPut(idKey, descID, nil)
	b.CPut(descKey, descDesc, nil)

	p.setTestingVerifyMetadata(func(systemConfig config.SystemConfig) error {
		if err := expectDescriptorID(systemConfig, idKey, descID); err != nil {
			return err
		}
		return expectDescriptor(systemConfig, descKey, descDesc)
	})

	return p.txn.Run(b)
}
func (s *Server) reportUsage(ctx context.Context) {
	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(s.getReportingInfo()); err != nil {
		log.Warning(ctx, err)
		return
	}

	q := reportingURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	reportingURL.RawQuery = q.Encode()

	res, err := http.Post(reportingURL.String(), "application/json", b)
	if err != nil {
		if log.V(2) {
			// This is probably going to be relatively common in production
			// environments where network access is usually curtailed.
			log.Warning(ctx, "Failed to report node usage metrics: ", err)
		}
		return
	}

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(ctx, "Failed to report node usage metrics: status: %s, body: %s, "+
			"error: %v", res.Status, b, err)
	}
}
// AllocateTarget returns a suitable store for a new allocation with the
// required attributes. Nodes already accommodating existing replicas are ruled
// out as targets. The range ID of the replica being allocated for is also
// passed in to ensure that we don't try to replace an existing dead replica on
// a store. If relaxConstraints is true, then the required attributes will be
// relaxed as necessary, from least specific to most specific, in order to
// allocate a target.
func (a *Allocator) AllocateTarget(
	constraints config.Constraints,
	existing []roachpb.ReplicaDescriptor,
	rangeID roachpb.RangeID,
	relaxConstraints bool,
) (*roachpb.StoreDescriptor, error) {
	sl, aliveStoreCount, throttledStoreCount := a.storePool.getStoreList(rangeID)

	if a.options.UseRuleSolver {
		candidates := allocateCandidates(
			sl,
			constraints,
			existing,
			a.storePool.getNodeLocalities(existing),
			a.storePool.deterministic,
		)
		if log.V(3) {
			log.Infof(context.TODO(), "allocate candidates: %s", candidates)
		}
		if target := candidates.selectGood(a.randGen); target != nil {
			return target, nil
		}

		// When there are throttled stores that do match, we shouldn't send
		// the replica to purgatory.
		if throttledStoreCount > 0 {
			return nil, errors.Errorf("%d matching stores are currently throttled", throttledStoreCount)
		}
		return nil, &allocatorError{
			required: constraints.Constraints,
		}
	}

	existingNodes := make(nodeIDSet, len(existing))
	for _, repl := range existing {
		existingNodes[repl.NodeID] = struct{}{}
	}

	// Because more redundancy is better than less, if relaxConstraints, the
	// matching here is lenient, and tries to find a target by relaxing an
	// attribute constraint, from last attribute to first.
	for attrs := constraints.Constraints; ; attrs = attrs[:len(attrs)-1] {
		filteredSL := sl.filter(config.Constraints{Constraints: attrs})
		if target := a.selectGood(filteredSL, existingNodes); target != nil {
			return target, nil
		}

		// When there are throttled stores that do match, we shouldn't send
		// the replica to purgatory or even consider relaxing the constraints.
		if throttledStoreCount > 0 {
			return nil, errors.Errorf("%d matching stores are currently throttled", throttledStoreCount)
		}

		if len(attrs) == 0 || !relaxConstraints {
			return nil, &allocatorError{
				required:         constraints.Constraints,
				relaxConstraints: relaxConstraints,
				aliveStoreCount:  aliveStoreCount,
			}
		}
	}
}
// get performs an HTTPS GET to the specified path for a specific node.
func get(ctx context.Context, t *testing.T, base, rel string) []byte {
	// TODO(bram) #2059: Remove retry logic.
	url := base + rel
	for r := retry.Start(retryOptions); r.Next(); {
		resp, err := cluster.HTTPClient.Get(url)
		if err != nil {
			log.Infof(ctx, "could not GET %s - %s", url, err)
			continue
		}
		defer resp.Body.Close()
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			log.Infof(ctx, "could not read body for %s - %s", url, err)
			continue
		}
		if resp.StatusCode != http.StatusOK {
			log.Infof(ctx, "could not GET %s - statuscode: %d - body: %s", url, resp.StatusCode, body)
			continue
		}
		if log.V(1) {
			log.Infof(ctx, "OK response from %s", url)
		}
		return body
	}
	t.Fatalf("There was an error retrieving %s", url)
	return []byte("")
}