func postFreeze( c cluster.Cluster, freeze bool, timeout time.Duration, ) (serverpb.ClusterFreezeResponse, error) { httpClient := cluster.HTTPClient httpClient.Timeout = timeout var resp serverpb.ClusterFreezeResponse log.Infof(context.Background(), "requesting: freeze=%t, timeout=%s", freeze, timeout) cb := func(v proto.Message) { oldNum := resp.RangesAffected resp = *v.(*serverpb.ClusterFreezeResponse) if oldNum > resp.RangesAffected { resp.RangesAffected = oldNum } if (resp != serverpb.ClusterFreezeResponse{}) { log.Infof(context.Background(), "%+v", &resp) } } err := httputil.StreamJSON( httpClient, c.URL(0)+"/_admin/v1/cluster/freeze", &serverpb.ClusterFreezeRequest{Freeze: freeze}, &serverpb.ClusterFreezeResponse{}, cb, ) return resp, err }
func testGossipPeeringsInner( ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig, ) { num := c.NumNodes() deadline := timeutil.Now().Add(cfg.Duration) waitTime := longWaitTime if cfg.Duration < waitTime { waitTime = shortWaitTime } for timeutil.Now().Before(deadline) { CheckGossip(ctx, t, c, waitTime, HasPeers(num)) // Restart the first node. log.Infof(ctx, "restarting node 0") if err := c.Restart(ctx, 0); err != nil { t.Fatal(err) } CheckGossip(ctx, t, c, waitTime, HasPeers(num)) // Restart another node (if there is one). var pickedNode int if num > 1 { pickedNode = rand.Intn(num-1) + 1 } log.Infof(ctx, "restarting node %d", pickedNode) if err := c.Restart(ctx, pickedNode); err != nil { t.Fatal(err) } CheckGossip(ctx, t, c, waitTime, HasPeers(num)) } }
// deleteRow adds to the batch the kv operations necessary to delete a table row // with the given values. func (rd *rowDeleter) deleteRow(ctx context.Context, b *client.Batch, values []parser.Datum) error { if err := rd.fks.checkAll(values); err != nil { return err } primaryIndexKey, secondaryIndexEntries, err := rd.helper.encodeIndexes(rd.fetchColIDtoRowIndex, values) if err != nil { return err } for _, secondaryIndexEntry := range secondaryIndexEntries { if log.V(2) { log.Infof(ctx, "Del %s", secondaryIndexEntry.Key) } b.Del(secondaryIndexEntry.Key) } // Delete the row. rd.startKey = roachpb.Key(primaryIndexKey) rd.endKey = roachpb.Key(encoding.EncodeNotNullDescending(primaryIndexKey)) if log.V(2) { log.Infof(ctx, "DelRange %s - %s", rd.startKey, rd.endKey) } b.DelRange(&rd.startKey, &rd.endKey, false) rd.startKey, rd.endKey = nil, nil return nil }
func main() { flag.Parse() c := localcluster.New(*numNodes) defer c.Close() log.SetExitFunc(func(code int) { c.Close() os.Exit(code) }) signalCh := make(chan os.Signal, 1) signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) a := newAllocSim(c) go func() { var exitStatus int select { case s := <-signalCh: log.Infof(context.Background(), "signal received: %v", s) exitStatus = 1 case <-time.After(*duration): log.Infof(context.Background(), "finished run of: %s", *duration) } a.finalStatus() c.Close() os.Exit(exitStatus) }() c.Start("allocsim", *workers, flag.Args(), []string{}) c.UpdateZoneConfig(1, 1<<20) a.run(*workers) }
// improve returns a candidate StoreDescriptor to rebalance a replica to. The // strategy is to always converge on the mean range count. If that isn't // possible, we don't return any candidate. func (rcb rangeCountBalancer) improve(sl StoreList, excluded nodeIDSet) *roachpb.StoreDescriptor { // Attempt to select a better candidate from the supplied list. sl.stores = selectRandom(rcb.rand, allocatorRandomCount, sl, excluded) candidate := rcb.selectBest(sl) if candidate == nil { if log.V(2) { log.Infof(context.TODO(), "not rebalancing: no valid candidate targets: %s", formatCandidates(nil, sl.stores)) } return nil } // Adding a replica to the candidate must make its range count converge on the // mean range count. rebalanceConvergesOnMean := rebalanceToConvergesOnMean(sl, *candidate) if !rebalanceConvergesOnMean { if log.V(2) { log.Infof(context.TODO(), "not rebalancing: %s wouldn't converge on the mean %.1f", formatCandidates(candidate, sl.stores), sl.candidateCount.mean) } return nil } if log.V(2) { log.Infof(context.TODO(), "rebalancing: mean=%.1f %s", sl.candidateCount.mean, formatCandidates(candidate, sl.stores)) } return candidate }
// waitAndProcess waits for the pace interval and processes the replica // if repl is not nil. The method returns true when the scanner needs // to be stopped. The method also removes a replica from queues when it // is signaled via the removed channel. func (rs *replicaScanner) waitAndProcess( ctx context.Context, start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica, ) bool { waitInterval := rs.paceInterval(start, timeutil.Now()) rs.waitTimer.Reset(waitInterval) if log.V(6) { log.Infof(ctx, "wait timer interval set to %s", waitInterval) } for { select { case <-rs.waitTimer.C: if log.V(6) { log.Infof(ctx, "wait timer fired") } rs.waitTimer.Read = true if repl == nil { return false } if log.V(2) { log.Infof(ctx, "replica scanner processing %s", repl) } for _, q := range rs.queues { q.MaybeAdd(repl, clock.Now()) } return false case repl := <-rs.removed: rs.removeReplica(repl) case <-stopper.ShouldStop(): return true } } }
// process synchronously invokes admin split for each proposed split key. func (sq *splitQueue) process( ctx context.Context, now hlc.Timestamp, r *Replica, sysCfg config.SystemConfig, ) error { // First handle case of splitting due to zone config maps. desc := r.Desc() splitKeys := sysCfg.ComputeSplitKeys(desc.StartKey, desc.EndKey) if len(splitKeys) > 0 { log.Infof(ctx, "splitting at keys %v", splitKeys) for _, splitKey := range splitKeys { if err := sq.db.AdminSplit(ctx, splitKey.AsRawKey()); err != nil { return errors.Errorf("unable to split %s at key %q: %s", r, splitKey, err) } } return nil } // Next handle case of splitting due to size. zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey) if err != nil { return err } size := r.GetMVCCStats().Total() // FIXME: why is this implementation not the same as the one above? if float64(size)/float64(zone.RangeMaxBytes) > 1 { log.Infof(ctx, "splitting size=%d max=%d", size, zone.RangeMaxBytes) if _, pErr := client.SendWrappedWith(ctx, r, roachpb.Header{ Timestamp: now, }, &roachpb.AdminSplitRequest{ Span: roachpb.Span{Key: desc.StartKey.AsRawKey()}, }); pErr != nil { return pErr.GoError() } } return nil }
func (p *planner) createDescriptorWithID( idKey roachpb.Key, id sqlbase.ID, descriptor sqlbase.DescriptorProto, ) error { descriptor.SetID(id) // TODO(pmattis): The error currently returned below is likely going to be // difficult to interpret. // // TODO(pmattis): Need to handle if-not-exists here as well. // // TODO(pmattis): This is writing the namespace and descriptor table entries, // but not going through the normal INSERT logic and not performing a precise // mimicry. In particular, we're only writing a single key per table, while // perfect mimicry would involve writing a sentinel key for each row as well. descKey := sqlbase.MakeDescMetadataKey(descriptor.GetID()) b := &client.Batch{} descID := descriptor.GetID() descDesc := sqlbase.WrapDescriptor(descriptor) if log.V(2) { log.Infof(p.ctx(), "CPut %s -> %d", idKey, descID) log.Infof(p.ctx(), "CPut %s -> %s", descKey, descDesc) } b.CPut(idKey, descID, nil) b.CPut(descKey, descDesc, nil) p.setTestingVerifyMetadata(func(systemConfig config.SystemConfig) error { if err := expectDescriptorID(systemConfig, idKey, descID); err != nil { return err } return expectDescriptor(systemConfig, descKey, descDesc) }) return p.txn.Run(b) }
// get performs an HTTPS GET to the specified path for a specific node. func get(ctx context.Context, t *testing.T, base, rel string) []byte { // TODO(bram) #2059: Remove retry logic. url := base + rel for r := retry.Start(retryOptions); r.Next(); { resp, err := cluster.HTTPClient.Get(url) if err != nil { log.Infof(ctx, "could not GET %s - %s", url, err) continue } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { log.Infof(ctx, "could not read body for %s - %s", url, err) continue } if resp.StatusCode != http.StatusOK { log.Infof(ctx, "could not GET %s - statuscode: %d - body: %s", url, resp.StatusCode, body) continue } if log.V(1) { log.Infof(ctx, "OK response from %s", url) } return body } t.Fatalf("There was an error retrieving %s", url) return []byte("") }
// flush sends the rows accumulated so far in a StreamMessage. func (m *outbox) flush(last bool, err error) error { if !last && m.numRows == 0 { return nil } msg := m.encoder.FormMessage(last, err) if log.V(3) { log.Infof(m.flowCtx.Context, "flushing outbox") } var sendErr error if m.stream != nil { sendErr = m.stream.Send(msg) } else { sendErr = m.syncFlowStream.Send(msg) } if sendErr != nil { if log.V(1) { log.Errorf(m.flowCtx.Context, "outbox flush error: %s", sendErr) } } else if log.V(3) { log.Infof(m.flowCtx.Context, "outbox flushed") } if sendErr != nil { return sendErr } m.numRows = 0 return nil }
func pullImage( ctx context.Context, l *LocalCluster, ref string, options types.ImagePullOptions, ) error { // HACK: on CircleCI, docker pulls the image on the first access from an // acceptance test even though that image is already present. So we first // check to see if our image is present in order to avoid this slowness. if hasImage(ctx, l, ref) { log.Infof(ctx, "ImagePull %s already exists", ref) return nil } log.Infof(ctx, "ImagePull %s starting", ref) defer log.Infof(ctx, "ImagePull %s complete", ref) rc, err := l.client.ImagePull(ctx, ref, options) if err != nil { return err } defer rc.Close() out := os.Stderr outFd := out.Fd() isTerminal := isatty.IsTerminal(outFd) return jsonmessage.DisplayJSONMessagesStream(rc, out, outFd, isTerminal, nil) }
// Run is part of the processor interface. func (h *hashJoiner) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(h.ctx, "hash joiner") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting hash joiner run") defer log.Infof(ctx, "exiting hash joiner run") } if err := h.buildPhase(ctx); err != nil { h.output.Close(err) return } if h.joinType == rightOuter || h.joinType == fullOuter { for k, bucket := range h.buckets { bucket.seen = make([]bool, len(bucket.rows)) h.buckets[k] = bucket } } err := h.probePhase(ctx) h.output.Close(err) }
// initNodeID updates the internal NodeDescriptor with the given ID. If zero is // supplied, a new NodeID is allocated with the first invocation. For all other // values, the supplied ID is stored into the descriptor (unless one has been // set previously, in which case a fatal error occurs). // // Upon setting a new NodeID, the descriptor is gossiped and the NodeID is // stored into the gossip instance. func (n *Node) initNodeID(id roachpb.NodeID) { ctx := n.AnnotateCtx(context.TODO()) if id < 0 { log.Fatalf(ctx, "NodeID must not be negative") } if o := n.Descriptor.NodeID; o > 0 { if id == 0 { return } log.Fatalf(ctx, "cannot initialize NodeID to %d, already have %d", id, o) } var err error if id == 0 { ctxWithSpan, span := n.AnnotateCtxWithSpan(ctx, "alloc-node-id") id, err = allocateNodeID(ctxWithSpan, n.storeCfg.DB) if err != nil { log.Fatal(ctxWithSpan, err) } log.Infof(ctxWithSpan, "new node allocated ID %d", id) if id == 0 { log.Fatal(ctxWithSpan, "new node allocated illegal ID 0") } span.Finish() n.storeCfg.Gossip.NodeID.Set(ctx, id) } else { log.Infof(ctx, "node ID %d initialized", id) } // Gossip the node descriptor to make this node addressable by node ID. n.Descriptor.NodeID = id if err = n.storeCfg.Gossip.SetNodeDescriptor(&n.Descriptor); err != nil { log.Fatalf(ctx, "couldn't gossip descriptor for node %d: %s", n.Descriptor.NodeID, err) } }
// runHistoryWithRetry intercepts retry errors. If one is encountered, // alternate histories are generated which all contain the exact // history prefix which encountered the error, but which recombine the // remaining commands with all of the commands from the retrying // history. // // This process continues recursively if there are further retries. func (hv *historyVerifier) runHistoryWithRetry( priorities []int32, isolations []enginepb.IsolationType, cmds []*cmd, db *client.DB, t *testing.T, ) error { if err := hv.runHistory(priorities, isolations, cmds, db, t); err != nil { if log.V(1) { log.Infof(context.Background(), "got an error running history %s: %s", historyString(cmds), err) } retry, ok := err.(*retryError) if !ok { return err } if _, hasRetried := hv.retriedTxns[retry.txnIdx]; hasRetried { if log.V(1) { log.Infof(context.Background(), "retried txn %d twice; skipping history", retry.txnIdx+1) } return nil } hv.retriedTxns[retry.txnIdx] = struct{}{} // Randomly subsample 5% of histories for reduced execution time. enumHis := sampleHistories(enumerateHistoriesAfterRetry(retry, cmds), 0.05) for i, h := range enumHis { if log.V(1) { log.Infof(context.Background(), "after retry, running alternate history %d of %d", i, len(enumHis)) } if err := hv.runHistoryWithRetry(priorities, isolations, h, db, t); err != nil { return err } } } return nil }
// AddMetricStruct examines all fields of metricStruct and adds // all Iterable or metricGroup objects to the registry. func (r *Registry) AddMetricStruct(metricStruct interface{}) { v := reflect.ValueOf(metricStruct) if v.Kind() == reflect.Ptr { v = v.Elem() } t := v.Type() for i := 0; i < v.NumField(); i++ { vfield, tfield := v.Field(i), t.Field(i) if !vfield.CanInterface() { if log.V(2) { log.Infof(context.TODO(), "Skipping unexported field %s", tfield.Name) } continue } val := vfield.Interface() switch typ := val.(type) { case Iterable: r.AddMetric(typ) case Struct: r.AddMetricStruct(typ) default: if log.V(2) { log.Infof(context.TODO(), "Skipping non-metric field %s", tfield.Name) } } } }
// sendGossip sends the latest gossip to the remote server, based on // the remote server's notion of other nodes' high water timestamps. func (c *client) sendGossip(g *Gossip, stream Gossip_GossipClient) error { g.mu.Lock() if delta := g.mu.is.delta(c.remoteHighWaterStamps); len(delta) > 0 { args := Request{ NodeID: g.NodeID.Get(), Addr: g.mu.is.NodeAddr, Delta: delta, HighWaterStamps: g.mu.is.getHighWaterStamps(), } bytesSent := int64(args.Size()) infosSent := int64(len(delta)) c.clientMetrics.BytesSent.Inc(bytesSent) c.clientMetrics.InfosSent.Inc(infosSent) c.nodeMetrics.BytesSent.Inc(bytesSent) c.nodeMetrics.InfosSent.Inc(infosSent) if log.V(1) { ctx := c.AnnotateCtx(stream.Context()) if c.peerID != 0 { log.Infof(ctx, "sending %s to node %d (%s)", extractKeys(args.Delta), c.peerID, c.addr) } else { log.Infof(ctx, "sending %s to %s", extractKeys(args.Delta), c.addr) } } g.mu.Unlock() return stream.Send(&args) } g.mu.Unlock() return nil }
// wrap the supplied planNode with the sortNode if sorting is required. // The first returned value is "true" if the sort node can be squashed // in the selectTopNode (sorting unneeded). func (n *sortNode) wrap(plan planNode) (bool, planNode) { if n != nil { // Check to see if the requested ordering is compatible with the existing // ordering. existingOrdering := plan.Ordering() if log.V(2) { log.Infof(n.ctx, "Sort: existing=%+v desired=%+v", existingOrdering, n.ordering) } match := computeOrderingMatch(n.ordering, existingOrdering, false) if match < len(n.ordering) { n.plan = plan n.needSort = true return false, n } if len(n.columns) < len(plan.Columns()) { // No sorting required, but we have to strip off the extra render // expressions we added. n.plan = plan return false, n } if log.V(2) { log.Infof(n.ctx, "Sort: no sorting required") } } return true, plan }
// throttle informs the store pool that the given remote store declined a // snapshot or failed to apply one, ensuring that it will not be considered // for up-replication or rebalancing until after the configured timeout period // has elapsed. Declined being true indicates that the remote store explicitly // declined a snapshot. func (sp *StorePool) throttle(reason throttleReason, toStoreID roachpb.StoreID) { sp.mu.Lock() defer sp.mu.Unlock() detail := sp.getStoreDetailLocked(toStoreID) ctx := sp.AnnotateCtx(context.TODO()) // If a snapshot is declined, be it due to an error or because it was // rejected, we mark the store detail as having been declined so it won't // be considered as a candidate for new replicas until after the configured // timeout period has passed. switch reason { case throttleDeclined: detail.throttledUntil = sp.clock.Now().GoTime().Add(sp.declinedReservationsTimeout) if log.V(2) { log.Infof(ctx, "snapshot declined, store:%s will be throttled for %s until %s", toStoreID, sp.declinedReservationsTimeout, detail.throttledUntil) } case throttleFailed: detail.throttledUntil = sp.clock.Now().GoTime().Add(sp.failedReservationsTimeout) if log.V(2) { log.Infof(ctx, "snapshot failed, store:%s will be throttled for %s until %s", toStoreID, sp.failedReservationsTimeout, detail.throttledUntil) } } }
// Run is part of the processor interface. func (m *mergeJoiner) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(m.ctx, "merge joiner") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting merge joiner run") defer log.Infof(ctx, "exiting merge joiner run") } for { batch, err := m.streamMerger.NextBatch() if err != nil || len(batch) == 0 { m.output.Close(err) return } for _, rowPair := range batch { row, _, err := m.render(rowPair[0], rowPair[1]) if err != nil { m.output.Close(err) return } if row != nil && !m.output.PushRow(row) { if log.V(2) { log.Infof(ctx, "no more rows required") } m.output.Close(nil) return } } } }
func (at *allocatorTest) Run(ctx context.Context, t *testing.T) { at.f = MakeFarmer(t, at.Prefix, stopper) if at.CockroachDiskSizeGB != 0 { at.f.AddVars["cockroach_disk_size"] = strconv.Itoa(at.CockroachDiskSizeGB) } log.Infof(ctx, "creating cluster with %d node(s)", at.StartNodes) if err := at.f.Resize(at.StartNodes); err != nil { t.Fatal(err) } CheckGossip(ctx, t, at.f, longWaitTime, HasPeers(at.StartNodes)) at.f.Assert(ctx, t) log.Info(ctx, "initial cluster is up") // We must stop the cluster because a) `nodectl` pokes at the data directory // and, more importantly, b) we don't want the cluster above and the cluster // below to ever talk to each other (see #7224). log.Info(ctx, "stopping cluster") for i := 0; i < at.f.NumNodes(); i++ { if err := at.f.Kill(ctx, i); err != nil { t.Fatalf("error stopping node %d: %s", i, err) } } log.Info(ctx, "downloading archived stores from Google Cloud Storage in parallel") errors := make(chan error, at.f.NumNodes()) for i := 0; i < at.f.NumNodes(); i++ { go func(nodeNum int) { errors <- at.f.Exec(nodeNum, "./nodectl download "+at.StoreURL) }(i) } for i := 0; i < at.f.NumNodes(); i++ { if err := <-errors; err != nil { t.Fatalf("error downloading store %d: %s", i, err) } } log.Info(ctx, "restarting cluster with archived store(s)") for i := 0; i < at.f.NumNodes(); i++ { if err := at.f.Restart(ctx, i); err != nil { t.Fatalf("error restarting node %d: %s", i, err) } } at.f.Assert(ctx, t) log.Infof(ctx, "resizing cluster to %d nodes", at.EndNodes) if err := at.f.Resize(at.EndNodes); err != nil { t.Fatal(err) } CheckGossip(ctx, t, at.f, longWaitTime, HasPeers(at.EndNodes)) at.f.Assert(ctx, t) log.Info(ctx, "waiting for rebalance to finish") if err := at.WaitForRebalance(ctx, t); err != nil { t.Fatal(err) } at.f.Assert(ctx, t) }
// Start starts a node. func (n *Node) Start() { n.Lock() defer n.Unlock() if n.cmd != nil { return } n.cmd = exec.Command(n.args[0], n.args[1:]...) n.cmd.Env = os.Environ() n.cmd.Env = append(n.cmd.Env, n.env...) stdoutPath := filepath.Join(n.logDir, "stdout") stdout, err := os.OpenFile(stdoutPath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { log.Fatalf(context.Background(), "unable to open file %s: %s", stdoutPath, err) } n.cmd.Stdout = stdout stderrPath := filepath.Join(n.logDir, "stderr") stderr, err := os.OpenFile(stderrPath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { log.Fatalf(context.Background(), "unable to open file %s: %s", stderrPath, err) } n.cmd.Stderr = stderr err = n.cmd.Start() if n.cmd.Process != nil { log.Infof(context.Background(), "process %d started: %s", n.cmd.Process.Pid, strings.Join(n.args, " ")) } if err != nil { log.Infof(context.Background(), "%v", err) _ = stdout.Close() _ = stderr.Close() return } go func(cmd *exec.Cmd) { if err := cmd.Wait(); err != nil { log.Errorf(context.Background(), "waiting for command: %v", err) } _ = stdout.Close() _ = stderr.Close() ps := cmd.ProcessState sy := ps.Sys().(syscall.WaitStatus) log.Infof(context.Background(), "Process %d exited with status %d", ps.Pid(), sy.ExitStatus()) log.Infof(context.Background(), ps.String()) n.Lock() n.cmd = nil n.Unlock() }(n.cmd) }
// Run is part of the processor interface. func (d *distinct) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(d.ctx, "distinct") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting distinct process") defer log.Infof(ctx, "exiting distinct") } var scratch []byte for { row, err := d.input.NextRow() if err != nil || row == nil { d.output.Close(err) return } // If we are processing DISTINCT(x, y) and the input stream is ordered // by x, we define x to be our group key. Our seen set at any given time // is only the set of all rows with the same group key. The encoding of // the row is the key we use in our 'seen' set. encoding, err := d.encode(scratch, row) if err != nil { d.output.Close(err) return } // The 'seen' set is reset whenever we find consecutive rows differing on the // group key thus avoiding the need to store encodings of all rows. matched, err := d.matchLastGroupKey(row) if err != nil { d.output.Close(err) return } if !matched { d.lastGroupKey = row d.seen = make(map[string]struct{}) } key := string(encoding) if _, ok := d.seen[key]; !ok { d.seen[key] = struct{}{} if !d.output.PushRow(row) { if log.V(2) { log.Infof(ctx, "no more rows required") } d.output.Close(nil) return } } scratch = encoding[:0] } }
// Run is part of the processor interface. func (ev *evaluator) Run(wg *sync.WaitGroup) { if wg != nil { defer wg.Done() } ctx, span := tracing.ChildSpan(ev.ctx, "evaluator") defer tracing.FinishSpan(span) if log.V(2) { log.Infof(ctx, "starting evaluator process") defer log.Infof(ctx, "exiting evaluator") } first := true for { row, err := ev.input.NextRow() if err != nil || row == nil { ev.output.Close(err) return } if first { first = false types := make([]sqlbase.ColumnType_Kind, len(row)) for i := range types { types[i] = row[i].Type } for i, expr := range ev.specExprs { err := ev.exprs[i].init(expr, types, ev.flowCtx.evalCtx) if err != nil { ev.output.Close(err) return } ev.exprTypes[i] = sqlbase.DatumTypeToColumnKind(ev.exprs[i].expr.ResolvedType()) } } outRow, err := ev.eval(row) if err != nil { ev.output.Close(err) return } if log.V(3) { log.Infof(ctx, "pushing %s\n", outRow) } // Push the row to the output RowReceiver; stop if they don't need more // rows. if !ev.output.PushRow(outRow) { if log.V(2) { log.Infof(ctx, "no more rows required") } ev.output.Close(nil) return } } }
func (r *RocksDB) open() error { var ver storageVersion if len(r.dir) != 0 { log.Infof(context.TODO(), "opening rocksdb instance at %q", r.dir) // Check the version number. var err error if ver, err = getVersion(r.dir); err != nil { return err } if ver < versionMinimum || ver > versionCurrent { // Instead of an error, we should call a migration if possible when // one is needed immediately following the DBOpen call. return fmt.Errorf("incompatible rocksdb data version, current:%d, on disk:%d, minimum:%d", versionCurrent, ver, versionMinimum) } } else { if log.V(2) { log.Infof(context.TODO(), "opening in memory rocksdb instance") } // In memory dbs are always current. ver = versionCurrent } blockSize := envutil.EnvOrDefaultBytes("COCKROACH_ROCKSDB_BLOCK_SIZE", defaultBlockSize) walTTL := envutil.EnvOrDefaultDuration("COCKROACH_ROCKSDB_WAL_TTL", 0).Seconds() status := C.DBOpen(&r.rdb, goToCSlice([]byte(r.dir)), C.DBOptions{ cache: r.cache.cache, block_size: C.uint64_t(blockSize), wal_ttl_seconds: C.uint64_t(walTTL), allow_os_buffer: C.bool(true), logging_enabled: C.bool(log.V(3)), num_cpu: C.int(runtime.NumCPU()), max_open_files: C.int(r.maxOpenFiles), }) if err := statusToError(status); err != nil { return errors.Errorf("could not open rocksdb instance: %s", err) } // Update or add the version file if needed. if ver < versionCurrent { if err := writeVersionFile(r.dir); err != nil { return err } } // Start a goroutine that will finish when the underlying handle // is deallocated. This is used to check a leak in tests. go func() { <-r.deallocated }() return nil }
func (t *parallelTest) setup(spec *parTestSpec) { if spec.ClusterSize == 0 { spec.ClusterSize = 1 } if testing.Verbose() || log.V(1) { log.Infof(t.ctx, "Cluster Size: %d", spec.ClusterSize) } args := base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ Knobs: base.TestingKnobs{ SQLExecutor: &sql.ExecutorTestingKnobs{ WaitForGossipUpdate: true, CheckStmtStringChange: true, }, }, }, } t.cluster = serverutils.StartTestCluster(t, spec.ClusterSize, args) t.clients = make([][]*gosql.DB, spec.ClusterSize) for i := range t.clients { t.clients[i] = append(t.clients[i], t.cluster.ServerConn(i)) } r0 := sqlutils.MakeSQLRunner(t, t.clients[0][0]) if spec.RangeSplitSize != 0 { if testing.Verbose() || log.V(1) { log.Infof(t.ctx, "Setting range split size: %d", spec.RangeSplitSize) } zoneCfg := config.DefaultZoneConfig() zoneCfg.RangeMaxBytes = int64(spec.RangeSplitSize) zoneCfg.RangeMinBytes = zoneCfg.RangeMaxBytes / 2 buf, err := protoutil.Marshal(&zoneCfg) if err != nil { t.Fatal(err) } objID := keys.RootNamespaceID r0.Exec(`UPDATE system.zones SET config = $2 WHERE id = $1`, objID, buf) } if testing.Verbose() || log.V(1) { log.Infof(t.ctx, "Creating database") } r0.Exec("CREATE DATABASE test") for i := range t.clients { sqlutils.MakeSQLRunner(t, t.clients[i][0]).Exec("SET DATABASE = test") } if testing.Verbose() || log.V(1) { log.Infof(t.ctx, "Test setup done") } }
// If the time is greater than the timestamp stored at `key`, run `f`. // Before running `f`, the timestamp is updated forward by a small amount via // a compare-and-swap to ensure at-most-one concurrent execution. After `f` // executes the timestamp is set to the next execution time. // Returns how long until `f` should be run next (i.e. when this method should // be called again). func (s *Server) maybeRunPeriodicCheck( op string, key roachpb.Key, f func(context.Context), ) time.Duration { ctx, span := s.AnnotateCtxWithSpan(context.Background(), "op") defer span.Finish() // Add the op name to the log context. ctx = log.WithLogTag(ctx, op, nil) resp, err := s.db.Get(ctx, key) if err != nil { log.Infof(ctx, "error reading time: %s", err) return updateCheckRetryFrequency } // We should early returned below if either the next check time is in the // future or if the atomic compare-and-set of that time failed (which // would happen if two nodes tried at the same time). if resp.Exists() { whenToCheck, pErr := resp.Value.GetTime() if pErr != nil { log.Warningf(ctx, "error decoding time: %s", err) return updateCheckRetryFrequency } else if delay := whenToCheck.Sub(timeutil.Now()); delay > 0 { return delay } nextRetry := whenToCheck.Add(updateCheckRetryFrequency) if err := s.db.CPut(ctx, key, nextRetry, whenToCheck); err != nil { if log.V(2) { log.Infof(ctx, "could not set next version check time (maybe another node checked?): %s", err) } return updateCheckRetryFrequency } } else { log.Infof(ctx, "No previous %s time.", op) nextRetry := timeutil.Now().Add(updateCheckRetryFrequency) // CPut with `nil` prev value to assert that no other node has checked. if err := s.db.CPut(ctx, key, nextRetry, nil); err != nil { if log.V(2) { log.Infof(ctx, "Could not set %s time (maybe another node checked?): %v", op, err) } return updateCheckRetryFrequency } } f(ctx) if err := s.db.Put(ctx, key, timeutil.Now().Add(updateCheckFrequency)); err != nil { log.Infof(ctx, "Error updating %s time: %v", op, err) } return updateCheckFrequency }
func testPutInner(ctx context.Context, t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) { db, err := c.NewClient(ctx, 0) if err != nil { t.Fatal(err) } errs := make(chan error, c.NumNodes()) start := timeutil.Now() deadline := start.Add(cfg.Duration) var count int64 for i := 0; i < c.NumNodes(); i++ { go func() { r, _ := randutil.NewPseudoRand() value := randutil.RandBytes(r, 8192) for timeutil.Now().Before(deadline) { k := atomic.AddInt64(&count, 1) v := value[:r.Intn(len(value))] if err := db.Put(ctx, fmt.Sprintf("%08d", k), v); err != nil { errs <- err return } } errs <- nil }() } for i := 0; i < c.NumNodes(); { baseCount := atomic.LoadInt64(&count) select { case <-stopper.ShouldStop(): t.Fatalf("interrupted") case err := <-errs: if err != nil { t.Fatal(err) } i++ case <-time.After(1 * time.Second): // Periodically print out progress so that we know the test is still // running. loadedCount := atomic.LoadInt64(&count) log.Infof(ctx, "%d (%d/s)", loadedCount, loadedCount-baseCount) c.Assert(ctx, t) if err := cluster.Consistent(ctx, c, 0); err != nil { t.Fatal(err) } } } elapsed := timeutil.Since(start) log.Infof(ctx, "%d %.1f/sec", count, float64(count)/elapsed.Seconds()) }
// clearOverlappingCachedRangeDescriptors looks up and clears any // cache entries which overlap the specified descriptor. func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors( desc *roachpb.RangeDescriptor, ) error { key := desc.EndKey metaKey, err := meta(key) if err != nil { return err } // Clear out any descriptors which subsume the key which we're going // to cache. For example, if an existing KeyMin->KeyMax descriptor // should be cleared out in favor of a KeyMin->"m" descriptor. k, v, ok := rdc.rangeCache.cache.Ceil(rangeCacheKey(metaKey)) if ok { descriptor := v.(*roachpb.RangeDescriptor) if descriptor.StartKey.Less(key) && !descriptor.EndKey.Less(key) { if log.V(2) { log.Infof(rdc.ctx, "clearing overlapping descriptor: key=%s desc=%s", k, descriptor) } rdc.rangeCache.cache.Del(k.(rangeCacheKey)) } } startMeta, err := meta(desc.StartKey) if err != nil { return err } endMeta, err := meta(desc.EndKey) if err != nil { return err } // Also clear any descriptors which are subsumed by the one we're // going to cache. This could happen on a merge (and also happens // when there's a lot of concurrency). Iterate from the range meta key // after RangeMetaKey(desc.StartKey) to the range meta key for desc.EndKey. var keys []rangeCacheKey rdc.rangeCache.cache.DoRange(func(k, v interface{}) bool { if log.V(2) { log.Infof(rdc.ctx, "clearing subsumed descriptor: key=%s desc=%s", k, v.(*roachpb.RangeDescriptor)) } keys = append(keys, k.(rangeCacheKey)) return false }, rangeCacheKey(startMeta.Next()), rangeCacheKey(endMeta)) for _, key := range keys { rdc.rangeCache.cache.Del(key) } return nil }
// manage manages outgoing clients. Periodically, the infostore is // scanned for infos with hop count exceeding the MaxHops // threshold. If the number of outgoing clients doesn't exceed // maxPeers(), a new gossip client is connected to a randomly selected // peer beyond MaxHops threshold. Otherwise, the least useful peer // node is cut off to make room for a replacement. Disconnected // clients are processed via the disconnected channel and taken out of // the outgoing address set. If there are no longer any outgoing // connections or the sentinel gossip is unavailable, the bootstrapper // is notified via the stalled conditional variable. func (g *Gossip) manage() { g.server.stopper.RunWorker(func() { ctx := g.AnnotateCtx(context.Background()) cullTicker := time.NewTicker(g.jitteredInterval(g.cullInterval)) stallTicker := time.NewTicker(g.jitteredInterval(g.stallInterval)) defer cullTicker.Stop() defer stallTicker.Stop() for { select { case <-g.server.stopper.ShouldStop(): return case c := <-g.disconnected: g.doDisconnected(c) case nodeID := <-g.tighten: g.tightenNetwork(nodeID) case <-cullTicker.C: func() { g.mu.Lock() if !g.outgoing.hasSpace() { leastUsefulID := g.mu.is.leastUseful(g.outgoing) if c := g.findClient(func(c *client) bool { return c.peerID == leastUsefulID }); c != nil { if log.V(1) { log.Infof(ctx, "closing least useful client %+v to tighten network graph", c) } log.Eventf(ctx, "culling %s", c.addr) c.close() // After releasing the lock, block until the client disconnects. defer func() { g.doDisconnected(<-g.disconnected) }() } else { if log.V(1) { g.clientsMu.Lock() log.Infof(ctx, "couldn't find least useful client among %+v", g.clientsMu.clients) g.clientsMu.Unlock() } } } g.mu.Unlock() }() case <-stallTicker.C: g.mu.Lock() g.maybeSignalStatusChangeLocked() g.mu.Unlock() } } }) }
// processValueSingle processes the given value (of column // family.DefaultColumnID), setting values in the rf.row accordingly. The key is // only used for logging. func (rf *RowFetcher) processValueSingle( family *ColumnFamilyDescriptor, kv client.KeyValue, debugStrings bool, prettyKeyPrefix string, ) (prettyKey string, prettyValue string, err error) { prettyKey = prettyKeyPrefix colID := family.DefaultColumnID if colID == 0 { // If this is the sentinel family, a value is not expected, so we're done. // Otherwise, this means something went wrong in the TableDescriptor // bookkeeping. if family.ID == keys.SentinelFamilyID { return "", "", nil } return "", "", errors.Errorf("single entry value with no default column id") } idx, ok := rf.colIdxMap[colID] if ok && (debugStrings || rf.valNeededForCol[idx]) { if debugStrings { prettyKey = fmt.Sprintf("%s/%s", prettyKey, rf.desc.Columns[idx].Name) } typ := rf.cols[idx].Type // TODO(arjun): The value is a directly marshaled single value, so we // unmarshal it eagerly here. This can potentially be optimized out, // although that would require changing UnmarshalColumnValue to operate // on bytes, and for Encode/DecodeTableValue to operate on marshaled // single values. value, err := UnmarshalColumnValue(&rf.alloc, typ, kv.Value) if err != nil { return "", "", err } if debugStrings { prettyValue = value.String() } if !rf.row[idx].IsUnset() { panic(fmt.Sprintf("duplicate value for column %d", idx)) } rf.row[idx] = DatumToEncDatum(typ, value) if log.V(3) { log.Infof(context.TODO(), "Scan %s -> %v", kv.Key, value) } } else { // No need to unmarshal the column value. Either the column was part of // the index key or it isn't needed. if log.V(3) { log.Infof(context.TODO(), "Scan %s -> [%d] (skipped)", kv.Key, colID) } } return prettyKey, prettyValue, nil }