func (s *Server) reportUsage() {
	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(s.getReportingInfo()); err != nil {
		log.Warning(context.TODO(), err)
		return
	}

	q := reportingURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	reportingURL.RawQuery = q.Encode()

	res, err := http.Post(reportingURL.String(), "application/json", b)
	if err != nil {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed.
		if log.V(2) {
			log.Warning(context.TODO(), "Failed to report node usage metrics: ", err)
		}
		// Returning unconditionally here avoids dereferencing a nil response
		// below when the POST failed but verbosity is below 2.
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(context.TODO(), "Failed to report node usage metrics: status: %s, body: %s, "+
			"error: %v", res.Status, b, err)
	}
}
func (txn *Txn) exec(retryable func(txn *Txn) *roachpb.Error) *roachpb.Error {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var pErr *roachpb.Error
	for r := retry.Start(txn.db.txnRetryOptions); r.Next(); {
		pErr = retryable(txn)
		if pErr == nil && txn.Proto.Status == roachpb.PENDING {
			// retryable succeeded, but didn't commit.
			pErr = txn.commit(nil)
		}

		if pErr != nil {
			switch pErr.TransactionRestart {
			case roachpb.TransactionRestart_IMMEDIATE:
				if log.V(2) {
					log.Warning(pErr)
				}
				r.Reset()
				continue
			case roachpb.TransactionRestart_BACKOFF:
				if log.V(2) {
					log.Warning(pErr)
				}
				continue
			}
			// By default, fall through and break.
		}
		break
	}
	txn.Cleanup(pErr)
	return pErr
}
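// Illustrative sketch, not from the CockroachDB source: the loop above treats
// TransactionRestart_IMMEDIATE as "reset the backoff and retry now" and
// TransactionRestart_BACKOFF as "retry after the next backoff interval". The
// following self-contained program shows that control flow with hypothetical
// sentinel errors standing in for the restart classification.
package main

import (
	"errors"
	"fmt"
	"time"
)

var (
	errImmediate = errors.New("restart immediately")  // assumed stand-in for TransactionRestart_IMMEDIATE
	errBackoff   = errors.New("restart with backoff") // assumed stand-in for TransactionRestart_BACKOFF
)

func execWithRetry(op func() error) error {
	backoff := time.Millisecond
	const maxBackoff = 100 * time.Millisecond
	for attempt := 0; attempt < 10; attempt++ {
		err := op()
		switch err {
		case nil:
			return nil
		case errImmediate:
			// Retry right away, resetting any accumulated backoff.
			backoff = time.Millisecond
		case errBackoff:
			// Wait before retrying, doubling the interval up to a cap.
			time.Sleep(backoff)
			if backoff *= 2; backoff > maxBackoff {
				backoff = maxBackoff
			}
		default:
			return err // not retryable; fall out of the loop
		}
	}
	return errors.New("too many retries")
}

func main() {
	n := 0
	fmt.Println(execWithRetry(func() error {
		if n++; n < 3 {
			return errBackoff
		}
		return nil
	}))
}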
func (p *planner) releaseLeases(db client.DB) {
	if p.leases != nil {
		for _, lease := range p.leases {
			if err := p.leaseMgr.Release(lease); err != nil {
				log.Warning(err)
			}
		}
		p.leases = nil
	}

	// TODO(pmattis): This is a hack. Remove when schema change operations work
	// properly.
	if p.modifiedSchemas != nil {
		for _, d := range p.modifiedSchemas {
			var lease *LeaseState
			err := db.Txn(func(txn *client.Txn) error {
				var err error
				lease, err = p.leaseMgr.Acquire(txn, d.id, d.version)
				return err
			})
			if err != nil {
				log.Warning(err)
				continue
			}
			if err := p.leaseMgr.Release(lease); err != nil {
				log.Warning(err)
			}
		}
		p.modifiedSchemas = nil
	}
}
// exec executes the entire schema change in steps.
func (sc SchemaChanger) exec() *roachpb.Error {
	// Acquire the lease.
	lease, pErr := sc.AcquireLease()
	if pErr != nil {
		return pErr
	}
	// Always try to release the lease.
	defer func(l *TableDescriptor_SchemaChangeLease) {
		if pErr := sc.ReleaseLease(*l); pErr != nil {
			log.Warning(pErr)
		}
	}(&lease)

	// Increment the version and unset tableDescriptor.UpVersion.
	if pErr := sc.MaybeIncrementVersion(); pErr != nil {
		return pErr
	}

	// Wait for the schema change to propagate to all nodes after this function
	// returns, so that the new schema is live everywhere. This is not needed for
	// correctness but is done to make the UI experience/tests predictable.
	defer func() {
		if pErr := sc.waitToUpdateLeases(); pErr != nil {
			log.Warning(pErr)
		}
	}()

	if sc.mutationID == invalidMutationID {
		// Nothing more to do.
		return nil
	}

	// Another transaction might set the up_version bit again,
	// but we're no longer responsible for taking care of that.

	// Run through the mutation state machine before the backfill.
	if pErr := sc.RunStateMachineBeforeBackfill(); pErr != nil {
		return pErr
	}

	// Apply the backfill.
	if pErr := sc.applyMutations(&lease); pErr != nil {
		// Purge the mutations if applying them fails.
		if errPurge := sc.purgeMutations(&lease); errPurge != nil {
			return roachpb.NewErrorf("error purging mutation: %s, after error: %s",
				errPurge, pErr)
		}
		return pErr
	}

	// Mark the mutations as completed.
	return sc.done()
}
// runHeartbeat sends periodic heartbeats to the server, marking the client
// healthy or unhealthy and reconnecting appropriately until either the Client
// or the supplied channel is closed.
func (c *Client) runHeartbeat(retryOpts retry.Options, closer <-chan struct{}) {
	isHealthy := false
	setHealthy := func() {
		if isHealthy {
			return
		}
		isHealthy = true
		close(c.healthy.Load().(chan struct{}))
	}
	setUnhealthy := func() {
		if isHealthy {
			isHealthy = false
			c.healthy.Store(make(chan struct{}))
		}
	}

	err := errUnstarted // initial condition
	for {
		for r := retry.Start(retryOpts); r.Next(); {
			// Reconnect on failure.
			if err != nil {
				if err = c.connect(); err != nil {
					setUnhealthy()
					log.Warning(err)
					continue
				}
			}
			// Heartbeat regardless of failure.
			if err = c.heartbeat(); err != nil {
				setUnhealthy()
				log.Warning(err)
				continue
			}
			setHealthy()
			break
		}

		// Wait after the heartbeat so that the first iteration gets a wait-free
		// heartbeat attempt.
		select {
		case <-closer:
			c.Close()
			return
		case <-c.Closed:
			return
		case <-time.After(heartbeatInterval):
			// TODO(tamird): Perhaps retry more aggressively when the client is
			// unhealthy.
		}
	}
}
// runHeartbeat sends periodic heartbeats to the server, marking the client
// healthy or unhealthy and reconnecting appropriately until either the Client
// or the supplied channel is closed.
func (c *Client) runHeartbeat(retryOpts retry.Options, closer <-chan struct{}) {
	isHealthy := false
	setHealthy := func() {
		if isHealthy {
			return
		}
		isHealthy = true
		close(c.healthy.Load().(chan struct{}))
	}
	setUnhealthy := func() {
		if isHealthy {
			isHealthy = false
			c.healthy.Store(make(chan struct{}))
		}
	}

	connErr := errUnstarted // initial condition
	var beatErr error
	for {
		for r := retry.Start(retryOpts); r.Next(); {
			// Reconnect if the connection failed or the heartbeat error is not
			// definitely temporary.
			if netErr, ok := beatErr.(net.Error); connErr != nil ||
				beatErr != nil && !(ok && netErr.Temporary()) {
				if connErr = c.connect(); connErr != nil {
					log.Warning(connErr)
					setUnhealthy()
					continue
				}
			}
			if beatErr = c.heartbeat(); beatErr == nil {
				setHealthy()
				break
			} else {
				log.Warning(beatErr)
				setUnhealthy()
			}
		}

		// Wait after the heartbeat so that the first iteration gets a wait-free
		// heartbeat attempt.
		select {
		case <-closer:
			c.Close()
			return
		case <-c.Closed:
			return
		case <-time.After(heartbeatInterval):
			// TODO(tamird): Perhaps retry more aggressively when the client is
			// unhealthy.
		}
	}
}
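// Illustrative sketch, not from the CockroachDB source: both runHeartbeat
// variants publish health through an atomic.Value holding a chan struct{}
// that is closed while healthy and replaced with a fresh, open channel when
// unhealthy. Waiters select on the current channel and are released the
// moment it is closed. The healthNotifier type below is hypothetical and
// exists only to demonstrate the pattern; health transitions are assumed to
// happen on a single goroutine, as in runHeartbeat.
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type healthNotifier struct {
	healthy   atomic.Value // holds a chan struct{}
	isHealthy bool         // mutated only by the owning goroutine
}

func newHealthNotifier() *healthNotifier {
	n := &healthNotifier{}
	n.healthy.Store(make(chan struct{}))
	return n
}

// Healthy returns a channel that is closed while the client is healthy.
func (n *healthNotifier) Healthy() <-chan struct{} {
	return n.healthy.Load().(chan struct{})
}

func (n *healthNotifier) setHealthy() {
	if n.isHealthy {
		return
	}
	n.isHealthy = true
	close(n.healthy.Load().(chan struct{})) // release all current waiters
}

func (n *healthNotifier) setUnhealthy() {
	if n.isHealthy {
		n.isHealthy = false
		n.healthy.Store(make(chan struct{})) // future waiters block again
	}
}

func main() {
	n := newHealthNotifier()
	go func() {
		time.Sleep(50 * time.Millisecond)
		n.setHealthy()
	}()
	select {
	case <-n.Healthy():
		fmt.Println("client became healthy")
	case <-time.After(time.Second):
		fmt.Println("timed out waiting for health")
	}
}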
// removeLeaseIfExpiring removes a lease and returns true if it is about to
// expire. The method also resets the transaction deadline.
func (p *planner) removeLeaseIfExpiring(lease *LeaseState) bool {
	if lease == nil || lease.hasSomeLifeLeft(p.leaseMgr.clock) {
		return false
	}

	// Remove the lease from p.leases.
	idx := -1
	for i, l := range p.leases {
		if l == lease {
			idx = i
			break
		}
	}
	if idx == -1 {
		log.Warningf(p.ctx(), "lease (%s) not found", lease)
		return false
	}
	p.leases[idx] = p.leases[len(p.leases)-1]
	p.leases[len(p.leases)-1] = nil
	p.leases = p.leases[:len(p.leases)-1]

	if err := p.leaseMgr.Release(lease); err != nil {
		log.Warning(p.ctx(), err)
	}

	// Reset the deadline so that a new deadline will be set after the lease
	// is acquired.
	p.txn.ResetDeadline()
	for _, l := range p.leases {
		p.txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: l.Expiration().UnixNano()})
	}
	return true
}
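// Illustrative sketch, not from the CockroachDB source: the removal above
// uses the "move the last element into the hole, nil the tail slot, truncate"
// idiom, which is O(1) but does not preserve order. Nil-ing the vacated slot
// matters for pointer elements so the backing array doesn't keep the removed
// lease alive. A minimal generic version (requires Go 1.18+, which postdates
// the original code):
package main

import "fmt"

// removeUnordered deletes s[i] in O(1) by swapping in the last element and
// truncating; the vacated tail slot is zeroed so pointer elements can be
// garbage collected.
func removeUnordered[T any](s []T, i int) []T {
	var zero T
	s[i] = s[len(s)-1]
	s[len(s)-1] = zero
	return s[:len(s)-1]
}

func main() {
	leases := []*int{new(int), new(int), new(int)}
	leases = removeUnordered(leases, 0)
	fmt.Println(len(leases)) // 2; order of survivors is not preserved
}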
// Send sends a request to Cockroach via RPC. Errors which are retryable are
// retried with backoff in a loop using the default retry options. Other errors
// sending the request are retried indefinitely using the same client command
// ID to avoid reporting failure when in fact the command may have gone through
// and been executed successfully. We retry here to eventually get through with
// the same client command ID and be given the cached response.
func (s *rpcSender) Send(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, *proto.Error) {
	// Note: `method` was undefined in the original snippet; all batched
	// requests go to a single RPC endpoint.
	const method = "Server.Batch"

	var err error
	var br proto.BatchResponse
	for r := retry.Start(s.retryOpts); r.Next(); {
		select {
		case <-s.client.Healthy():
		default:
			err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method)
			log.Warning(err)
			continue
		}

		if err = s.client.Call(method, &ba, &br); err != nil {
			br.Reset() // don't trust anyone.

			// Assume all errors sending the request are retryable. The actual
			// number of things that could go wrong is vast, but we don't
			// want to miss any which should in theory be retried with the
			// same client command ID. We log the error here as a warning so
			// there's visibility that this is happening. Some of the errors
			// we'll sweep up in this net shouldn't be retried, but we can't
			// really know for sure which.
			log.Warningf("failed to send RPC request %s: %s", method, err)
			continue
		}

		// On successful post, we're done with the retry loop.
		break
	}
	if err != nil {
		return nil, proto.NewError(err)
	}
	pErr := br.Error
	br.Error = nil
	return &br, pErr
}
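// Illustrative sketch, not from the CockroachDB source: the comment above
// argues that retrying with the same client command ID is safe because the
// server can recognize a duplicate and serve the cached response instead of
// executing the command twice. The responseCache type below is hypothetical
// and only demonstrates that dedupe-by-ID idea in miniature.
package main

import "fmt"

// responseCache remembers the outcome of each command ID, so a retried
// command that already executed returns the cached reply rather than
// applying its effects a second time.
type responseCache struct {
	results map[string]string // command ID -> cached reply
	applied map[string]int    // how many times each command actually ran
}

func newResponseCache() *responseCache {
	return &responseCache{results: map[string]string{}, applied: map[string]int{}}
}

// execute runs cmd under id exactly once; duplicates are served from cache.
func (c *responseCache) execute(id, cmd string) string {
	if res, ok := c.results[id]; ok {
		return res // duplicate: cached reply, no re-execution
	}
	c.applied[cmd]++
	res := fmt.Sprintf("applied %q (count=%d)", cmd, c.applied[cmd])
	c.results[id] = res
	return res
}

func main() {
	c := newResponseCache()
	fmt.Println(c.execute("cmd-1", "increment x")) // executes: count=1
	fmt.Println(c.execute("cmd-1", "increment x")) // retried: cached, count stays 1
}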
func (txn *Txn) exec(retryable func(txn *Txn) error) error {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var err error
	for r := retry.Start(txn.db.txnRetryOptions); r.Next(); {
		err = retryable(txn)
		if err == nil && txn.Proto.Status == roachpb.PENDING {
			// retryable succeeded, but didn't commit.
			err = txn.commit(nil)
		}

		if restartErr, ok := err.(roachpb.TransactionRestartError); ok {
			if log.V(2) {
				log.Warning(err)
			}
			switch restartErr.CanRestartTransaction() {
			case roachpb.TransactionRestart_IMMEDIATE:
				r.Reset()
				continue
			case roachpb.TransactionRestart_BACKOFF:
				continue
			}
			// By default, fall through and break.
		}
		break
	}
	txn.Cleanup(err)
	return err
}
// Send implements the client.Sender interface.
func (rls *retryableLocalSender) Send(_ context.Context, call proto.Call) {
	// Instant retry to handle the case of a range split, which is
	// exposed here as a RangeKeyMismatchError.
	retryOpts := retry.Options{}
	// In local tests, the RPCs are not actually sent over the wire. We
	// need to clone the Txn in order to avoid unexpected sharing
	// between TxnCoordSender and client.Txn.
	if header := call.Args.Header(); header.Txn != nil {
		header.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
	}

	var err error
	for r := retry.Start(retryOpts); r.Next(); {
		call.Reply.Header().Error = nil
		rls.LocalSender.Send(context.TODO(), call)
		// Check for a range key mismatch error (this could happen if the
		// range was split between lookup and execution). In this case,
		// reset header.Replica and engage the retry loop.
		if err = call.Reply.Header().GoError(); err != nil {
			if _, ok := err.(*proto.RangeKeyMismatchError); ok {
				// Clear the request replica.
				call.Args.Header().Replica = proto.Replica{}
				log.Warning(err)
				continue
			}
		}
		return
	}
	panic(fmt.Sprintf("local sender did not succeed: %s", err))
}
func (txn *Txn) exec(retryable func(txn *Txn) error) (err error) {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	for r := retry.Start(txn.db.txnRetryOptions); r.Next(); {
		txn.haveTxnWrite, txn.haveEndTxn = false, false // always reset before [re]starting txn
		if err = retryable(txn); err == nil {
			if !txn.haveEndTxn && txn.haveTxnWrite {
				// If there were no errors running retryable, commit the txn. This
				// may block waiting for outstanding writes to complete in case
				// retryable didn't -- we need the most recent of all response
				// timestamps in order to commit.
				err = txn.Commit()
			}
		}
		if restartErr, ok := err.(proto.TransactionRestartError); ok {
			if log.V(2) {
				log.Warning(err)
			}
			if restartErr.CanRestartTransaction() == proto.TransactionRestart_IMMEDIATE {
				r.Reset()
				continue
			} else if restartErr.CanRestartTransaction() == proto.TransactionRestart_BACKOFF {
				continue
			}
			// By default, fall through and break.
		}
		break
	}
	if err != nil && txn.haveTxnWrite {
		if replyErr := txn.Rollback(); replyErr != nil {
			log.Errorf("failure aborting transaction: %s; abort caused by: %s", replyErr, err)
		}
	}
	return
}
// Send sends a call to Cockroach via an RPC request. Errors which are
// retryable are retried with backoff in a loop using the default retry
// options. Other errors sending the request are retried indefinitely using
// the same client command ID to avoid reporting failure when in fact the
// command may have gone through and been executed successfully. We retry here
// to eventually get through with the same client command ID and be given the
// cached response.
func (s *rpcSender) Send(_ context.Context, call proto.Call) {
	method := fmt.Sprintf("Server.%s", call.Args.Method())

	var err error
	for r := retry.Start(s.retryOpts); r.Next(); {
		select {
		case <-s.client.Healthy():
		default:
			err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method)
			log.Warning(err)
			continue
		}

		if err = s.client.Call(method, call.Args, call.Reply); err != nil {
			// Assume all errors sending the request are retryable. The actual
			// number of things that could go wrong is vast, but we don't
			// want to miss any which should in theory be retried with the
			// same client command ID. We log the error here as a warning so
			// there's visibility that this is happening. Some of the errors
			// we'll sweep up in this net shouldn't be retried, but we can't
			// really know for sure which.
			log.Warningf("failed to send RPC request %s: %s", method, err)
			continue
		}

		// On successful post, we're done with the retry loop.
		break
	}
	if err != nil {
		call.Reply.Header().SetGoError(err)
	}
}
// execStmts parses and executes the given sql, producing one result per
// statement. Errors encountered while executing a statement are recorded in
// that statement's result rather than returned.
func (e *Executor) execStmts(sql string, planMaker *planner) driver.Response {
	var resp driver.Response
	stmts, err := planMaker.parser.Parse(sql, parser.Syntax(planMaker.session.Syntax))
	if err != nil {
		// A parse error occurred: we can't determine if there were multiple
		// statements or only one, so just pretend there was one.
		resp.Results = append(resp.Results, makeResultFromError(planMaker, err))
		return resp
	}
	for _, stmt := range stmts {
		result, err := e.execStmt(stmt, planMaker)
		if err != nil {
			result = makeResultFromError(planMaker, err)
		}
		resp.Results = append(resp.Results, result)

		// Release the leases once a transaction is complete.
		if planMaker.txn == nil {
			planMaker.releaseLeases(e.db)

			// The previous transaction finished executing some schema changes.
			// Wait for the schema changes to propagate to all nodes, so that
			// once the executor returns, the new schema is live everywhere.
			// This is not needed for correctness but is done to make the UI
			// experience/tests predictable.
			if err := e.waitForCompletedSchemaChangesToPropagate(planMaker); err != nil {
				log.Warning(err)
			}
		}
	}
	return resp
}
// runBenchmarkBank mirrors the SQL performed by examples/sql_bank, but
// structured as a benchmark for easier usage of the Go performance analysis
// tools like pprof, memprof and trace.
func runBenchmarkBank(b *testing.B, db *sql.DB) {
	if _, err := db.Exec(`CREATE DATABASE IF NOT EXISTS bank`); err != nil {
		b.Fatal(err)
	}

	{
		// Initialize the "accounts" table.
		schema := `
CREATE TABLE IF NOT EXISTS bank.accounts (
  id INT PRIMARY KEY,
  balance INT NOT NULL
)`
		if _, err := db.Exec(schema); err != nil {
			b.Fatal(err)
		}
		if _, err := db.Exec("TRUNCATE TABLE bank.accounts"); err != nil {
			b.Fatal(err)
		}

		var placeholders bytes.Buffer
		var values []interface{}
		for i := 0; i < *numAccounts; i++ {
			if i > 0 {
				placeholders.WriteString(", ")
			}
			fmt.Fprintf(&placeholders, "($%d, 0)", i+1)
			values = append(values, i)
		}
		stmt := `INSERT INTO bank.accounts (id, balance) VALUES ` + placeholders.String()
		if _, err := db.Exec(stmt, values...); err != nil {
			b.Fatal(err)
		}
	}

	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			from := rand.Intn(*numAccounts)
			to := rand.Intn(*numAccounts - 1)
			if from == to {
				to = *numAccounts - 1
			}
			amount := rand.Intn(*maxTransfer)
			update := `
UPDATE bank.accounts
   SET balance = CASE id WHEN $1 THEN balance-$3 WHEN $2 THEN balance+$3 END
 WHERE id IN ($1, $2)
   AND (SELECT balance >= $3 FROM bank.accounts WHERE id = $1)`
			if _, err := db.Exec(update, from, to, amount); err != nil {
				if log.V(1) {
					log.Warning(err)
				}
				continue
			}
		}
	})
	b.StopTimer()
}
func (is *infoStore) runCallbacks(key string, content roachpb.Value, callbacks ...Callback) {
	// Add the callbacks to the callback work list.
	f := func() {
		for _, method := range callbacks {
			method(key, content)
		}
	}
	is.callbackWorkMu.Lock()
	is.callbackWork = append(is.callbackWork, f)
	is.callbackWorkMu.Unlock()

	// Run callbacks in a goroutine to avoid mutex reentry. We also guarantee
	// callbacks are run in order such that if a key is updated twice in
	// succession, the second callback will never be run before the first.
	if err := is.stopper.RunAsyncTask(func() {
		// Grab the callback mutex to serialize execution of the callbacks.
		is.callbackMu.Lock()
		defer is.callbackMu.Unlock()

		// Grab and execute the list of work.
		is.callbackWorkMu.Lock()
		work := is.callbackWork
		is.callbackWork = nil
		is.callbackWorkMu.Unlock()

		for _, w := range work {
			w()
		}
	}); err != nil {
		log.Warning(err)
	}
}
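// Illustrative sketch, not from the CockroachDB source: the ordering guarantee
// above comes from two locks with distinct jobs. A small mutex protects the
// work list so producers can append cheaply; a second mutex serializes the
// drainers, so drained closures always run in append order even when several
// async tasks race to drain. The workQueue type below is hypothetical and
// demonstrates just that two-lock shape.
package main

import (
	"fmt"
	"sync"
)

type workQueue struct {
	mu     sync.Mutex // protects work
	work   []func()
	execMu sync.Mutex // serializes execution of drained work
}

// add appends a closure under the small lock; it never blocks on execution.
func (q *workQueue) add(f func()) {
	q.mu.Lock()
	q.work = append(q.work, f)
	q.mu.Unlock()
}

// drain takes ownership of the pending work and runs it in append order.
func (q *workQueue) drain() {
	q.execMu.Lock()
	defer q.execMu.Unlock()

	q.mu.Lock()
	work := q.work
	q.work = nil
	q.mu.Unlock()

	for _, w := range work {
		w()
	}
}

func main() {
	var q workQueue
	for i := 0; i < 3; i++ {
		i := i
		q.add(func() { fmt.Println("callback", i) })
	}
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		q.drain() // prints callback 0, 1, 2 in order
	}()
	wg.Wait()
}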
func (t *tableState) acquire(txn *client.Txn, version DescriptorVersion, store LeaseStore) (*LeaseState, *roachpb.Error) {
	t.mu.Lock()
	defer t.mu.Unlock()

	for {
		s := t.active.findNewest(version)
		if s != nil {
			if version != 0 && s != t.active.findNewest(0) {
				// If a lease was requested for an old version of the descriptor,
				// return it even if there is only a short time left before it
				// expires. We can't renew this lease as doing so would violate the
				// invariant that we only get leases on the newest version. The
				// transaction will either finish before the lease expires or it will
				// abort, which is what will happen if we returned an error here.
				s.refcount++
				if log.V(3) {
					log.Infof("acquire: descID=%d version=%d refcount=%d", s.ID, s.Version, s.refcount)
				}
				return s, nil
			}
			minDesiredExpiration := store.clock.Now().GoTime().Add(MinLeaseDuration)
			if s.expiration.After(minDesiredExpiration) {
				s.refcount++
				if log.V(3) {
					log.Infof("acquire: descID=%d version=%d refcount=%d", s.ID, s.Version, s.refcount)
				}
				return s, nil
			}
		} else if version != 0 {
			n := t.active.findNewest(0)
			if n != nil && version < n.Version {
				return nil, roachpb.NewErrorf("table %d unable to acquire lease on old version: %d < %d",
					t.id, version, n.Version)
			}
		}

		if t.acquiring != nil {
			// There is already a lease acquisition in progress. Wait for it to
			// complete.
			t.acquireWait()
		} else {
			// There is no active lease acquisition so we'll go ahead and perform
			// one.
			t.acquiring = make(chan struct{})
			s, pErr := t.acquireNodeLease(txn, version, store)
			close(t.acquiring)
			t.acquiring = nil
			if pErr != nil {
				return nil, pErr
			}
			t.active.insert(s)
			if err := t.releaseNonLatest(store); err != nil {
				log.Warning(err)
			}
		}
		// A new lease was added, so loop and perform the lookup again.
	}
}
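// Illustrative sketch, not from the CockroachDB source: the t.acquiring field
// above implements a single-flight handoff. The first caller to find no
// acquisition in progress creates the channel and does the expensive work;
// everyone else waits for the channel to close, then loops to re-check the
// shared state under the lock. The singleFlight type below is hypothetical
// and assumes the waiter drops the mutex while blocked, which is the usual
// shape of such an acquireWait.
package main

import (
	"fmt"
	"sync"
	"time"
)

type singleFlight struct {
	mu        sync.Mutex
	acquiring chan struct{} // non-nil while a fetch is in flight
	value     *string
}

func (s *singleFlight) get(fetch func() string) string {
	s.mu.Lock()
	defer s.mu.Unlock()
	for {
		if s.value != nil {
			return *s.value
		}
		if s.acquiring != nil {
			// Someone else is fetching; wait without holding the lock.
			ch := s.acquiring
			s.mu.Unlock()
			<-ch
			s.mu.Lock()
			continue // re-check shared state after being woken
		}
		s.acquiring = make(chan struct{})
		s.mu.Unlock()
		v := fetch() // expensive work performed outside the lock
		s.mu.Lock()
		s.value = &v
		close(s.acquiring) // wake all waiters
		s.acquiring = nil
	}
}

func main() {
	var s singleFlight
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			fmt.Println(s.get(func() string {
				time.Sleep(10 * time.Millisecond) // simulate lease acquisition
				return "lease@v1"
			}))
		}()
	}
	wg.Wait() // the fetch runs once; all three goroutines print lease@v1
}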
// GetStatusSummaries returns a status summary message for the node, along
// with a status summary for every individual store within the node.
func (nsr *NodeStatusRecorder) GetStatusSummaries() (*NodeStatus, []storage.StoreStatus) {
	nsr.RLock()
	defer nsr.RUnlock()

	if nsr.desc.NodeID == 0 {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning("NodeStatusRecorder.GetStatusSummaries called before StartNode event received.")
		}
		return nil, nil
	}

	now := nsr.clock.PhysicalNow()

	// Generate a node status with no store data.
	nodeStat := &NodeStatus{
		Desc:      nsr.desc,
		UpdatedAt: now,
		StartedAt: nsr.startedAt,
		StoreIDs:  make([]roachpb.StoreID, 0, nsr.lastSummaryCount),
	}

	storeStats := make([]storage.StoreStatus, 0, nsr.lastSummaryCount)
	// Generate status summaries for stores, while accumulating data into the
	// NodeStatus.
	nsr.visitStoreMonitors(func(ssm *StoreStatusMonitor) {
		// Accumulate per-store values into the node status.
		// TODO(mrtracy): A number of the fields on the protocol buffer are
		// Int32s when they would be more easily represented as Int64.
		nodeStat.StoreIDs = append(nodeStat.StoreIDs, ssm.ID)
		nodeStat.Stats.Add(&ssm.stats)
		nodeStat.RangeCount += int32(ssm.rangeCount.Count())
		nodeStat.LeaderRangeCount += int32(ssm.leaderRangeCount.Value())
		nodeStat.ReplicatedRangeCount += int32(ssm.replicatedRangeCount.Value())
		nodeStat.AvailableRangeCount += int32(ssm.availableRangeCount.Value())

		// It's difficult to guarantee that we have the store descriptor yet; we
		// may not have processed a StoreStatusEvent yet for this store. Just
		// skip the store summary in this case.
		if ssm.desc == nil {
			return
		}
		status := storage.StoreStatus{
			Desc:                 *ssm.desc,
			NodeID:               nsr.desc.NodeID,
			UpdatedAt:            now,
			StartedAt:            ssm.startedAt,
			Stats:                ssm.stats,
			RangeCount:           int32(ssm.rangeCount.Count()),
			LeaderRangeCount:     int32(ssm.leaderRangeCount.Value()),
			ReplicatedRangeCount: int32(ssm.replicatedRangeCount.Value()),
			AvailableRangeCount:  int32(ssm.availableRangeCount.Value()),
		}
		storeStats = append(storeStats, status)
	})
	return nodeStat, storeStats
}
// TestTxnMultipleCoord checks that a coordinator uses the Writing flag to
// enforce that only one coordinator can be used for transactional writes.
func TestTxnMultipleCoord(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	for i, tc := range []struct {
		call    proto.Call
		writing bool
		ok      bool
	}{
		{proto.GetCall(proto.Key("a")), true, true},
		{proto.GetCall(proto.Key("a")), false, true},
		{proto.PutCall(proto.Key("a"), proto.Value{}), false, true},
		{proto.PutCall(proto.Key("a"), proto.Value{}), true, false},
	} {
		{
			txn := newTxn(s.Clock, proto.Key("a"))
			txn.Writing = tc.writing
			tc.call.Args.Header().Txn = txn
		}
		err := sendCall(s.Sender, tc.call)
		if err == nil != tc.ok {
			t.Errorf("%d: %T (writing=%t): success_expected=%t, but got: %v",
				i, tc.call.Args, tc.writing, tc.ok, err)
		}
		if err != nil {
			continue
		}

		txn := tc.call.Reply.Header().Txn
		// The transaction should come back read-write if it started read-write
		// or if we just wrote.
		isWrite := proto.IsTransactionWrite(tc.call.Args)
		if (tc.writing || isWrite) != txn.Writing {
			t.Errorf("%d: unexpected writing state: %s", i, txn)
		}
		if !isWrite {
			continue
		}
		// Abort for a clean shutdown.
		etReply := &proto.EndTransactionResponse{}
		if err := sendCall(s.Sender, proto.Call{
			Args: &proto.EndTransactionRequest{
				RequestHeader: proto.RequestHeader{
					Key:       txn.Key,
					Timestamp: txn.Timestamp,
					Txn:       txn,
				},
				Commit: false,
			},
			Reply: etReply,
		}); err != nil {
			log.Warning(err)
			t.Fatal(err)
		}
	}
}
// GetTimeSeriesData returns a slice of interesting TimeSeriesData from the
// encapsulated NodeStatusMonitor.
func (nsr *NodeStatusRecorder) GetTimeSeriesData() []ts.TimeSeriesData {
	nsr.RLock()
	defer nsr.RUnlock()

	if nsr.desc.NodeID == 0 {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning("NodeStatusRecorder.GetTimeSeriesData called before StartNode event received.")
		}
		return nil
	}
	if nsr.source == "" {
		nsr.source = strconv.FormatInt(int64(nsr.desc.NodeID), 10)
	}

	data := make([]ts.TimeSeriesData, 0, nsr.lastDataCount)

	// Record node stats.
	now := nsr.clock.PhysicalNow()
	data = append(data, nsr.recordInt(now, "calls.success", atomic.LoadInt64(&nsr.callCount)))
	data = append(data, nsr.recordInt(now, "calls.error", atomic.LoadInt64(&nsr.callErrors)))

	// Record per-store stats.
	nsr.visitStoreMonitors(func(ssm *StoreStatusMonitor) {
		now := nsr.clock.PhysicalNow()
		ssr := storeStatusRecorder{
			StoreStatusMonitor: ssm,
			timestampNanos:     now,
			source:             strconv.FormatInt(int64(ssm.ID), 10),
		}
		data = append(data, ssr.recordInt("livebytes", ssr.stats.LiveBytes))
		data = append(data, ssr.recordInt("keybytes", ssr.stats.KeyBytes))
		data = append(data, ssr.recordInt("valbytes", ssr.stats.ValBytes))
		data = append(data, ssr.recordInt("intentbytes", ssr.stats.IntentBytes))
		data = append(data, ssr.recordInt("livecount", ssr.stats.LiveCount))
		data = append(data, ssr.recordInt("keycount", ssr.stats.KeyCount))
		data = append(data, ssr.recordInt("valcount", ssr.stats.ValCount))
		data = append(data, ssr.recordInt("intentcount", ssr.stats.IntentCount))
		data = append(data, ssr.recordInt("intentage", ssr.stats.IntentAge))
		data = append(data, ssr.recordInt("gcbytesage", ssr.stats.GCBytesAge))
		data = append(data, ssr.recordInt("lastupdatenanos", ssr.stats.LastUpdateNanos))
		data = append(data, ssr.recordInt("ranges", ssr.rangeCount))
		data = append(data, ssr.recordInt("ranges.leader", int64(ssr.leaderRangeCount)))
		data = append(data, ssr.recordInt("ranges.replicated", int64(ssr.replicatedRangeCount)))
		data = append(data, ssr.recordInt("ranges.available", int64(ssr.availableRangeCount)))

		// Record statistics from the descriptor.
		if ssr.desc != nil {
			capacity := ssr.desc.Capacity
			data = append(data, ssr.recordInt("capacity", int64(capacity.Capacity)))
			data = append(data, ssr.recordInt("capacity.available", int64(capacity.Available)))
		}
	})
	nsr.lastDataCount = len(data)
	return data
}
// releaseLeases implements the SchemaAccessor interface.
func (p *planner) releaseLeases() {
	if p.leases != nil {
		for _, lease := range p.leases {
			if err := p.leaseMgr.Release(lease); err != nil {
				log.Warning(err)
			}
		}
		p.leases = nil
	}
}
// Slice returns the data stored in the underlying storage.
func (es *SampleStorage) Slice() []interface{} {
	startKey := MakeKey(es.prefix, keyDataPrefix)
	res, err := es.engine.Scan(startKey, PrefixEndKey(startKey), es.Seen())
	if err != nil {
		log.Warning(err)
		return nil
	}
	sl := make([]interface{}, len(res))
	for i, kv := range res {
		if dv, err := encoding.GobDecode(kv.Value); err == nil {
			sl[i] = dv
		} else {
			log.Warning(err)
		}
	}
	return sl
}
func (p *planner) releaseLeases(db client.DB) {
	if p.leases != nil {
		for _, lease := range p.leases {
			if pErr := p.leaseMgr.Release(lease); pErr != nil {
				log.Warning(pErr)
			}
		}
		p.leases = nil
	}
}
// RegisterCallback registers a callback for a key pattern to be
// invoked whenever new info for a gossip key matching pattern is
// received. The callback method is invoked with the info key which
// matched pattern.
func (g *Gossip) RegisterCallback(pattern string, method Callback) {
	if pattern == KeySystemConfig {
		// Warningf (not Warning): the message takes a format argument.
		log.Warningf("raw gossip callback registered on %s, consider using RegisterSystemConfigCallback",
			KeySystemConfig)
	}
	g.mu.Lock()
	defer g.mu.Unlock()
	g.is.registerCallback(pattern, method)
}
func logLinuxStats() {
	if !log.V(1) {
		return
	}

	// We don't know which fields in struct mallinfo are most relevant to us yet,
	// so log it all for now.
	//
	// A major caveat is that mallinfo() returns stats only for the main arena.
	// glibc uses multiple allocation arenas to increase malloc performance for
	// multithreaded processes, so mallinfo may not report on significant parts
	// of the heap.
	mi := C.mallinfo()
	log.Infof("mallinfo stats: ordblks=%s, smblks=%s, hblks=%s, hblkhd=%s, usmblks=%s, fsmblks=%s, "+
		"uordblks=%s, fordblks=%s, keepcost=%s",
		humanize.IBytes(uint64(mi.ordblks)), humanize.IBytes(uint64(mi.smblks)),
		humanize.IBytes(uint64(mi.hblks)), humanize.IBytes(uint64(mi.hblkhd)),
		humanize.IBytes(uint64(mi.usmblks)), humanize.IBytes(uint64(mi.fsmblks)),
		humanize.IBytes(uint64(mi.uordblks)), humanize.IBytes(uint64(mi.fordblks)),
		humanize.IBytes(uint64(mi.keepcost))) // was mi.fsmblks, repeated by mistake

	// malloc_info() emits a *lot* of XML, partly because it generates stats for
	// all arenas, unlike mallinfo().
	//
	// TODO(cdo): extract the useful bits and record to time-series DB.
	if !log.V(2) {
		return
	}

	// Create a memstream and make malloc_info() output to it.
	var buf *C.char
	var bufSize C.size_t
	memstream := C.open_memstream(&buf, &bufSize)
	if memstream == nil {
		log.Warning("couldn't open memstream")
		return
	}
	defer func() {
		C.fclose(memstream)
		C.free(unsafe.Pointer(buf))
	}()
	if rc := C.malloc_info(0, memstream); rc != 0 {
		log.Warningf("malloc_info returned %d", rc)
		return
	}
	if rc := C.fflush(memstream); rc != 0 {
		log.Warningf("fflush returned %d", rc)
		return
	}
	log.Infof("malloc_info: %s", C.GoString(buf))
}
// executeCmd interprets the given message as a *roachpb.BatchRequest and sends it
// via the local sender.
func (n *Node) executeCmd(argsI proto.Message) (proto.Message, error) {
	ba := argsI.(*roachpb.BatchRequest)
	var br *roachpb.BatchResponse
	opName := "node " + strconv.Itoa(int(n.Descriptor.NodeID)) // could save allocs here

	fail := func(err error) {
		br = &roachpb.BatchResponse{}
		br.Error = roachpb.NewError(err)
	}

	f := func() {
		sp, err := tracing.JoinOrNew(n.ctx.Tracer, ba.Trace, opName)
		if err != nil {
			fail(err)
			return
		}
		// If this is a snowball span, it gets special treatment: It skips the
		// regular tracing machinery, and we instead send the collected spans
		// back with the response. This is more expensive, but then again,
		// those are individual requests traced by users, so they can be.
		if sp.BaggageItem(tracing.Snowball) != "" {
			if sp, err = tracing.JoinOrNewSnowball(opName, ba.Trace, func(rawSpan basictracer.RawSpan) {
				encSp, err := tracing.EncodeRawSpan(&rawSpan, nil)
				if err != nil {
					log.Warning(err)
				}
				br.CollectedSpans = append(br.CollectedSpans, encSp)
			}); err != nil {
				fail(err)
				return
			}
		}
		defer sp.Finish()
		ctx := opentracing.ContextWithSpan(n.context(), sp)

		tStart := time.Now()
		var pErr *roachpb.Error
		br, pErr = n.stores.Send(ctx, *ba)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
			sp.LogEvent(fmt.Sprintf("error: %T", pErr.GetDetail()))
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(n.stores, br))
		}
		n.metrics.callComplete(time.Since(tStart), pErr)
		br.Error = pErr
	}

	if !n.stopper.RunTask(f) {
		return nil, util.Errorf("node %d stopped", n.Descriptor.NodeID)
	}
	return br, nil
}
// shouldQueue determines whether a range should be queued for truncating. This
// is true only if the replica is the raft leader and if the total number of
// the range's raft log's stale entries exceeds RaftLogQueueStaleThreshold.
func (*raftLogQueue) shouldQueue(now roachpb.Timestamp, r *Replica,
	_ *config.SystemConfig) (shouldQ bool, priority float64) {
	truncatableIndexes, _, err := getTruncatableIndexes(r)
	if err != nil {
		log.Warning(err)
		return false, 0
	}
	return truncatableIndexes > RaftLogQueueStaleThreshold, float64(truncatableIndexes)
}
func (s *state) stop() {
	log.V(6).Infof("node %v stopping", s.nodeID)
	for _, n := range s.nodes {
		if err := n.client.conn.Close(); err != nil {
			log.Warning("error stopping client:", err)
		}
	}
	s.writeTask.stop()
	close(s.stopped)
}
// RefreshLeases starts a goroutine that refreshes the lease manager's leases
// for tables received in the latest system configuration via gossip.
func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) {
	s.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(DescriptorTable.ID))
		gossip.RegisterSystemConfigCallback(m.updateSystemConfig)
		for {
			select {
			case <-m.newConfig:
				// Read all tables and their versions.
				cfg := m.getSystemConfig()
				if log.V(2) {
					// Infof (not Info): the message takes a format argument.
					log.Infof("received a new config %v", cfg)
				}

				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if kv.Value.Tag != roachpb.ValueType_BYTES {
						continue
					}
					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal the config into a table/database descriptor.
					var descriptor Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf("unable to unmarshal descriptor %v", kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *Descriptor_Table:
						table := union.Table
						if err := table.Validate(); err != nil {
							log.Errorf("received invalid table descriptor: %v", table)
							continue
						}
						if log.V(2) {
							log.Infof("refreshing lease table: %d, version: %d",
								table.ID, table.Version)
						}
						// Try to refresh the table lease to one >= this version.
						if err := m.refreshLease(db, table.ID, table.Version); err != nil {
							log.Warning(err)
						}
					case *Descriptor_Database:
						// Ignore.
					}
				}

			case <-s.ShouldStop():
				return
			}
		}
	})
}
// Put adds an element to the storage at the index specified.
func (es *SampleStorage) Put(i int, v interface{}) {
	if i < 0 || i >= es.Size() {
		return
	}
	if err := PutI(es.engine, es.indexToKey(i), &v); err != nil {
		log.Warning(err)
	} else {
		es.incVar(keySeen, 1)
	}
}
// GetStatusSummary returns a status summary message for the node. The summary
// includes the recent values of metrics for both the node and all of its
// component stores.
func (mr *MetricsRecorder) GetStatusSummary() *NodeStatus {
	mr.mu.Lock()
	defer mr.mu.Unlock()

	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "MetricsRecorder.GetStatusSummary called before NodeID allocation.")
		}
		return nil
	}

	now := mr.mu.clock.PhysicalNow()

	// Generate a node status with no store data.
	nodeStat := &NodeStatus{
		Desc:          mr.mu.desc,
		BuildInfo:     build.GetInfo(),
		UpdatedAt:     now,
		StartedAt:     mr.mu.startedAt,
		StoreStatuses: make([]StoreStatus, 0, mr.mu.lastSummaryCount),
		Metrics:       make(map[string]float64, mr.mu.lastNodeMetricCount),
	}

	eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) {
		nodeStat.Metrics[name] = val
	})

	// Generate status summaries for stores.
	for storeID, r := range mr.mu.storeRegistries {
		storeMetrics := make(map[string]float64, mr.mu.lastStoreMetricCount)
		eachRecordableValue(r, func(name string, val float64) {
			storeMetrics[name] = val
		})

		// Gather the descriptor from the store.
		descriptor, err := mr.mu.stores[storeID].Descriptor()
		if err != nil {
			log.Errorf(context.TODO(), "Could not record status summaries: Store %d could not return descriptor, error: %s",
				storeID, err)
			continue
		}

		nodeStat.StoreStatuses = append(nodeStat.StoreStatuses, StoreStatus{
			Desc:    *descriptor,
			Metrics: storeMetrics,
		})
	}

	mr.mu.lastSummaryCount = len(nodeStat.StoreStatuses)
	mr.mu.lastNodeMetricCount = len(nodeStat.Metrics)
	if len(nodeStat.StoreStatuses) > 0 {
		mr.mu.lastStoreMetricCount = len(nodeStat.StoreStatuses[0].Metrics)
	}
	return nodeStat
}