Example #1
func (s *Server) reportUsage() {
	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(s.getReportingInfo()); err != nil {
		log.Warning(context.TODO(), err)
		return
	}

	q := reportingURL.Query()
	q.Set("version", build.GetInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	reportingURL.RawQuery = q.Encode()

	res, err := http.Post(reportingURL.String(), "application/json", b)
	if err != nil {
		if log.V(2) {
			// This is probably going to be relatively common in production
			// environments where network access is usually curtailed.
			log.Warning(context.TODO(), "Failed to report node usage metrics: ", err)
		}
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		b, err := ioutil.ReadAll(res.Body)
		log.Warningf(context.TODO(), "Failed to report node usage metrics: status: %s, body: %s, "+
			"error: %v", res.Status, b, err)
	}
}
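A note on the pattern above: the report is JSON-encoded into a bytes.Buffer and then POSTed, and on a transport error the *http.Response is nil, so it must not be touched. A minimal, self-contained sketch of the same flow using only the standard library (the URL and payload here are made up for illustration, not CockroachDB's reporting endpoint):

package main

import (
	"bytes"
	"encoding/json"
	"io/ioutil"
	"log"
	"net/http"
)

func main() {
	payload := map[string]string{"version": "v1.0-example", "uuid": "hypothetical-cluster-id"}

	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(payload); err != nil {
		log.Printf("encode failed: %v", err)
		return
	}

	// On error the response is nil; return before dereferencing it.
	res, err := http.Post("https://example.com/report", "application/json", b)
	if err != nil {
		log.Printf("failed to report usage: %v", err)
		return
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		body, _ := ioutil.ReadAll(res.Body)
		log.Printf("report rejected: status=%s body=%s", res.Status, body)
	}
}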
Example #2
File: txn.go Project: l2x/cockroach
func (txn *Txn) exec(retryable func(txn *Txn) *roachpb.Error) *roachpb.Error {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var pErr *roachpb.Error
	for r := retry.Start(txn.db.txnRetryOptions); r.Next(); {
		pErr = retryable(txn)
		if pErr == nil && txn.Proto.Status == roachpb.PENDING {
			// retryable succeeded, but didn't commit.
			pErr = txn.commit(nil)
		}

		if pErr != nil {
			switch pErr.TransactionRestart {
			case roachpb.TransactionRestart_IMMEDIATE:
				if log.V(2) {
					log.Warning(pErr)
				}
				r.Reset()
				continue
			case roachpb.TransactionRestart_BACKOFF:
				if log.V(2) {
					log.Warning(pErr)
				}
				continue
			}
			// By default, fall through and break.
		}
		break
	}
	txn.Cleanup(pErr)
	return pErr
}
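The TransactionRestart_IMMEDIATE and TransactionRestart_BACKOFF cases above differ only in what they do to the retry loop: IMMEDIATE resets the backoff (r.Reset()) before continuing, while BACKOFF keeps the growing delay. A stripped-down sketch of that control flow using only the standard library (the error values and delays are invented for illustration):

package main

import (
	"errors"
	"fmt"
	"time"
)

var (
	errRetryImmediate = errors.New("retry immediately")  // stand-in for TransactionRestart_IMMEDIATE
	errRetryBackoff   = errors.New("retry with backoff") // stand-in for TransactionRestart_BACKOFF
)

// runWithRetry keeps calling op until it succeeds or returns a non-retryable error.
func runWithRetry(op func(attempt int) error) error {
	const initialBackoff = 10 * time.Millisecond
	backoff := initialBackoff
	for attempt := 0; ; attempt++ {
		err := op(attempt)
		switch err {
		case nil:
			return nil
		case errRetryImmediate:
			backoff = initialBackoff // like r.Reset(): start the backoff over
		case errRetryBackoff:
			backoff *= 2 // like a plain continue: keep growing the delay
		default:
			return err // not retryable; give up
		}
		time.Sleep(backoff)
	}
}

func main() {
	err := runWithRetry(func(attempt int) error {
		if attempt < 2 {
			return errRetryBackoff
		}
		return nil
	})
	fmt.Println("done:", err)
}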
Example #3
func (p *planner) releaseLeases(db client.DB) {
	if p.leases != nil {
		for _, lease := range p.leases {
			if err := p.leaseMgr.Release(lease); err != nil {
				log.Warning(err)
			}
		}
		p.leases = nil
	}

	// TODO(pmattis): This is a hack. Remove when schema change operations work
	// properly.
	if p.modifiedSchemas != nil {
		for _, d := range p.modifiedSchemas {
			var lease *LeaseState
			err := db.Txn(func(txn *client.Txn) error {
				var err error
				lease, err = p.leaseMgr.Acquire(txn, d.id, d.version)
				return err
			})
			if err != nil {
				log.Warning(err)
				continue
			}
			if err := p.leaseMgr.Release(lease); err != nil {
				log.Warning(err)
			}
		}
		p.modifiedSchemas = nil
	}
}
Example #4
// Execute the entire schema change in steps.
func (sc SchemaChanger) exec() *roachpb.Error {
	// Acquire lease.
	lease, pErr := sc.AcquireLease()
	if pErr != nil {
		return pErr
	}
	// Always try to release lease.
	defer func(l *TableDescriptor_SchemaChangeLease) {
		if pErr := sc.ReleaseLease(*l); pErr != nil {
			log.Warning(pErr)
		}
	}(&lease)

	// Increment the version and unset tableDescriptor.UpVersion.
	if pErr := sc.MaybeIncrementVersion(); pErr != nil {
		return pErr
	}

	// Wait for the schema change to propagate to all nodes after this function
	// returns, so that the new schema is live everywhere. This is not needed for
	// correctness but is done to make the UI experience/tests predictable.
	defer func() {
		if pErr := sc.waitToUpdateLeases(); pErr != nil {
			log.Warning(pErr)
		}
	}()

	if sc.mutationID == invalidMutationID {
		// Nothing more to do.
		return nil
	}

	// Another transaction might set the up_version bit again,
	// but we're no longer responsible for taking care of that.

	// Run through mutation state machine before backfill.
	if pErr := sc.RunStateMachineBeforeBackfill(); pErr != nil {
		return pErr
	}

	// Apply backfill.
	if pErr := sc.applyMutations(&lease); pErr != nil {
		// Purge the mutations if applying them fails.
		if errPurge := sc.purgeMutations(&lease); errPurge != nil {
			return roachpb.NewErrorf("error purging mutation: %s, after error: %s", errPurge, pErr)
		}
		return pErr
	}

	// Mark the mutations as completed.
	return sc.done()
}
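One detail worth noting in the example above: the deferred release is passed &lease rather than lease, so the closure dereferences the pointer at return time and releases whatever the variable holds by then, not a copy captured when the defer was registered. A tiny standalone illustration of the difference:

package main

import "fmt"

func main() {
	lease := "initial"

	// Pointer argument: the deferred call dereferences it when it runs,
	// so it sees the final value of lease ("renewed").
	defer func(l *string) {
		fmt.Println("pointer sees:", *l)
	}(&lease)

	// Value argument: the copy is taken now, before the update below.
	defer func(l string) {
		fmt.Println("copy sees:", l)
	}(lease)

	lease = "renewed"
}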
Example #5
// runHeartbeat sends periodic heartbeats to client, marking the client healthy
// or unhealthy and reconnecting appropriately until either the Client or the
// supplied channel is closed.
func (c *Client) runHeartbeat(retryOpts retry.Options, closer <-chan struct{}) {
	isHealthy := false
	setHealthy := func() {
		if isHealthy {
			return
		}
		isHealthy = true
		close(c.healthy.Load().(chan struct{}))
	}
	setUnhealthy := func() {
		if isHealthy {
			isHealthy = false
			c.healthy.Store(make(chan struct{}))
		}
	}

	var err = errUnstarted // initial condition
	for {
		for r := retry.Start(retryOpts); r.Next(); {
			// Reconnect on failure.
			if err != nil {
				if err = c.connect(); err != nil {
					setUnhealthy()
					log.Warning(err)
					continue
				}
			}

			// Heartbeat regardless of failure.
			if err = c.heartbeat(); err != nil {
				setUnhealthy()
				log.Warning(err)
				continue
			}

			setHealthy()
			break
		}

		// Wait after the heartbeat so that the first iteration gets a wait-free
		// heartbeat attempt.
		select {
		case <-closer:
			c.Close()
			return
		case <-c.Closed:
			return
		case <-time.After(heartbeatInterval):
			// TODO(tamird): Perhaps retry more aggressively when the client is unhealthy.
		}
	}
}
Example #6
// runHeartbeat sends periodic heartbeats to client, marking the client healthy
// or unhealthy and reconnecting appropriately until either the Client or the
// supplied channel is closed.
func (c *Client) runHeartbeat(retryOpts retry.Options, closer <-chan struct{}) {
	isHealthy := false
	setHealthy := func() {
		if isHealthy {
			return
		}
		isHealthy = true
		close(c.healthy.Load().(chan struct{}))
	}
	setUnhealthy := func() {
		if isHealthy {
			isHealthy = false
			c.healthy.Store(make(chan struct{}))
		}
	}

	connErr := errUnstarted // initial condition
	var beatErr error
	for {
		for r := retry.Start(retryOpts); r.Next(); {
			// Reconnect if connection failed or heartbeat error is not
			// definitely temporary.
			if netErr, ok := beatErr.(net.Error); connErr != nil || beatErr != nil && !(ok && netErr.Temporary()) {
				if connErr = c.connect(); connErr != nil {
					log.Warning(connErr)
					setUnhealthy()
					continue
				}
			}

			if beatErr = c.heartbeat(); beatErr == nil {
				setHealthy()
				break
			} else {
				log.Warning(beatErr)
				setUnhealthy()
			}
		}
		// Wait after the heartbeat so that the first iteration gets a wait-free
		// heartbeat attempt.
		select {
		case <-closer:
			c.Close()
			return
		case <-c.Closed:
			return
		case <-time.After(heartbeatInterval):
			// TODO(tamird): Perhaps retry more aggressively when the client is unhealthy.
		}
	}
}
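Both heartbeat variants publish health through a channel stored in an atomic.Value: setHealthy closes the current channel so anyone receiving on it unblocks, and setUnhealthy swaps in a fresh, open channel. A self-contained sketch of just that mechanism, with illustrative names (the real code additionally tracks an isHealthy flag so the same channel is never closed twice):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type client struct {
	healthy atomic.Value // always holds a chan struct{}
}

func newClient() *client {
	c := &client{}
	c.healthy.Store(make(chan struct{}))
	return c
}

// Healthy returns a channel that is closed while the client is healthy.
func (c *client) Healthy() <-chan struct{} {
	return c.healthy.Load().(chan struct{})
}

// setHealthy closes the current channel, releasing every waiter.
func (c *client) setHealthy() { close(c.healthy.Load().(chan struct{})) }

// setUnhealthy swaps in a fresh, open channel for future waiters.
func (c *client) setUnhealthy() { c.healthy.Store(make(chan struct{})) }

func main() {
	c := newClient()
	go func() {
		time.Sleep(50 * time.Millisecond) // pretend the first heartbeat takes a while
		c.setHealthy()
	}()
	<-c.Healthy() // blocks until setHealthy closes the channel
	fmt.Println("client is healthy")
	c.setUnhealthy()
}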
Example #7
// removeLeaseIfExpiring removes a lease and returns true if it is about to expire.
// The method also resets the transaction deadline.
func (p *planner) removeLeaseIfExpiring(lease *LeaseState) bool {
	if lease == nil || lease.hasSomeLifeLeft(p.leaseMgr.clock) {
		return false
	}

	// Remove the lease from p.leases.
	idx := -1
	for i, l := range p.leases {
		if l == lease {
			idx = i
			break
		}
	}
	if idx == -1 {
		log.Warningf(p.ctx(), "lease (%s) not found", lease)
		return false
	}
	p.leases[idx] = p.leases[len(p.leases)-1]
	p.leases[len(p.leases)-1] = nil
	p.leases = p.leases[:len(p.leases)-1]

	if err := p.leaseMgr.Release(lease); err != nil {
		log.Warning(p.ctx(), err)
	}

	// Reset the deadline so that a new deadline will be set after the lease is acquired.
	p.txn.ResetDeadline()
	for _, l := range p.leases {
		p.txn.UpdateDeadlineMaybe(hlc.Timestamp{WallTime: l.Expiration().UnixNano()})
	}
	return true
}
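The slice removal above uses the swap-with-last idiom: move the final element into the slot being vacated, nil out the final slot so the garbage collector can reclaim the lease it pointed to, then shrink the slice by one. In isolation, the idiom looks like this (note that it does not preserve element order):

package main

import "fmt"

// removeAt deletes s[idx] by swapping in the last element; order is not preserved.
func removeAt(s []*int, idx int) []*int {
	s[idx] = s[len(s)-1] // move the last element into the hole
	s[len(s)-1] = nil    // drop the reference so it can be garbage collected
	return s[:len(s)-1]
}

func main() {
	a, b, c := 1, 2, 3
	s := []*int{&a, &b, &c}
	s = removeAt(s, 0)
	for _, p := range s {
		fmt.Print(*p, " ") // prints: 3 2
	}
	fmt.Println()
}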
Example #8
// Batch sends a request to Cockroach via RPC. Errors which are retryable are
// retried with backoff in a loop using the default retry options. Other errors
// sending the request are retried indefinitely using the same client command
// ID to avoid reporting failure when in fact the command may have gone through
// and been executed successfully. We retry here to eventually get through with
// the same client command ID and be given the cached response.
func (s *rpcSender) Send(ctx context.Context, ba proto.BatchRequest) (*proto.BatchResponse, *proto.Error) {
	var err error
	var br proto.BatchResponse
	for r := retry.Start(s.retryOpts); r.Next(); {
		select {
		case <-s.client.Healthy():
		default:
			err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method)
			log.Warning(err)
			continue
		}

		if err = s.client.Call(method, &ba, &br); err != nil {
			br.Reset() // don't trust anyone.
			// Assume all errors sending request are retryable. The actual
			// number of things that could go wrong is vast, but we don't
			// want to miss any which should in theory be retried with the
			// same client command ID. We log the error here as a warning so
			// there's visibility that this is happening. Some of the errors
			// we'll sweep up in this net shouldn't be retried, but we can't
			// really know for sure which.
			log.Warningf("failed to send RPC request %s: %s", method, err)
			continue
		}

		// On successful post, we're done with retry loop.
		break
	}
	if err != nil {
		return nil, proto.NewError(err)
	}
	pErr := br.Error
	br.Error = nil
	return &br, pErr
}
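The select with a default arm above is a non-blocking health probe: receiving from the channel returned by Healthy() succeeds immediately once that channel has been closed, and otherwise the default case fires. A tiny standalone illustration of that idiom:

package main

import "fmt"

func main() {
	healthy := make(chan struct{}) // closed once the client becomes healthy

	isHealthy := func() bool {
		select {
		case <-healthy:
			return true // receive succeeds because the channel is closed
		default:
			return false // would block: not healthy yet
		}
	}

	fmt.Println("healthy?", isHealthy()) // healthy? false
	close(healthy)
	fmt.Println("healthy?", isHealthy()) // healthy? true
}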
Example #9
func (txn *Txn) exec(retryable func(txn *Txn) error) error {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	var err error
	for r := retry.Start(txn.db.txnRetryOptions); r.Next(); {
		err = retryable(txn)
		if err == nil && txn.Proto.Status == roachpb.PENDING {
			// retryable succeeded, but didn't commit.
			err = txn.commit(nil)
		}
		if restartErr, ok := err.(roachpb.TransactionRestartError); ok {
			if log.V(2) {
				log.Warning(err)
			}
			switch restartErr.CanRestartTransaction() {
			case roachpb.TransactionRestart_IMMEDIATE:
				r.Reset()
				continue
			case roachpb.TransactionRestart_BACKOFF:
				continue
			}
			// By default, fall through and break.
		}
		break
	}
	txn.Cleanup(err)
	return err
}
Example #10
// Send implements the client.Sender interface.
func (rls *retryableLocalSender) Send(_ context.Context, call proto.Call) {
	// Instant retry to handle the case of a range split, which is
	// exposed here as a RangeKeyMismatchError.
	retryOpts := retry.Options{}
	// In local tests, the RPCs are not actually sent over the wire. We
	// need to clone the Txn in order to avoid unexpected sharing
	// between TxnCoordSender and client.Txn.
	if header := call.Args.Header(); header.Txn != nil {
		header.Txn = gogoproto.Clone(header.Txn).(*proto.Transaction)
	}

	var err error
	for r := retry.Start(retryOpts); r.Next(); {
		call.Reply.Header().Error = nil
		rls.LocalSender.Send(context.TODO(), call)
		// Check for range key mismatch error (this could happen if
		// range was split between lookup and execution). In this case,
		// reset header.Replica and engage retry loop.
		if err = call.Reply.Header().GoError(); err != nil {
			if _, ok := err.(*proto.RangeKeyMismatchError); ok {
				// Clear request replica.
				call.Args.Header().Replica = proto.Replica{}
				log.Warning(err)
				continue
			}
		}
		return
	}
	panic(fmt.Sprintf("local sender did not succeed: %s", err))
}
Example #11
func (txn *Txn) exec(retryable func(txn *Txn) error) (err error) {
	// Run retryable in a retry loop until we encounter a success or
	// error condition this loop isn't capable of handling.
	for r := retry.Start(txn.db.txnRetryOptions); r.Next(); {
		txn.haveTxnWrite, txn.haveEndTxn = false, false // always reset before [re]starting txn
		if err = retryable(txn); err == nil {
			if !txn.haveEndTxn && txn.haveTxnWrite {
				// If there were no errors running retryable, commit the txn. This
				// may block waiting for outstanding writes to complete in case
				// retryable didn't -- we need the most recent of all response
				// timestamps in order to commit.
				err = txn.Commit()
			}
		}
		if restartErr, ok := err.(proto.TransactionRestartError); ok {
			if log.V(2) {
				log.Warning(err)
			}
			if restartErr.CanRestartTransaction() == proto.TransactionRestart_IMMEDIATE {
				r.Reset()
				continue
			} else if restartErr.CanRestartTransaction() == proto.TransactionRestart_BACKOFF {
				continue
			}
			// By default, fall through and break.
		}
		break
	}
	if err != nil && txn.haveTxnWrite {
		if replyErr := txn.Rollback(); replyErr != nil {
			log.Errorf("failure aborting transaction: %s; abort caused by: %s", replyErr, err)
		}
	}
	return
}
Example #12
// Send sends call to Cockroach via an RPC request. Errors which are retryable
// are retried with backoff in a loop using the default retry options. Other
// errors sending the request are retried indefinitely using the same client
// command ID to avoid reporting failure when in fact the command may have gone
// through and been executed successfully. We retry here to eventually get
// through with the same client command ID and be given the cached response.
func (s *rpcSender) Send(_ context.Context, call proto.Call) {
	method := fmt.Sprintf("Server.%s", call.Args.Method())

	var err error
	for r := retry.Start(s.retryOpts); r.Next(); {
		select {
		case <-s.client.Healthy():
		default:
			err = fmt.Errorf("failed to send RPC request %s: client is unhealthy", method)
			log.Warning(err)
			continue
		}

		if err = s.client.Call(method, call.Args, call.Reply); err != nil {
			// Assume all errors sending request are retryable. The actual
			// number of things that could go wrong is vast, but we don't
			// want to miss any which should in theory be retried with the
			// same client command ID. We log the error here as a warning so
			// there's visibility that this is happening. Some of the errors
			// we'll sweep up in this net shouldn't be retried, but we can't
			// really know for sure which.
			log.Warningf("failed to send RPC request %s: %s", method, err)
			continue
		}

		// On successful post, we're done with retry loop.
		break
	}
	if err != nil {
		call.Reply.Header().SetGoError(err)
	}
}
Example #13
// execStmts parses and runs the given SQL statements, returning a driver.Response
// with one result per statement; errors are converted into results rather than returned.
func (e *Executor) execStmts(sql string, planMaker *planner) driver.Response {
	var resp driver.Response
	stmts, err := planMaker.parser.Parse(sql, parser.Syntax(planMaker.session.Syntax))
	if err != nil {
		// A parse error occurred: we can't determine if there were multiple
		// statements or only one, so just pretend there was one.
		resp.Results = append(resp.Results, makeResultFromError(planMaker, err))
		return resp
	}
	for _, stmt := range stmts {
		result, err := e.execStmt(stmt, planMaker)
		if err != nil {
			result = makeResultFromError(planMaker, err)
		}
		resp.Results = append(resp.Results, result)
		// Release the leases once a transaction is complete.
		if planMaker.txn == nil {
			planMaker.releaseLeases(e.db)

			// The previous transaction finished executing some schema changes. Wait for
			// the schema changes to propagate to all nodes, so that once the executor
			// returns the new schema are live everywhere. This is not needed for
			// correctness but is done to make the UI experience/tests predictable.
			if err := e.waitForCompletedSchemaChangesToPropagate(planMaker); err != nil {
				log.Warning(err)
			}
		}
	}
	return resp
}
Example #14
// runBenchmarkBank mirrors the SQL performed by examples/sql_bank, but
// structured as a benchmark for easier usage of the Go performance analysis
// tools like pprof, memprof and trace.
func runBenchmarkBank(b *testing.B, db *sql.DB) {
	if _, err := db.Exec(`CREATE DATABASE IF NOT EXISTS bank`); err != nil {
		b.Fatal(err)
	}

	{
		// Initialize the "accounts" table.
		schema := `
CREATE TABLE IF NOT EXISTS bank.accounts (
  id INT PRIMARY KEY,
  balance INT NOT NULL
)`
		if _, err := db.Exec(schema); err != nil {
			b.Fatal(err)
		}
		if _, err := db.Exec("TRUNCATE TABLE bank.accounts"); err != nil {
			b.Fatal(err)
		}

		var placeholders bytes.Buffer
		var values []interface{}
		for i := 0; i < *numAccounts; i++ {
			if i > 0 {
				placeholders.WriteString(", ")
			}
			fmt.Fprintf(&placeholders, "($%d, 0)", i+1)
			values = append(values, i)
		}
		stmt := `INSERT INTO bank.accounts (id, balance) VALUES ` + placeholders.String()
		if _, err := db.Exec(stmt, values...); err != nil {
			b.Fatal(err)
		}
	}

	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			from := rand.Intn(*numAccounts)
			to := rand.Intn(*numAccounts - 1)
			if from == to {
				to = *numAccounts - 1
			}

			amount := rand.Intn(*maxTransfer)

			update := `
UPDATE bank.accounts
  SET balance = CASE id WHEN $1 THEN balance-$3 WHEN $2 THEN balance+$3 END
  WHERE id IN ($1, $2) AND (SELECT balance >= $3 FROM bank.accounts WHERE id = $1)
`
			if _, err := db.Exec(update, from, to, amount); err != nil {
				if log.V(1) {
					log.Warning(err)
				}
				continue
			}
		}
	})
	b.StopTimer()
}
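The setup phase above builds one multi-row INSERT by accumulating ($1, 0), ($2, 0), … placeholders alongside a matching slice of arguments. That string-building step can be exercised on its own, without a database (function and table names here are just for illustration):

package main

import (
	"bytes"
	"fmt"
)

// buildInsert returns a multi-row INSERT statement and its argument slice,
// with one "($n, 0)" placeholder group per account.
func buildInsert(numAccounts int) (string, []interface{}) {
	var placeholders bytes.Buffer
	values := make([]interface{}, 0, numAccounts)
	for i := 0; i < numAccounts; i++ {
		if i > 0 {
			placeholders.WriteString(", ")
		}
		fmt.Fprintf(&placeholders, "($%d, 0)", i+1)
		values = append(values, i)
	}
	return `INSERT INTO bank.accounts (id, balance) VALUES ` + placeholders.String(), values
}

func main() {
	stmt, values := buildInsert(3)
	fmt.Println(stmt)   // INSERT INTO bank.accounts (id, balance) VALUES ($1, 0), ($2, 0), ($3, 0)
	fmt.Println(values) // [0 1 2]
}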
Example #15
func (is *infoStore) runCallbacks(key string, content roachpb.Value, callbacks ...Callback) {
	// Add the callbacks to the callback work list.
	f := func() {
		for _, method := range callbacks {
			method(key, content)
		}
	}
	is.callbackWorkMu.Lock()
	is.callbackWork = append(is.callbackWork, f)
	is.callbackWorkMu.Unlock()

	// Run callbacks in a goroutine to avoid mutex reentry. We also guarantee
	// callbacks are run in order such that if a key is updated twice in
	// succession, the second callback will never be run before the first.
	if err := is.stopper.RunAsyncTask(func() {
		// Grab the callback mutex to serialize execution of the callbacks.
		is.callbackMu.Lock()
		defer is.callbackMu.Unlock()

		// Grab and execute the list of work.
		is.callbackWorkMu.Lock()
		work := is.callbackWork
		is.callbackWork = nil
		is.callbackWorkMu.Unlock()

		for _, w := range work {
			w()
		}
	}); err != nil {
		log.Warning(err)
	}
}
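The two mutexes above play different roles: callbackWorkMu only guards the pending work list, while callbackMu serializes execution of the callbacks so updates are observed in the order they were enqueued, and neither is held while a callback runs inside the infoStore's own lock. A reduced sketch of the same idea, using a plain goroutine and WaitGroup in place of the stopper (names are illustrative):

package main

import (
	"fmt"
	"sync"
)

type runner struct {
	workMu sync.Mutex // guards the pending work list only
	work   []func()
	execMu sync.Mutex // serializes callback execution
	wg     sync.WaitGroup
}

// enqueue appends f to the work list and kicks off a drainer goroutine.
func (r *runner) enqueue(f func()) {
	r.workMu.Lock()
	r.work = append(r.work, f)
	r.workMu.Unlock()

	r.wg.Add(1)
	go func() {
		defer r.wg.Done()
		// Take execMu first so batches run one at a time, in enqueue order.
		r.execMu.Lock()
		defer r.execMu.Unlock()

		// Grab whatever work has accumulated and run it.
		r.workMu.Lock()
		work := r.work
		r.work = nil
		r.workMu.Unlock()

		for _, w := range work {
			w()
		}
	}()
}

func main() {
	var r runner
	for i := 0; i < 3; i++ {
		i := i
		r.enqueue(func() { fmt.Println("callback", i) })
	}
	r.wg.Wait()
}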
Example #16
func (t *tableState) acquire(txn *client.Txn, version DescriptorVersion, store LeaseStore) (*LeaseState, *roachpb.Error) {
	t.mu.Lock()
	defer t.mu.Unlock()

	for {
		s := t.active.findNewest(version)
		if s != nil {
			if version != 0 && s != t.active.findNewest(0) {
				// If a lease was requested for an old version of the descriptor,
				// return it even if there is only a short time left before it
				// expires. We can't renew this lease as doing so would violate the
				// invariant that we only get leases on the newest version. The
				// transaction will either finish before the lease expires or it will
				// abort, which is what will happen if we returned an error here.
				s.refcount++
				if log.V(3) {
					log.Infof("acquire: descID=%d version=%d refcount=%d", s.ID, s.Version, s.refcount)
				}
				return s, nil
			}
			minDesiredExpiration := store.clock.Now().GoTime().Add(MinLeaseDuration)
			if s.expiration.After(minDesiredExpiration) {
				s.refcount++
				if log.V(3) {
					log.Infof("acquire: descID=%d version=%d refcount=%d", s.ID, s.Version, s.refcount)
				}
				return s, nil
			}
		} else if version != 0 {
			n := t.active.findNewest(0)
			if n != nil && version < n.Version {
				return nil, roachpb.NewErrorf("table %d unable to acquire lease on old version: %d < %d",
					t.id, version, n.Version)
			}
		}

		if t.acquiring != nil {
			// There is already a lease acquisition in progress. Wait for it to complete.
			t.acquireWait()
		} else {
			// There is no active lease acquisition so we'll go ahead and perform
			// one.
			t.acquiring = make(chan struct{})
			s, pErr := t.acquireNodeLease(txn, version, store)
			close(t.acquiring)
			t.acquiring = nil
			if pErr != nil {
				return nil, pErr
			}
			t.active.insert(s)
			if err := t.releaseNonLatest(store); err != nil {
				log.Warning(err)
			}
		}

		// A new lease was added, so loop and perform the lookup again.
	}
}
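Lease acquisition above is single-flighted through the t.acquiring channel: the first caller installs the channel, performs the acquisition, and closes it; concurrent callers see a non-nil channel, wait on it, and then retry the lookup. A reduced, self-contained sketch of that coordination with the locking simplified (names and the fake lease store are invented for illustration):

package main

import (
	"fmt"
	"sync"
	"time"
)

type table struct {
	mu        sync.Mutex
	acquiring chan struct{} // non-nil while an acquisition is in flight
	lease     string
}

func (t *table) acquire(caller int) string {
	t.mu.Lock()
	defer t.mu.Unlock()
	for {
		if t.lease != "" {
			return t.lease // someone already acquired it
		}
		if t.acquiring != nil {
			// Another caller is acquiring; wait without holding the lock, then retry.
			ch := t.acquiring
			t.mu.Unlock()
			<-ch
			t.mu.Lock()
			continue
		}
		// We perform the acquisition ourselves.
		t.acquiring = make(chan struct{})
		t.mu.Unlock()
		time.Sleep(50 * time.Millisecond) // stand-in for talking to the lease store
		lease := fmt.Sprintf("lease-acquired-by-%d", caller)
		t.mu.Lock()
		t.lease = lease
		close(t.acquiring) // wake up any waiters
		t.acquiring = nil
	}
}

func main() {
	var t table
	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			fmt.Printf("caller %d got %q\n", i, t.acquire(i))
		}(i)
	}
	wg.Wait()
}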
Example #17
// GetStatusSummaries returns a status summary message for the node, along with
// a status summary for every individual store within the node.
func (nsr *NodeStatusRecorder) GetStatusSummaries() (*NodeStatus, []storage.StoreStatus) {
	nsr.RLock()
	defer nsr.RUnlock()

	if nsr.desc.NodeID == 0 {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning("NodeStatusRecorder.GetStatusSummaries called before StartNode event received.")
		}
		return nil, nil
	}

	now := nsr.clock.PhysicalNow()

	// Generate a node status with no store data.
	nodeStat := &NodeStatus{
		Desc:      nsr.desc,
		UpdatedAt: now,
		StartedAt: nsr.startedAt,
		StoreIDs:  make([]roachpb.StoreID, 0, nsr.lastSummaryCount),
	}

	storeStats := make([]storage.StoreStatus, 0, nsr.lastSummaryCount)
	// Generate status summaries for stores, while accumulating data into the
	// NodeStatus.
	nsr.visitStoreMonitors(func(ssm *StoreStatusMonitor) {
		// Accumulate per-store values into node status.
		// TODO(mrtracy): A number of the fields on the protocol buffer are
		// Int32s when they would be more easily represented as Int64.
		nodeStat.StoreIDs = append(nodeStat.StoreIDs, ssm.ID)
		nodeStat.Stats.Add(&ssm.stats)
		nodeStat.RangeCount += int32(ssm.rangeCount.Count())
		nodeStat.LeaderRangeCount += int32(ssm.leaderRangeCount.Value())
		nodeStat.ReplicatedRangeCount += int32(ssm.replicatedRangeCount.Value())
		nodeStat.AvailableRangeCount += int32(ssm.availableRangeCount.Value())

		// It's difficult to guarantee that we have the store descriptor yet; we
		// may not have processed a StoreStatusEvent yet for this store. Just
		// skip the store summary in this case.
		if ssm.desc == nil {
			return
		}
		status := storage.StoreStatus{
			Desc:                 *ssm.desc,
			NodeID:               nsr.desc.NodeID,
			UpdatedAt:            now,
			StartedAt:            ssm.startedAt,
			Stats:                ssm.stats,
			RangeCount:           int32(ssm.rangeCount.Count()),
			LeaderRangeCount:     int32(ssm.leaderRangeCount.Value()),
			ReplicatedRangeCount: int32(ssm.replicatedRangeCount.Value()),
			AvailableRangeCount:  int32(ssm.availableRangeCount.Value()),
		}
		storeStats = append(storeStats, status)
	})
	return nodeStat, storeStats
}
Example #18
// TestTxnMultipleCoord checks that a coordinator uses the Writing flag to
// enforce that only one coordinator can be used for transactional writes.
func TestTxnMultipleCoord(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	for i, tc := range []struct {
		call    proto.Call
		writing bool
		ok      bool
	}{
		{proto.GetCall(proto.Key("a")), true, true},
		{proto.GetCall(proto.Key("a")), false, true},
		{proto.PutCall(proto.Key("a"), proto.Value{}), false, true},
		{proto.PutCall(proto.Key("a"), proto.Value{}), true, false},
	} {
		{
			txn := newTxn(s.Clock, proto.Key("a"))
			txn.Writing = tc.writing
			tc.call.Args.Header().Txn = txn
		}
		err := sendCall(s.Sender, tc.call)
		if err == nil != tc.ok {
			t.Errorf("%d: %T (writing=%t): success_expected=%t, but got: %v",
				i, tc.call.Args, tc.writing, tc.ok, err)
		}
		if err != nil {
			continue
		}

		txn := tc.call.Reply.Header().Txn
		// The transaction should come back rw if it started rw or if we just
		// wrote.
		isWrite := proto.IsTransactionWrite(tc.call.Args)
		if (tc.writing || isWrite) != txn.Writing {
			t.Errorf("%d: unexpected writing state: %s", i, txn)
		}
		if !isWrite {
			continue
		}
		// Abort for clean shutdown.
		etReply := &proto.EndTransactionResponse{}
		if err := sendCall(s.Sender, proto.Call{
			Args: &proto.EndTransactionRequest{
				RequestHeader: proto.RequestHeader{
					Key:       txn.Key,
					Timestamp: txn.Timestamp,
					Txn:       txn,
				},
				Commit: false,
			},
			Reply: etReply,
		}); err != nil {
			log.Warning(err)
			t.Fatal(err)
		}
	}
}
Example #19
// GetTimeSeriesData returns a slice of interesting TimeSeriesData from the
// encapsulated NodeStatusMonitor.
func (nsr *NodeStatusRecorder) GetTimeSeriesData() []ts.TimeSeriesData {
	nsr.RLock()
	defer nsr.RUnlock()

	if nsr.desc.NodeID == 0 {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning("NodeStatusRecorder.GetTimeSeriesData called before StartNode event received.")
		}
		return nil
	}
	if nsr.source == "" {
		nsr.source = strconv.FormatInt(int64(nsr.desc.NodeID), 10)
	}

	data := make([]ts.TimeSeriesData, 0, nsr.lastDataCount)

	// Record node stats.
	now := nsr.clock.PhysicalNow()
	data = append(data, nsr.recordInt(now, "calls.success", atomic.LoadInt64(&nsr.callCount)))
	data = append(data, nsr.recordInt(now, "calls.error", atomic.LoadInt64(&nsr.callErrors)))

	// Record per store stats.
	nsr.visitStoreMonitors(func(ssm *StoreStatusMonitor) {
		now := nsr.clock.PhysicalNow()
		ssr := storeStatusRecorder{
			StoreStatusMonitor: ssm,
			timestampNanos:     now,
			source:             strconv.FormatInt(int64(ssm.ID), 10),
		}
		data = append(data, ssr.recordInt("livebytes", ssr.stats.LiveBytes))
		data = append(data, ssr.recordInt("keybytes", ssr.stats.KeyBytes))
		data = append(data, ssr.recordInt("valbytes", ssr.stats.ValBytes))
		data = append(data, ssr.recordInt("intentbytes", ssr.stats.IntentBytes))
		data = append(data, ssr.recordInt("livecount", ssr.stats.LiveCount))
		data = append(data, ssr.recordInt("keycount", ssr.stats.KeyCount))
		data = append(data, ssr.recordInt("valcount", ssr.stats.ValCount))
		data = append(data, ssr.recordInt("intentcount", ssr.stats.IntentCount))
		data = append(data, ssr.recordInt("intentage", ssr.stats.IntentAge))
		data = append(data, ssr.recordInt("gcbytesage", ssr.stats.GCBytesAge))
		data = append(data, ssr.recordInt("lastupdatenanos", ssr.stats.LastUpdateNanos))
		data = append(data, ssr.recordInt("ranges", ssr.rangeCount))
		data = append(data, ssr.recordInt("ranges.leader", int64(ssr.leaderRangeCount)))
		data = append(data, ssr.recordInt("ranges.replicated", int64(ssr.replicatedRangeCount)))
		data = append(data, ssr.recordInt("ranges.available", int64(ssr.availableRangeCount)))

		// Record statistics from descriptor.
		if ssr.desc != nil {
			capacity := ssr.desc.Capacity
			data = append(data, ssr.recordInt("capacity", int64(capacity.Capacity)))
			data = append(data, ssr.recordInt("capacity.available", int64(capacity.Available)))
		}
	})
	nsr.lastDataCount = len(data)
	return data
}
Example #20
// releaseLeases implements the SchemaAccessor interface.
func (p *planner) releaseLeases() {
	if p.leases != nil {
		for _, lease := range p.leases {
			if err := p.leaseMgr.Release(lease); err != nil {
				log.Warning(err)
			}
		}
		p.leases = nil
	}
}
Example #21
// Slice returns the data stored in the underlying storage.
func (es *SampleStorage) Slice() []interface{} {
	startKey := MakeKey(es.prefix, keyDataPrefix)
	res, err := es.engine.Scan(
		startKey, PrefixEndKey(startKey), es.Seen())
	if err != nil {
		log.Warning(err)
		return nil
	}

	sl := make([]interface{}, len(res))
	for i, kv := range res {
		if dv, err := encoding.GobDecode(kv.Value); err == nil {
			sl[i] = dv
		} else {
			log.Warning(err)
		}
	}
	return sl
}
Example #22
func (p *planner) releaseLeases(db client.DB) {
	if p.leases != nil {
		for _, lease := range p.leases {
			if pErr := p.leaseMgr.Release(lease); pErr != nil {
				log.Warning(pErr)
			}
		}
		p.leases = nil
	}
}
Example #23
// RegisterCallback registers a callback for a key pattern to be
// invoked whenever new info for a gossip key matching pattern is
// received. The callback method is invoked with the info key which
// matched pattern.
func (g *Gossip) RegisterCallback(pattern string, method Callback) {
	if pattern == KeySystemConfig {
		log.Warning("raw gossip callback registered on %s, consider using RegisterSystemConfigCallback",
			KeySystemConfig)
	}

	g.mu.Lock()
	defer g.mu.Unlock()
	g.is.registerCallback(pattern, method)
}
Example #24
func logLinuxStats() {
	if !log.V(1) {
		return
	}

	// We don't know which fields in struct mallinfo are most relevant to us yet,
	// so log it all for now.
	//
	// A major caveat is that mallinfo() returns stats only for the main arena.
	// glibc uses multiple allocation arenas to increase malloc performance for
	// multithreaded processes, so mallinfo may not report on significant parts
	// of the heap.
	mi := C.mallinfo()
	log.Infof("mallinfo stats: ordblks=%s, smblks=%s, hblks=%s, hblkhd=%s, usmblks=%s, fsmblks=%s, "+
		"uordblks=%s, fordblks=%s, keepcost=%s",
		humanize.IBytes(uint64(mi.ordblks)),
		humanize.IBytes(uint64(mi.smblks)),
		humanize.IBytes(uint64(mi.hblks)),
		humanize.IBytes(uint64(mi.hblkhd)),
		humanize.IBytes(uint64(mi.usmblks)),
		humanize.IBytes(uint64(mi.fsmblks)),
		humanize.IBytes(uint64(mi.uordblks)),
		humanize.IBytes(uint64(mi.fordblks)),
		humanize.IBytes(uint64(mi.keepcost)))

	// malloc_info() emits a *lot* of XML, partly because it generates stats for
	// all arenas, unlike mallinfo().
	//
	// TODO(cdo): extract the useful bits and record to time-series DB.
	if !log.V(2) {
		return
	}

	// Create a memstream and make malloc_info() output to it.
	var buf *C.char
	var bufSize C.size_t
	memstream := C.open_memstream(&buf, &bufSize)
	if memstream == nil {
		log.Warning("couldn't open memstream")
		return
	}
	defer func() {
		C.fclose(memstream)
		C.free(unsafe.Pointer(buf))
	}()
	if rc := C.malloc_info(0, memstream); rc != 0 {
		log.Warningf("malloc_info returned %d", rc)
		return
	}
	if rc := C.fflush(memstream); rc != 0 {
		log.Warningf("fflush returned %d", rc)
		return
	}
	log.Infof("malloc_info: %s", C.GoString(buf))
}
Example #25
// executeCmd interprets the given message as a *roachpb.BatchRequest and sends it
// via the local sender.
func (n *Node) executeCmd(argsI proto.Message) (proto.Message, error) {
	ba := argsI.(*roachpb.BatchRequest)
	var br *roachpb.BatchResponse
	opName := "node " + strconv.Itoa(int(n.Descriptor.NodeID)) // could save allocs here

	fail := func(err error) {
		br = &roachpb.BatchResponse{}
		br.Error = roachpb.NewError(err)
	}

	f := func() {
		sp, err := tracing.JoinOrNew(n.ctx.Tracer, ba.Trace, opName)
		if err != nil {
			fail(err)
			return
		}
		// If this is a snowball span, it gets special treatment: It skips the
		// regular tracing machinery, and we instead send the collected spans
		// back with the response. This is more expensive, but then again,
		// those are individual requests traced by users, so they can be.
		if sp.BaggageItem(tracing.Snowball) != "" {
			if sp, err = tracing.JoinOrNewSnowball(opName, ba.Trace, func(rawSpan basictracer.RawSpan) {
				encSp, err := tracing.EncodeRawSpan(&rawSpan, nil)
				if err != nil {
					log.Warning(err)
				}
				br.CollectedSpans = append(br.CollectedSpans, encSp)
			}); err != nil {
				fail(err)
				return
			}
		}
		defer sp.Finish()
		ctx := opentracing.ContextWithSpan((*Node)(n).context(), sp)

		tStart := time.Now()
		var pErr *roachpb.Error
		br, pErr = n.stores.Send(ctx, *ba)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
			sp.LogEvent(fmt.Sprintf("error: %T", pErr.GetDetail()))
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(n.stores, br))
		}
		n.metrics.callComplete(time.Now().Sub(tStart), pErr)
		br.Error = pErr
	}

	if !n.stopper.RunTask(f) {
		return nil, util.Errorf("node %d stopped", n.Descriptor.NodeID)
	}
	return br, nil
}
Example #26
// shouldQueue determines whether a range should be queued for truncating. This
// is true only if the replica is the raft leader and the number of stale
// entries in the range's raft log exceeds RaftLogQueueStaleThreshold.
func (*raftLogQueue) shouldQueue(now roachpb.Timestamp, r *Replica, _ *config.SystemConfig) (shouldQ bool,
	priority float64) {

	truncatableIndexes, _, err := getTruncatableIndexes(r)
	if err != nil {
		log.Warning(err)
		return false, 0
	}

	return truncatableIndexes > RaftLogQueueStaleThreshold, float64(truncatableIndexes)
}
Example #27
func (s *state) stop() {
	log.V(6).Infof("node %v stopping", s.nodeID)
	for _, n := range s.nodes {
		err := n.client.conn.Close()
		if err != nil {
			log.Warning("error stopping client:", err)
		}
	}
	s.writeTask.stop()
	close(s.stopped)
}
Example #28
// RefreshLeases starts a goroutine that refreshes the lease manager
// leases for tables received in the latest system configuration via gossip.
func (m *LeaseManager) RefreshLeases(s *stop.Stopper, db *client.DB, gossip *gossip.Gossip) {
	s.RunWorker(func() {
		descKeyPrefix := keys.MakeTablePrefix(uint32(DescriptorTable.ID))
		gossip.RegisterSystemConfigCallback(m.updateSystemConfig)
		for {
			select {
			case <-m.newConfig:
				// Read all tables and their versions
				cfg := m.getSystemConfig()
				if log.V(2) {
					log.Info("received a new config %v", cfg)
				}

				// Loop through the configuration to find all the tables.
				for _, kv := range cfg.Values {
					if kv.Value.Tag != roachpb.ValueType_BYTES {
						continue
					}

					if !bytes.HasPrefix(kv.Key, descKeyPrefix) {
						continue
					}
					// Attempt to unmarshal config into a table/database descriptor.
					var descriptor Descriptor
					if err := kv.Value.GetProto(&descriptor); err != nil {
						log.Warningf("unable to unmarshal descriptor %v", kv.Value)
						continue
					}
					switch union := descriptor.Union.(type) {
					case *Descriptor_Table:
						table := union.Table
						if err := table.Validate(); err != nil {
							log.Errorf("received invalid table descriptor: %v", table)
							continue
						}
						if log.V(2) {
							log.Infof("refreshing lease table: %d, version: %d", table.ID, table.Version)
						}
						// Try to refresh the table lease to one >= this version.
						if err := m.refreshLease(db, table.ID, table.Version); err != nil {
							log.Warning(err)
						}

					case *Descriptor_Database:
						// Ignore.
					}
				}

			case <-s.ShouldStop():
				return
			}
		}
	})
}
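The worker above is the usual two-case select loop: wake up when the gossip callback signals a new config, or exit when the stopper closes its channel. A minimal standalone version of that loop shape:

package main

import (
	"fmt"
	"time"
)

func main() {
	newConfig := make(chan struct{}, 1) // signalled when a new config arrives
	stop := make(chan struct{})         // closed when the server is shutting down

	go func() {
		for {
			select {
			case <-newConfig:
				fmt.Println("refreshing leases for the new config")
			case <-stop:
				fmt.Println("worker stopping")
				return
			}
		}
	}()

	newConfig <- struct{}{}
	time.Sleep(10 * time.Millisecond) // give the worker a chance to run
	close(stop)
	time.Sleep(10 * time.Millisecond)
}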
Example #29
// Put adds an element to the storage at the index specified.
func (es *SampleStorage) Put(i int, v interface{}) {
	if i < 0 || i >= es.Size() {
		return
	}
	err := PutI(es.engine, es.indexToKey(i), &v)
	if err != nil {
		log.Warning(err)
	} else {
		es.incVar(keySeen, 1)
	}
}
Example #30
// GetStatusSummary returns a status summary message for the node. The summary
// includes the recent values of metrics for both the node and all of its
// component stores.
func (mr *MetricsRecorder) GetStatusSummary() *NodeStatus {
	mr.mu.Lock()
	defer mr.mu.Unlock()

	if mr.mu.nodeRegistry == nil {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning(context.TODO(), "MetricsRecorder.GetStatusSummary called before NodeID allocation.")
		}
		return nil
	}

	now := mr.mu.clock.PhysicalNow()

	// Generate a node status with no store data.
	nodeStat := &NodeStatus{
		Desc:          mr.mu.desc,
		BuildInfo:     build.GetInfo(),
		UpdatedAt:     now,
		StartedAt:     mr.mu.startedAt,
		StoreStatuses: make([]StoreStatus, 0, mr.mu.lastSummaryCount),
		Metrics:       make(map[string]float64, mr.mu.lastNodeMetricCount),
	}

	eachRecordableValue(mr.mu.nodeRegistry, func(name string, val float64) {
		nodeStat.Metrics[name] = val
	})

	// Generate status summaries for stores.
	for storeID, r := range mr.mu.storeRegistries {
		storeMetrics := make(map[string]float64, mr.mu.lastStoreMetricCount)
		eachRecordableValue(r, func(name string, val float64) {
			storeMetrics[name] = val
		})

		// Gather descriptor from store.
		descriptor, err := mr.mu.stores[storeID].Descriptor()
		if err != nil {
			log.Errorf(context.TODO(), "Could not record status summaries: Store %d could not return descriptor, error: %s", storeID, err)
			continue
		}

		nodeStat.StoreStatuses = append(nodeStat.StoreStatuses, StoreStatus{
			Desc:    *descriptor,
			Metrics: storeMetrics,
		})
	}
	mr.mu.lastSummaryCount = len(nodeStat.StoreStatuses)
	mr.mu.lastNodeMetricCount = len(nodeStat.Metrics)
	if len(nodeStat.StoreStatuses) > 0 {
		mr.mu.lastStoreMetricCount = len(nodeStat.StoreStatuses[0].Metrics)
	}
	return nodeStat
}