Esempio n. 1
0
func convertBatchError(tableDesc *TableDescriptor, b client.Batch, origPErr *roachpb.Error) *roachpb.Error {
	if origPErr.Index == nil {
		return origPErr
	}
	index := origPErr.Index.Index
	if index >= int32(len(b.Results)) {
		panic(fmt.Sprintf("index %d outside of results: %+v", index, b.Results))
	}
	result := b.Results[index]
	if _, ok := origPErr.GoError().(*roachpb.ConditionFailedError); ok {
		for _, row := range result.Rows {
			indexID, key, pErr := decodeIndexKeyPrefix(tableDesc, row.Key)
			if pErr != nil {
				return pErr
			}
			index, pErr := tableDesc.FindIndexByID(indexID)
			if pErr != nil {
				return pErr
			}
			valTypes, pErr := makeKeyVals(tableDesc, index.ColumnIDs)
			if pErr != nil {
				return pErr
			}
			vals := make([]parser.Datum, len(valTypes))
			if _, pErr := decodeKeyVals(valTypes, vals, key); pErr != nil {
				return pErr
			}

			return roachpb.NewError(errUniquenessConstraintViolation{index: index, vals: vals})
		}
	}
	return origPErr
}
Esempio n. 2
0
// executeCmd interprets the given message as a *roachpb.BatchRequest and sends it
// via the local sender.
func (n *Node) executeCmd(argsI proto.Message) (proto.Message, error) {
	ba := argsI.(*roachpb.BatchRequest)
	var br *roachpb.BatchResponse

	f := func() {
		// TODO(tschottdorf) get a hold of the client's ID, add it to the
		// context before dispatching, and create an ID for tracing the request.
		trace := n.ctx.Tracer.NewTrace(tracer.Node, ba)
		defer trace.Finalize()
		defer trace.Epoch("node")()
		ctx := tracer.ToCtx((*Node)(n).context(), trace)

		tStart := time.Now()
		var pErr *roachpb.Error
		br, pErr = n.stores.Send(ctx, *ba)
		if pErr != nil {
			br = &roachpb.BatchResponse{}
			trace.Event(fmt.Sprintf("error: %T", pErr.GoError()))
		}
		if br.Error != nil {
			panic(roachpb.ErrorUnexpectedlySet(n.stores, br))
		}
		n.feed.CallComplete(*ba, time.Now().Sub(tStart), pErr)
		br.Error = pErr
	}

	if !n.stopper.RunTask(f) {
		return nil, util.Errorf("node %d stopped", n.Descriptor.NodeID)
	}
	return br, nil
}
Esempio n. 3
0
// If we hit an error and there is a pending transaction, rollback
// the transaction before returning. The client does not have to
// deal with cleaning up transaction state.
func makeResultFromError(planMaker *planner, pErr *roachpb.Error) Result {
	if planMaker.txn != nil {
		if _, ok := pErr.GoError().(*roachpb.SqlTransactionAbortedError); !ok {
			planMaker.txn.Cleanup(pErr)
		}
	}
	return Result{Err: pErr.GoError()}
}
Esempio n. 4
0
// If we hit an error and there is a pending transaction, rollback
// the transaction before returning. The client does not have to
// deal with cleaning up transaction state.
func makeResultFromError(planMaker *planner, pErr *roachpb.Error) driver.Response_Result {
	if planMaker.txn != nil {
		if _, ok := pErr.GoError().(*roachpb.SqlTransactionAbortedError); !ok {
			planMaker.txn.Cleanup(pErr)
		}
	}
	errString := pErr.GoError().Error()
	return driver.Response_Result{Error: &errString}
}
Esempio n. 5
0
// TestTxnCoordSenderEndTxn verifies that ending a transaction
// sends resolve write intent requests and removes the transaction
// from the txns map.
func TestTxnCoordSenderEndTxn(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	// 4 cases: no deadline, past deadline, equal deadline, future deadline.
	for i := 0; i < 4; i++ {
		key := roachpb.Key("key: " + strconv.Itoa(i))
		txn := client.NewTxn(*s.DB)
		// Initialize the transaction
		if pErr := txn.Put(key, []byte("value")); pErr != nil {
			t.Fatal(pErr)
		}

		{
			var pErr *roachpb.Error
			switch i {
			case 0:
				// No deadline.
				pErr = txn.Commit()
			case 1:
				// Past deadline.
				pErr = txn.CommitBy(txn.Proto.Timestamp.Prev())
			case 2:
				// Equal deadline.
				pErr = txn.CommitBy(txn.Proto.Timestamp)
			case 3:
				// Future deadline.
				pErr = txn.CommitBy(txn.Proto.Timestamp.Next())
			}

			switch i {
			case 0:
				// No deadline.
				if pErr != nil {
					t.Error(pErr)
				}
			case 1:
				// Past deadline.
				if _, ok := pErr.GoError().(*roachpb.TransactionAbortedError); !ok {
					t.Errorf("expected TransactionAbortedError but got %T: %s", pErr, pErr)
				}
			case 2:
				// Equal deadline.
				if pErr != nil {
					t.Error(pErr)
				}
			case 3:
				// Future deadline.
				if pErr != nil {
					t.Error(pErr)
				}
			}
		}
		verifyCleanup(key, s.Sender, s.Eng, t)
	}
}
Esempio n. 6
0
func encodeDTuple(b []byte, d parser.DTuple) ([]byte, error) {
	for _, val := range d {
		var pErr *roachpb.Error
		b, pErr = encodeDatum(b, val)
		if pErr != nil {
			return nil, pErr.GoError()
		}
	}
	return b, nil
}
Esempio n. 7
0
// Responses with write-too-old, write-intent and not leader errors
// are retried on the server, and so are not recorded in the sequence
// cache in the hopes of retrying to a successful outcome.
func (sc *SequenceCache) shouldCacheError(pErr *roachpb.Error) bool {
	switch pErr.GoError().(type) {
	case *roachpb.WriteTooOldError, *roachpb.WriteIntentError, *roachpb.NotLeaderError, *roachpb.RangeKeyMismatchError:
		return false
	}
	return true
}
Esempio n. 8
0
func convertBatchError(tableDesc *sqlbase.TableDescriptor, b client.Batch, origPErr *roachpb.Error) error {
	if origPErr.Index == nil {
		return origPErr.GoError()
	}
	index := origPErr.Index.Index
	if index >= int32(len(b.Results)) {
		panic(fmt.Sprintf("index %d outside of results: %+v", index, b.Results))
	}
	result := b.Results[index]
	var alloc sqlbase.DatumAlloc
	if _, ok := origPErr.GetDetail().(*roachpb.ConditionFailedError); ok {
		for _, row := range result.Rows {
			indexID, key, err := sqlbase.DecodeIndexKeyPrefix(tableDesc, row.Key)
			if err != nil {
				return err
			}
			index, err := tableDesc.FindIndexByID(indexID)
			if err != nil {
				return err
			}
			valTypes, err := sqlbase.MakeKeyVals(tableDesc, index.ColumnIDs)
			if err != nil {
				return err
			}
			dirs := make([]encoding.Direction, 0, len(index.ColumnIDs))
			for _, dir := range index.ColumnDirections {
				convertedDir, err := dir.ToEncodingDirection()
				if err != nil {
					return err
				}
				dirs = append(dirs, convertedDir)
			}
			vals := make([]parser.Datum, len(valTypes))
			if _, err := sqlbase.DecodeKeyVals(&alloc, valTypes, vals, dirs, key); err != nil {
				return err
			}

			return &errUniquenessConstraintViolation{index: index, vals: vals}
		}
	}
	return origPErr.GoError()
}
Esempio n. 9
0
// CallComplete is called by a node whenever it completes a request. This will
// publish appropriate events to the feed:
// - For a successful request, a corresponding event for each request in the batch,
// - on error without index information, a failure of the Batch, and
// - on an indexed error a failure of the individual request.
func (nef NodeEventFeed) CallComplete(ba roachpb.BatchRequest, pErr *roachpb.Error) {
	if pErr != nil && pErr.TransactionRestart == roachpb.TransactionRestart_ABORT {
		method := roachpb.Batch
		if iErr, ok := pErr.GoError().(roachpb.IndexedError); ok {
			if index, ok := iErr.ErrorIndex(); ok {
				method = ba.Requests[index].GetInner().Method()
			}
		}
		nef.f.Publish(&CallErrorEvent{
			NodeID: nef.id,
			Method: method,
		})
		return
	}
	for _, union := range ba.Requests {
		nef.f.Publish(&CallSuccessEvent{
			NodeID: nef.id,
			Method: union.GetInner().Method(),
		})
	}
}
Esempio n. 10
0
func (b *Batch) fillResults(br *roachpb.BatchResponse, pErr *roachpb.Error) error {
	offset := 0
	for i := range b.Results {
		result := &b.Results[i]

		for k := 0; k < result.calls; k++ {
			args := b.reqs[offset+k]

			var reply roachpb.Response
			if result.Err == nil {
				result.Err = pErr.GoError()
				if result.Err == nil {
					if br != nil && offset+k < len(br.Responses) {
						reply = br.Responses[offset+k].GetInner()
					} else if args.Method() != roachpb.EndTransaction {
						// TODO(tschottdorf): EndTransaction is excepted here
						// because it may be elided (r/o txns). Might prefer to
						// simulate an EndTransaction response instead; this
						// effectively just leaks here.
						panic("not enough responses for calls")
					}
				}
			}

			switch req := args.(type) {
			case *roachpb.GetRequest:
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					row.Value = reply.(*roachpb.GetResponse).Value
				}
			case *roachpb.PutRequest:
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					row.Value = &req.Value
					row.setTimestamp(reply.(*roachpb.PutResponse).Timestamp)
				}
			case *roachpb.ConditionalPutRequest:
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					row.Value = &req.Value
					row.setTimestamp(reply.(*roachpb.ConditionalPutResponse).Timestamp)
				}
			case *roachpb.IncrementRequest:
				row := &result.Rows[k]
				row.Key = []byte(req.Key)
				if result.Err == nil {
					t := reply.(*roachpb.IncrementResponse)
					row.Value = &roachpb.Value{}
					row.Value.SetInt(t.NewValue)
					row.setTimestamp(t.Timestamp)
				}
			case *roachpb.ScanRequest:
				if result.Err == nil {
					t := reply.(*roachpb.ScanResponse)
					result.Rows = make([]KeyValue, len(t.Rows))
					for j := range t.Rows {
						src := &t.Rows[j]
						dst := &result.Rows[j]
						dst.Key = src.Key
						dst.Value = &src.Value
					}
				}
			case *roachpb.ReverseScanRequest:
				if result.Err == nil {
					t := reply.(*roachpb.ReverseScanResponse)
					result.Rows = make([]KeyValue, len(t.Rows))
					for j := range t.Rows {
						src := &t.Rows[j]
						dst := &result.Rows[j]
						dst.Key = src.Key
						dst.Value = &src.Value
					}
				}
			case *roachpb.DeleteRequest:
				row := &result.Rows[k]
				row.Key = []byte(args.(*roachpb.DeleteRequest).Key)

			case *roachpb.DeleteRangeRequest:
			case *roachpb.BeginTransactionRequest:
			case *roachpb.EndTransactionRequest:
			case *roachpb.AdminMergeRequest:
			case *roachpb.AdminSplitRequest:
			case *roachpb.HeartbeatTxnRequest:
			case *roachpb.GCRequest:
			case *roachpb.PushTxnRequest:
			case *roachpb.RangeLookupRequest:
			case *roachpb.ResolveIntentRequest:
			case *roachpb.ResolveIntentRangeRequest:
			case *roachpb.MergeRequest:
			case *roachpb.TruncateLogRequest:
			case *roachpb.LeaderLeaseRequest:
				// Nothing to do for these methods as they do not generate any
				// rows.

			default:
				if result.Err == nil {
					result.Err = fmt.Errorf("unsupported reply: %T", reply)
				}
			}
		}
		offset += result.calls
	}

	for i := range b.Results {
		result := &b.Results[i]
		if result.Err != nil {
			return result.Err
		}
	}
	return nil
}
Esempio n. 11
0
// sendChunk is in charge of sending an "admissible" piece of batch, i.e. one
// which doesn't need to be subdivided further before going to a range (so no
// mixing of forward and reverse scans, etc). The parameters and return values
// correspond to client.Sender with the exception of the returned boolean,
// which is true when indicating that the caller should retry but needs to send
// EndTransaction in a separate request.
func (ds *DistSender) sendChunk(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error, bool) {
	isReverse := ba.IsReverse()

	trace := tracer.FromCtx(ctx)

	// The minimal key range encompassing all requests contained within.
	// Local addressing has already been resolved.
	// TODO(tschottdorf): consider rudimentary validation of the batch here
	// (for example, non-range requests with EndKey, or empty key ranges).
	rs := keys.Range(ba)
	var br *roachpb.BatchResponse
	// Send the request to one range per iteration.
	for {
		considerIntents := false
		var curReply *roachpb.BatchResponse
		var desc *roachpb.RangeDescriptor
		var needAnother bool
		var pErr *roachpb.Error
		for r := retry.Start(ds.rpcRetryOptions); r.Next(); {
			// Get range descriptor (or, when spanning range, descriptors). Our
			// error handling below may clear them on certain errors, so we
			// refresh (likely from the cache) on every retry.
			descDone := trace.Epoch("meta descriptor lookup")
			var evictDesc func()
			desc, needAnother, evictDesc, pErr = ds.getDescriptors(rs, considerIntents, isReverse)
			descDone()

			// getDescriptors may fail retryably if the first range isn't
			// available via Gossip.
			if pErr != nil {
				if pErr.Retryable {
					if log.V(1) {
						log.Warning(pErr)
					}
					continue
				}
				break
			}

			if needAnother && br == nil {
				// TODO(tschottdorf): we should have a mechanism for discovering
				// range merges (descriptor staleness will mostly go unnoticed),
				// or we'll be turning single-range queries into multi-range
				// queries for no good reason.

				// If there's no transaction and op spans ranges, possibly
				// re-run as part of a transaction for consistency. The
				// case where we don't need to re-run is if the read
				// consistency is not required.
				if ba.Txn == nil && ba.IsPossibleTransaction() &&
					ba.ReadConsistency != roachpb.INCONSISTENT {
					return nil, roachpb.NewError(&roachpb.OpRequiresTxnError{}), false
				}
				// If the request is more than but ends with EndTransaction, we
				// want the caller to come again with the EndTransaction in an
				// extra call.
				if l := len(ba.Requests) - 1; l > 0 && ba.Requests[l].GetInner().Method() == roachpb.EndTransaction {
					return nil, roachpb.NewError(errors.New("cannot send 1PC txn to multiple ranges")), true /* shouldSplitET */
				}
			}

			// It's possible that the returned descriptor misses parts of the
			// keys it's supposed to scan after it's truncated to match the
			// descriptor. Example revscan [a,g), first desc lookup for "g"
			// returns descriptor [c,d) -> [d,g) is never scanned.
			// We evict and retry in such a case.
			if (isReverse && !desc.ContainsKeyRange(desc.StartKey, rs.EndKey)) || (!isReverse && !desc.ContainsKeyRange(rs.Key, desc.EndKey)) {
				evictDesc()
				continue
			}

			curReply, pErr = func() (*roachpb.BatchResponse, *roachpb.Error) {
				// Truncate the request to our current key range.
				intersected, iErr := rs.Intersect(desc)
				if iErr != nil {
					return nil, roachpb.NewError(iErr)
				}
				truncBA, numActive, trErr := truncate(ba, intersected)
				if numActive == 0 && trErr == nil {
					// This shouldn't happen in the wild, but some tests
					// exercise it.
					return nil, roachpb.NewErrorf("truncation resulted in empty batch on [%s,%s): %s",
						rs.Key, rs.EndKey, ba)
				}
				if trErr != nil {
					return nil, roachpb.NewError(trErr)
				}

				return ds.sendSingleRange(trace, truncBA, desc)
			}()
			// If sending succeeded, break this loop.
			if pErr == nil {
				break
			}

			if log.V(1) {
				log.Warningf("failed to invoke %s: %s", ba, pErr)
			}
			trace.Event(fmt.Sprintf("reply error: %T", pErr.GoError()))

			// Error handling below.
			// If retryable, allow retry. For range not found or range
			// key mismatch errors, we don't backoff on the retry,
			// but reset the backoff loop so we can retry immediately.
			switch tErr := pErr.GoError().(type) {
			case *roachpb.SendError:
				// For an RPC error to occur, we must've been unable to contact
				// any replicas. In this case, likely all nodes are down (or
				// not getting back to us within a reasonable amount of time).
				// We may simply not be trying to talk to the up-to-date
				// replicas, so clearing the descriptor here should be a good
				// idea.
				// TODO(tschottdorf): If a replica group goes dead, this
				// will cause clients to put high read pressure on the first
				// range, so there should be some rate limiting here.
				evictDesc()
				if tErr.CanRetry() {
					continue
				}
			case *roachpb.RangeNotFoundError, *roachpb.RangeKeyMismatchError:
				// Range descriptor might be out of date - evict it.
				evictDesc()
				// On addressing errors, don't backoff; retry immediately.
				r.Reset()
				if log.V(1) {
					log.Warning(tErr)
				}
				// On retries, allow [uncommitted] intents on range descriptor
				// lookups to be returned 50% of the time in order to succeed
				// at finding the transaction record pointed to by the intent
				// itself. The 50% probability of returning either the current
				// intent or the previously committed value balances between
				// the two cases where the intent's txn hasn't yet been
				// committed (the previous value is correct), or the intent's
				// txn has been committed (the intent value is correct).
				considerIntents = true
				continue
			case *roachpb.NotLeaderError:
				newLeader := tErr.Leader
				// Verify that leader is a known replica according to the
				// descriptor. If not, we've got a stale replica; evict cache.
				// Next, cache the new leader.
				if newLeader != nil {
					if i, _ := desc.FindReplica(newLeader.StoreID); i == -1 {
						if log.V(1) {
							log.Infof("error indicates unknown leader %s, expunging descriptor %s", newLeader, desc)
						}
						evictDesc()
					}
				} else {
					newLeader = &roachpb.ReplicaDescriptor{}
				}
				ds.updateLeaderCache(roachpb.RangeID(desc.RangeID), *newLeader)
				if log.V(1) {
					log.Warning(tErr)
				}
				r.Reset()
				continue
			case retry.Retryable:
				if tErr.CanRetry() {
					if log.V(1) {
						log.Warning(tErr)
					}
					continue
				}
			}
			break
		}

		// Immediately return if querying a range failed non-retryably.
		if pErr != nil {
			return nil, pErr, false
		}

		ba.Txn.Update(curReply.Txn)

		if br == nil {
			// First response from a Range.
			br = curReply
		} else {
			// This was the second or later call in a cross-Range request.
			// Combine the new response with the existing one.
			if err := br.Combine(curReply); err != nil {
				return nil, roachpb.NewError(err), false
			}
		}

		// If this request has a bound (such as MaxResults in
		// ScanRequest) and we are going to query at least one more range,
		// check whether enough rows have been retrieved.
		// TODO(tschottdorf): need tests for executing a multi-range batch
		// with various bounded requests which saturate at different times.
		if needAnother {
			// Start with the assumption that all requests are saturated.
			// Below, we look at each and decide whether that's true.
			// Everything that is indeed saturated is "masked out" from the
			// batch request; only if that's all requests does needAnother
			// remain false.
			needAnother = false
			if br == nil {
				// Clone ba.Requests. This is because we're multi-range, and
				// some requests may be bounded, which could lead to them being
				// masked out once they're saturated. We don't want to risk
				// removing requests that way in the "master copy" since that
				// could lead to omitting requests in certain retry scenarios.
				ba.Requests = append([]roachpb.RequestUnion(nil), ba.Requests...)
			}
			for i, union := range ba.Requests {
				args := union.GetInner()
				if _, ok := args.(*roachpb.NoopRequest); ok {
					// NoopRequests are skipped.
					continue
				}
				boundedArg, ok := args.(roachpb.Bounded)
				if !ok {
					// Non-bounded request. We will have to query all ranges.
					needAnother = true
					continue
				}
				prevBound := boundedArg.GetBound()
				cReply, ok := curReply.Responses[i].GetInner().(roachpb.Countable)
				if !ok || prevBound <= 0 {
					// Request bounded, but without max results. Again, will
					// need to query everything we can. The case in which the reply
					// isn't countable occurs when the request wasn't active for
					// that range (since it didn't apply to it), so the response
					// is a NoopResponse.
					needAnother = true
					continue
				}
				nextBound := prevBound - cReply.Count()
				if nextBound <= 0 {
					// We've hit max results for this piece of the batch. Mask
					// it out (we've copied the requests slice above, so this
					// is kosher).
					ba.Requests[i].Reset() // necessary (no one-of?)
					if !ba.Requests[i].SetValue(&roachpb.NoopRequest{}) {
						panic("RequestUnion excludes NoopRequest")
					}
					continue
				}
				// The request isn't saturated yet.
				needAnother = true
				boundedArg.SetBound(nextBound)
			}
		}

		// If this was the last range accessed by this call, exit loop.
		if !needAnother {
			return br, nil, false
		}

		if isReverse {
			// In next iteration, query previous range.
			// We use the StartKey of the current descriptor as opposed to the
			// EndKey of the previous one since that doesn't have bugs when
			// stale descriptors come into play.
			rs.EndKey = prev(ba, desc.StartKey)
		} else {
			// In next iteration, query next range.
			// It's important that we use the EndKey of the current descriptor
			// as opposed to the StartKey of the next one: if the former is stale,
			// it's possible that the next range has since merged the subsequent
			// one, and unless both descriptors are stale, the next descriptor's
			// StartKey would move us to the beginning of the current range,
			// resulting in a duplicate scan.
			rs.Key = next(ba, desc.EndKey)
		}
		trace.Event("querying next range")
	}
}
Esempio n. 12
0
// Query returns datapoints for the named time series during the supplied time
// span.  Data is returned as a series of consecutive data points.
//
// Data is queried only at the Resolution supplied: if data for the named time
// series is not stored at the given resolution, an empty result will be
// returned.
//
// All data stored on the server is downsampled to some degree; the data points
// returned represent the average value within a sample period. Each datapoint's
// timestamp falls in the middle of the sample period it represents.
//
// If data for the named time series was collected from multiple sources, each
// returned datapoint will represent the sum of datapoints from all sources at
// the same time. The returned string slices contains a list of all sources for
// the metric which were aggregated to produce the result.
func (db *DB) Query(query Query, r Resolution, startNanos, endNanos int64) ([]TimeSeriesDatapoint, []string, error) {
	// Normalize startNanos and endNanos the nearest SampleDuration boundary.
	startNanos -= startNanos % r.SampleDuration()

	var rows []client.KeyValue
	if len(query.Sources) == 0 {
		// Based on the supplied timestamps and resolution, construct start and end
		// keys for a scan that will return every key with data relevant to the
		// query.
		startKey := MakeDataKey(query.Name, "" /* source */, r, startNanos)
		endKey := MakeDataKey(query.Name, "" /* source */, r, endNanos).PrefixEnd()
		var pErr *roachpb.Error
		rows, pErr = db.db.ScanInconsistent(startKey, endKey, 0)
		if pErr != nil {
			return nil, nil, pErr.GoError()
		}
	} else {
		b := db.db.NewBatch()
		b.ReadConsistency = roachpb.INCONSISTENT
		// Iterate over all key timestamps which may contain data for the given
		// sources, based on the given start/end time and the resolution.
		kd := r.KeyDuration()
		startKeyNanos := startNanos - (startNanos % kd)
		endKeyNanos := endNanos - (endNanos % kd)
		for currentTimestamp := startKeyNanos; currentTimestamp <= endKeyNanos; currentTimestamp += kd {
			for _, source := range query.Sources {
				key := MakeDataKey(query.Name, source, r, currentTimestamp)
				b.Get(key)
			}
		}
		pErr := db.db.Run(b)
		if pErr != nil {
			return nil, nil, pErr.GoError()
		}
		for _, result := range b.Results {
			row := result.Rows[0]
			if row.Value == nil {
				continue
			}
			rows = append(rows, row)
		}
	}

	// Convert the queried source data into a set of data spans, one for each
	// source.
	sourceSpans, err := makeDataSpans(rows, startNanos)
	if err != nil {
		return nil, nil, err
	}

	// Compute a downsample function which will be used to return values from
	// each source for each sample period.
	downsampler, err := getDownsampleFunction(query.GetDownsampler())
	if err != nil {
		return nil, nil, err
	}

	// If we are returning a derivative, iteration needs to start at offset -1
	// (in order to correctly compute the rate of change at offset 0).
	var startOffset int32
	isDerivative := query.GetDerivative() != TimeSeriesQueryDerivative_NONE
	if isDerivative {
		startOffset = -1
	}

	// Create an interpolatingIterator for each dataSpan, adding each iterator
	// into a unionIterator collection. This is also where we compute a list of
	// all sources with data present in the query.
	sources := make([]string, 0, len(sourceSpans))
	iters := make(unionIterator, 0, len(sourceSpans))
	for name, span := range sourceSpans {
		sources = append(sources, name)
		iters = append(iters, span.newIterator(startOffset, downsampler))
	}

	// Choose an aggregation function to use when taking values from the
	// unionIterator.
	var valueFn func() float64
	switch query.GetSourceAggregator() {
	case TimeSeriesQueryAggregator_SUM:
		valueFn = iters.sum
	case TimeSeriesQueryAggregator_AVG:
		valueFn = iters.avg
	case TimeSeriesQueryAggregator_MAX:
		valueFn = iters.max
	case TimeSeriesQueryAggregator_MIN:
		valueFn = iters.min
	}

	// Iterate over all requested offsets, recording a value from the
	// unionIterator at each offset encountered. If the query is requesting a
	// derivative, a rate of change is recorded instead of the actual values.
	iters.init()
	var last TimeSeriesDatapoint
	if isDerivative {
		last = TimeSeriesDatapoint{
			TimestampNanos: iters.timestamp(),
			Value:          valueFn(),
		}
		// For derivatives, the iterator was initialized at offset -1 in order
		// to calculate the rate of change at offset zero. However, in some
		// cases (such as the very first value recorded) offset -1 is not
		// available. In this case, we treat the rate-of-change at the first
		// offset as zero.
		if iters.offset() < 0 {
			iters.advance()
		}
	}
	var responseData []TimeSeriesDatapoint
	for iters.isValid() && iters.timestamp() <= endNanos {
		current := TimeSeriesDatapoint{
			TimestampNanos: iters.timestamp(),
			Value:          valueFn(),
		}
		response := current
		if isDerivative {
			dTime := (current.TimestampNanos - last.TimestampNanos) / time.Second.Nanoseconds()
			if dTime == 0 {
				response.Value = 0
			} else {
				response.Value = (current.Value - last.Value) / float64(dTime)
			}
			if response.Value < 0 &&
				query.GetDerivative() == TimeSeriesQueryDerivative_NON_NEGATIVE_DERIVATIVE {
				response.Value = 0
			}
		}
		responseData = append(responseData, response)
		last = current
		iters.advance()
	}

	return responseData, sources, nil
}
Esempio n. 13
0
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error {
	trace := tracer.FromCtx(ctx)
	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.GetTxn())
	// TODO(tamird): remove this clone. It's currently needed to avoid race conditions.
	pErr = proto.Clone(pErr).(*roachpb.Error)
	err := pErr.GoError()
	// TODO(bdarnell): We're writing to errors here (and where using ErrorWithIndex);
	// since there's no concept of ownership copy-on-write is always preferable.
	switch t := err.(type) {
	case nil:
		newTxn.Update(br.Txn)
		// Move txn timestamp forward to response timestamp if applicable.
		// TODO(tschottdorf): see (*Replica).executeBatch and comments within.
		// Looks like this isn't necessary any more, nor did it prevent a bug
		// referenced in a TODO there.
		newTxn.Timestamp.Forward(br.Timestamp)
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		defer tc.cleanupTxn(trace, t.Txn)
	case *roachpb.OpRequiresTxnError:
		panic("OpRequiresTxnError must not happen at this level")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// Mark the host as certain. See the protobuf comment for
		// Transaction.CertainNodes for details.
		if t.NodeID == 0 {
			panic("no replica set in header on uncertainty restart")
		}
		newTxn.Update(&t.Txn)
		newTxn.CertainNodes.Add(t.NodeID)
		// If the reader encountered a newer write within the uncertainty
		// interval, move the timestamp forward, just past that write or
		// up to MaxTimestamp, whichever comes first.
		candidateTS := newTxn.MaxTimestamp
		candidateTS.Backward(t.ExistingTimestamp.Add(0, 1))
		newTxn.Timestamp.Forward(candidateTS)
		newTxn.Restart(ba.GetUserPriority(), newTxn.Priority, newTxn.Timestamp)
		t.Txn = *newTxn
	case *roachpb.TransactionAbortedError:
		trace.SetError()
		newTxn.Update(&t.Txn)
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(t.Txn.Timestamp)
		newTxn.Priority = t.Txn.Priority
		t.Txn = *newTxn
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxn(trace, t.Txn)
	case *roachpb.TransactionPushError:
		newTxn.Update(t.Txn)
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.GetUserPriority(), t.PusheeTxn.Priority-1, newTxn.Timestamp)
		t.Txn = newTxn
	case *roachpb.TransactionRetryError:
		newTxn.Update(&t.Txn)
		newTxn.Restart(ba.GetUserPriority(), t.Txn.Priority, newTxn.Timestamp)
		t.Txn = *newTxn
	case roachpb.TransactionRestartError:
		// Assertion: The above cases should exhaust all ErrorDetails which
		// carry a Transaction.
		if pErr.Detail != nil {
			panic(fmt.Sprintf("unhandled TransactionRestartError %T", err))
		}
	default:
		trace.SetError()
	}

	return func() *roachpb.Error {
		if len(newTxn.ID) <= 0 {
			return pErr
		}
		id := string(newTxn.ID)
		tc.Lock()
		defer tc.Unlock()
		txnMeta := tc.txns[id]
		// For successful transactional requests, keep the written intents and
		// the updated transaction record to be sent along with the reply.
		// The transaction metadata is created with the first writing operation.
		// A tricky edge case is that of a transaction which "fails" on the
		// first writing request, but actually manages to write some intents
		// (for example, due to being multi-range). In this case, there will
		// be an error, but the transaction will be marked as Writing and the
		// coordinator must track the state, for the client's retry will be
		// performed with a Writing transaction which the coordinator rejects
		// unless it is tracking it (on top of it making sense to track it;
		// after all, it **has** laid down intents and only the coordinator
		// can augment a potential EndTransaction call).
		// consider re-using those.
		if intents := ba.GetIntents(); len(intents) > 0 && (err == nil || newTxn.Writing) {
			if txnMeta == nil {
				if !newTxn.Writing {
					panic("txn with intents marked as non-writing")
				}
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
					firstUpdateNanos: tc.clock.PhysicalNow(),
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[id] = txnMeta
				// If the transaction is already over, there's no point in
				// launching a one-off coordinator which will shut down right
				// away. If we ended up here with an error, we'll always start
				// the coordinator - the transaction has laid down intents, so
				// we expect it to be committed/aborted at some point in the
				// future.
				if _, isEnding := ba.GetArg(roachpb.EndTransaction); err != nil || !isEnding {
					trace.Event("coordinator spawns")
					if !tc.stopper.RunAsyncTask(func() {
						tc.heartbeatLoop(id)
					}) {
						// The system is already draining and we can't start the
						// heartbeat. We refuse new transactions for now because
						// they're likely not going to have all intents committed.
						// In principle, we can relax this as needed though.
						tc.unregisterTxnLocked(id)
						return roachpb.NewError(&roachpb.NodeUnavailableError{})
					}
				}
			}
			for _, intent := range intents {
				txnMeta.addKeyRange(intent.Key, intent.EndKey)
			}
		}
		// Update our record of this transaction, even on error.
		if txnMeta != nil {
			txnMeta.txn = *newTxn
			if !txnMeta.txn.Writing {
				panic("tracking a non-writing txn")
			}
			txnMeta.setLastUpdate(tc.clock.PhysicalNow())
		}
		if err == nil {
			// For successful transactional requests, always send the updated txn
			// record back.
			br.Txn = newTxn
		}
		return pErr
	}()
}
Esempio n. 14
0
// Send implements the batch.Sender interface. If the request is part of a
// transaction, the TxnCoordSender adds the transaction to a map of active
// transactions and begins heartbeating it. Every subsequent request for the
// same transaction updates the lastUpdate timestamp to prevent live
// transactions from being considered abandoned and garbage collected.
// Read/write mutating requests have their key or key range added to the
// transaction's interval tree of key ranges for eventual cleanup via resolved
// write intents; they're tagged to an outgoing EndTransaction request, with
// the receiving replica in charge of resolving them.
func (tc *TxnCoordSender) Send(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
	if err := tc.maybeBeginTxn(&ba); err != nil {
		return nil, roachpb.NewError(err)
	}
	ba.CmdID = ba.GetOrCreateCmdID(tc.clock.PhysicalNow())
	var startNS int64

	// This is the earliest point at which the request has a ClientCmdID and/or
	// TxnID (if applicable). Begin a Trace which follows this request.
	trace := tc.tracer.NewTrace(tracer.Coord, &ba)
	defer trace.Finalize()
	defer trace.Epoch("sending batch")()
	ctx = tracer.ToCtx(ctx, trace)

	var id string // optional transaction ID
	if ba.Txn != nil {
		// If this request is part of a transaction...
		id = string(ba.Txn.ID)
		// Verify that if this Transaction is not read-only, we have it on
		// file. If not, refuse writes - the client must have issued a write on
		// another coordinator previously.
		if ba.Txn.Writing && ba.IsTransactionWrite() {
			tc.Lock()
			_, ok := tc.txns[id]
			tc.Unlock()
			if !ok {
				return nil, roachpb.NewError(util.Errorf("transaction must not write on multiple coordinators"))
			}
		}

		// Set the timestamp to the original timestamp for read-only
		// commands and to the transaction timestamp for read/write
		// commands.
		if ba.IsReadOnly() {
			ba.Timestamp = ba.Txn.OrigTimestamp
		} else {
			ba.Timestamp = ba.Txn.Timestamp
		}

		if rArgs, ok := ba.GetArg(roachpb.EndTransaction); ok {
			et := rArgs.(*roachpb.EndTransactionRequest)
			if len(et.Key) != 0 {
				return nil, roachpb.NewError(util.Errorf("EndTransaction must not have a Key set"))
			}
			et.Key = ba.Txn.Key
			// Remember when EndTransaction started in case we want to
			// be linearizable.
			startNS = tc.clock.PhysicalNow()
			if len(et.Intents) > 0 {
				// TODO(tschottdorf): it may be useful to allow this later.
				// That would be part of a possible plan to allow txns which
				// write on multiple coordinators.
				return nil, roachpb.NewError(util.Errorf("client must not pass intents to EndTransaction"))
			}
			tc.Lock()
			txnMeta, metaOK := tc.txns[id]
			if id != "" && metaOK {
				et.Intents = txnMeta.intents()
			}
			tc.Unlock()

			if intents := ba.GetIntents(); len(intents) > 0 {
				// Writes in Batch, so EndTransaction is fine. Should add
				// outstanding intents to EndTransaction, though.
				// TODO(tschottdorf): possible issues when the batch fails,
				// but the intents have been added anyways.
				// TODO(tschottdorf): some of these intents may be covered
				// by others, for example {[a,b), a}). This can lead to
				// some extra requests when those are non-local to the txn
				// record. But it doesn't seem worth optimizing now.
				et.Intents = append(et.Intents, intents...)
			} else if !metaOK {
				// If we don't have the transaction, then this must be a retry
				// by the client. We can no longer reconstruct a correct
				// request so we must fail.
				//
				// TODO(bdarnell): if we had a GetTransactionStatus API then
				// we could lookup the transaction and return either nil or
				// TransactionAbortedError instead of this ambivalent error.
				return nil, roachpb.NewError(util.Errorf("transaction is already committed or aborted"))
			}
			if len(et.Intents) == 0 {
				// If there aren't any intents, then there's factually no
				// transaction to end. Read-only txns have all of their state in
				// the client.
				return nil, roachpb.NewError(util.Errorf("cannot commit a read-only transaction"))
			}
			if log.V(1) {
				for _, intent := range et.Intents {
					trace.Event(fmt.Sprintf("intent: [%s,%s)", intent.Key, intent.EndKey))
				}
			}
		}
	}

	// Send the command through wrapped sender, taking appropriate measures
	// on error.
	var br *roachpb.BatchResponse
	{
		var pErr *roachpb.Error
		br, pErr = tc.wrapped.Send(ctx, ba)

		if _, ok := pErr.GoError().(*roachpb.OpRequiresTxnError); ok {
			br, pErr = tc.resendWithTxn(ba)
		}

		if pErr := tc.updateState(ctx, ba, br, pErr); pErr != nil {
			return nil, pErr
		}
	}

	if br.Txn == nil {
		return br, nil
	}

	if _, ok := ba.GetArg(roachpb.EndTransaction); !ok {
		return br, nil
	}
	// If the --linearizable flag is set, we want to make sure that
	// all the clocks in the system are past the commit timestamp
	// of the transaction. This is guaranteed if either
	// - the commit timestamp is MaxOffset behind startNS
	// - MaxOffset ns were spent in this function
	// when returning to the client. Below we choose the option
	// that involves less waiting, which is likely the first one
	// unless a transaction commits with an odd timestamp.
	if tsNS := br.Txn.Timestamp.WallTime; startNS > tsNS {
		startNS = tsNS
	}
	sleepNS := tc.clock.MaxOffset() -
		time.Duration(tc.clock.PhysicalNow()-startNS)
	if tc.linearizable && sleepNS > 0 {
		defer func() {
			if log.V(1) {
				log.Infof("%v: waiting %s on EndTransaction for linearizability", br.Txn.Short(), util.TruncateDuration(sleepNS, time.Millisecond))
			}
			time.Sleep(sleepNS)
		}()
	}
	if br.Txn.Status != roachpb.PENDING {
		tc.cleanupTxn(trace, *br.Txn)
	}
	return br, nil
}
Esempio n. 15
0
// TestClientRetryNonTxn verifies that non-transactional client will
// succeed despite write/write and read/write conflicts. In the case
// where the non-transactional put can push the txn, we expect the
// transaction's value to be written after all retries are complete.
func TestClientRetryNonTxn(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := server.StartTestServer(t)
	defer s.Stop()
	s.SetRangeRetryOptions(retry.Options{
		InitialBackoff: 1 * time.Millisecond,
		MaxBackoff:     5 * time.Millisecond,
		Multiplier:     2,
		MaxRetries:     1,
	})

	testCases := []struct {
		args        roachpb.Request
		isolation   roachpb.IsolationType
		canPush     bool
		expAttempts int
	}{
		// Write/write conflicts.
		{&roachpb.PutRequest{}, roachpb.SNAPSHOT, true, 2},
		{&roachpb.PutRequest{}, roachpb.SERIALIZABLE, true, 2},
		{&roachpb.PutRequest{}, roachpb.SNAPSHOT, false, 1},
		{&roachpb.PutRequest{}, roachpb.SERIALIZABLE, false, 1},
		// Read/write conflicts.
		{&roachpb.GetRequest{}, roachpb.SNAPSHOT, true, 1},
		{&roachpb.GetRequest{}, roachpb.SERIALIZABLE, true, 2},
		{&roachpb.GetRequest{}, roachpb.SNAPSHOT, false, 1},
		{&roachpb.GetRequest{}, roachpb.SERIALIZABLE, false, 1},
	}
	// Lay down a write intent using a txn and attempt to write to same
	// key. Try this twice--once with priorities which will allow the
	// intent to be pushed and once with priorities which will not.
	for i, test := range testCases {
		key := roachpb.Key(fmt.Sprintf("key-%d", i))
		var txnPri int32 = 1
		var clientPri float64 = 1
		if test.canPush {
			clientPri = 2
		} else {
			txnPri = 2
		}

		db, sender := createTestNotifyClient(s.Stopper(), s.ServingAddr(), -clientPri)

		// doneCall signals when the non-txn read or write has completed.
		doneCall := make(chan struct{})
		count := 0 // keeps track of retries
		pErr := db.Txn(func(txn *client.Txn) *roachpb.Error {
			if test.isolation == roachpb.SNAPSHOT {
				if pErr := txn.SetIsolation(roachpb.SNAPSHOT); pErr != nil {
					return pErr
				}
			}
			txn.InternalSetPriority(int32(txnPri))

			count++
			// Lay down the intent.
			if pErr := txn.Put(key, "txn-value"); pErr != nil {
				return pErr
			}
			// The wait group lets us pause txn until after the non-txn method has run once.
			wg := sync.WaitGroup{}
			// On the first true, send the non-txn put or get.
			if count == 1 {
				// We use a "notifying" sender here, which allows us to know exactly when the
				// call has been processed; otherwise, we'd be dependent on timing.
				sender.reset(&wg)
				// We must try the non-txn put or get in a goroutine because
				// it might have to retry and will only succeed immediately in
				// the event we can push.
				go func() {
					var pErr *roachpb.Error
					for i := 0; ; i++ {
						if _, ok := test.args.(*roachpb.GetRequest); ok {
							_, pErr = db.Get(key)
						} else {
							pErr = db.Put(key, "value")
						}
						if _, ok := pErr.GoError().(*roachpb.WriteIntentError); !ok {
							break
						}
					}
					close(doneCall)
					if pErr != nil {
						t.Fatalf("%d: expected success on non-txn call to %s; got %s", i, test.args.Method(), pErr)
					}
				}()
				sender.wait()
			}
			return nil
		})
		if pErr != nil {
			t.Fatalf("%d: expected success writing transactionally; got %s", i, pErr)
		}

		// Make sure non-txn put or get has finished.
		<-doneCall

		// Get the current value to verify whether the txn happened first.
		gr, pErr := db.Get(key)
		if pErr != nil {
			t.Fatalf("%d: expected success getting %q: %s", i, key, pErr)
		}

		if _, isGet := test.args.(*roachpb.GetRequest); isGet || test.canPush {
			if !bytes.Equal(gr.ValueBytes(), []byte("txn-value")) {
				t.Errorf("%d: expected \"txn-value\"; got %q", i, gr.ValueBytes())
			}
		} else {
			if !bytes.Equal(gr.ValueBytes(), []byte("value")) {
				t.Errorf("%d: expected \"value\"; got %q", i, gr.ValueBytes())
			}
		}
		if count != test.expAttempts {
			t.Errorf("%d: expected %d attempt(s); got %d", i, test.expAttempts, count)
		}
	}
}
Esempio n. 16
0
// updateState updates the transaction state in both the success and
// error cases, applying those updates to the corresponding txnMeta
// object when adequate. It also updates certain errors with the
// updated transaction for use by client restarts.
func (tc *TxnCoordSender) updateState(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error) *roachpb.Error {
	trace := tracer.FromCtx(ctx)
	newTxn := &roachpb.Transaction{}
	newTxn.Update(ba.GetTxn())
	err := pErr.GoError()
	switch t := err.(type) {
	case nil:
		newTxn.Update(br.GetTxn())
		// Move txn timestamp forward to response timestamp if applicable.
		// TODO(tschottdorf): see (*Replica).executeBatch and comments within.
		// Looks like this isn't necessary any more, nor did it prevent a bug
		// referenced in a TODO there.
		newTxn.Timestamp.Forward(br.Timestamp)
	case *roachpb.TransactionStatusError:
		// Likely already committed or more obscure errors such as epoch or
		// timestamp regressions; consider txn dead.
		defer tc.cleanupTxn(trace, t.Txn)
	case *roachpb.OpRequiresTxnError:
		// TODO(tschottdorf): range-spanning autowrap currently broken.
		panic("TODO(tschottdorf): disabled")
	case *roachpb.ReadWithinUncertaintyIntervalError:
		// Mark the host as certain. See the protobuf comment for
		// Transaction.CertainNodes for details.
		if t.NodeID == 0 {
			panic("no replica set in header on uncertainty restart")
		}
		newTxn.CertainNodes.Add(t.NodeID)
		// If the reader encountered a newer write within the uncertainty
		// interval, move the timestamp forward, just past that write or
		// up to MaxTimestamp, whichever comes first.
		candidateTS := newTxn.MaxTimestamp
		candidateTS.Backward(t.ExistingTimestamp.Add(0, 1))
		newTxn.Timestamp.Forward(candidateTS)
		newTxn.Restart(ba.GetUserPriority(), newTxn.Priority, newTxn.Timestamp)
		t.Txn = *newTxn
	case *roachpb.TransactionAbortedError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(t.Txn.Timestamp)
		newTxn.Priority = t.Txn.Priority
		t.Txn = *newTxn
		// Clean up the freshly aborted transaction in defer(), avoiding a
		// race with the state update below.
		defer tc.cleanupTxn(trace, t.Txn)
	case *roachpb.TransactionPushError:
		// Increase timestamp if applicable, ensuring that we're
		// just ahead of the pushee.
		newTxn.Timestamp.Forward(t.PusheeTxn.Timestamp.Add(0, 1))
		newTxn.Restart(ba.GetUserPriority(), t.PusheeTxn.Priority-1, newTxn.Timestamp)
		t.Txn = newTxn
	case *roachpb.TransactionRetryError:
		// Increase timestamp if applicable.
		newTxn.Timestamp.Forward(t.Txn.Timestamp)
		newTxn.Restart(ba.GetUserPriority(), t.Txn.Priority, newTxn.Timestamp)
		t.Txn = *newTxn
	case roachpb.TransactionRestartError:
		// Assertion: The above cases should exhaust all ErrorDetails which
		// carry a Transaction.
		if pErr.Detail != nil {
			panic(fmt.Sprintf("unhandled TransactionRestartError %T", err))
		}
	}

	return func() *roachpb.Error {
		if len(newTxn.ID) <= 0 {
			return pErr
		}
		id := string(newTxn.ID)
		tc.Lock()
		defer tc.Unlock()
		txnMeta := tc.txns[id]
		// For successful transactional requests, keep the written intents and
		// the updated transaction record to be sent along with the reply.
		// The transaction metadata is created with the first writing operation
		// TODO(tschottdorf): already computed the intents prior to sending,
		// consider re-using those.
		if intents := ba.GetIntents(); len(intents) > 0 && err == nil {
			if txnMeta == nil {
				newTxn.Writing = true
				txnMeta = &txnMetadata{
					txn:              *newTxn,
					keys:             cache.NewIntervalCache(cache.Config{Policy: cache.CacheNone}),
					firstUpdateNanos: tc.clock.PhysicalNow(),
					lastUpdateNanos:  tc.clock.PhysicalNow(),
					timeoutDuration:  tc.clientTimeout,
					txnEnd:           make(chan struct{}),
				}
				tc.txns[id] = txnMeta
				// If the transaction is already over, there's no point in
				// launching a one-off coordinator which will shut down right
				// away.
				if _, isEnding := ba.GetArg(roachpb.EndTransaction); !isEnding {
					trace.Event("coordinator spawns")
					if !tc.stopper.RunAsyncTask(func() {
						tc.heartbeatLoop(id)
					}) {
						// The system is already draining and we can't start the
						// heartbeat. We refuse new transactions for now because
						// they're likely not going to have all intents committed.
						// In principle, we can relax this as needed though.
						tc.unregisterTxnLocked(id)
						return roachpb.NewError(&roachpb.NodeUnavailableError{})
					}
				}
			}
			for _, intent := range intents {
				txnMeta.addKeyRange(intent.Key, intent.EndKey)
			}
		}
		// Update our record of this transaction, even on error.
		if txnMeta != nil {
			txnMeta.txn.Update(newTxn) // better to replace after #2300
			if !txnMeta.txn.Writing {
				panic("tracking a non-writing txn")
			}
			txnMeta.setLastUpdate(tc.clock.PhysicalNow())
		}
		if err == nil {
			// For successful transactional requests, always send the updated txn
			// record back.
			if br.Txn == nil {
				br.Txn = &roachpb.Transaction{}
			}
			*br.Txn = *newTxn
		}
		return pErr
	}()
}
Esempio n. 17
0
// Query returns datapoints for the named time series during the supplied time
// span.  Data is returned as a series of consecutive data points.
//
// Data is queried only at the Resolution supplied: if data for the named time
// series is not stored at the given resolution, an empty result will be
// returned.
//
// All data stored on the server is downsampled to some degree; the data points
// returned represent the average value within a sample period. Each datapoint's
// timestamp falls in the middle of the sample period it represents.
//
// If data for the named time series was collected from multiple sources, each
// returned datapoint will represent the sum of datapoints from all sources at
// the same time. The returned string slices contains a list of all sources for
// the metric which were aggregated to produce the result.
func (db *DB) Query(query TimeSeriesQueryRequest_Query, r Resolution,
	startNanos, endNanos int64) ([]*TimeSeriesDatapoint, []string, error) {
	// Normalize startNanos and endNanos the nearest SampleDuration boundary.
	startNanos -= startNanos % r.SampleDuration()

	var rows []client.KeyValue
	if len(query.Sources) == 0 {
		// Based on the supplied timestamps and resolution, construct start and end
		// keys for a scan that will return every key with data relevant to the
		// query.
		startKey := MakeDataKey(query.Name, "" /* source */, r, startNanos)
		endKey := MakeDataKey(query.Name, "" /* source */, r, endNanos).PrefixEnd()
		var pErr *roachpb.Error
		rows, pErr = db.db.Scan(startKey, endKey, 0)
		if pErr != nil {
			return nil, nil, pErr.GoError()
		}
	} else {
		b := db.db.NewBatch()
		// Iterate over all key timestamps which may contain data for the given
		// sources, based on the given start/end time and the resolution.
		for currentTimestamp := startNanos; currentTimestamp <= endNanos; currentTimestamp += r.KeyDuration() {
			for _, source := range query.Sources {
				key := MakeDataKey(query.Name, source, r, currentTimestamp)
				b.Get(key)
			}
		}
		pErr := db.db.Run(b)
		if pErr != nil {
			return nil, nil, pErr.GoError()
		}
		for _, result := range b.Results {
			row := result.Rows[0]
			if row.Value == nil {
				continue
			}
			rows = append(rows, row)
		}
	}

	// Construct a new dataSpan for each distinct source encountered in the
	// query. Each dataspan will contain all data queried from the same source.
	sourceSpans := make(map[string]*dataSpan)
	for _, row := range rows {
		data := &roachpb.InternalTimeSeriesData{}
		if err := row.ValueProto(data); err != nil {
			return nil, nil, err
		}

		_, source, _, _, err := DecodeDataKey(row.Key)
		if err != nil {
			return nil, nil, err
		}
		if _, ok := sourceSpans[source]; !ok {
			sourceSpans[source] = &dataSpan{
				startNanos:  startNanos,
				sampleNanos: data.SampleDurationNanos,
				datas:       make([]calibratedData, 0, 1),
			}
		}
		if err := sourceSpans[source].addData(data); err != nil {
			return nil, nil, err
		}
	}

	var responseData []*TimeSeriesDatapoint
	sources := make([]string, 0, len(sourceSpans))

	// Create an interpolatingIterator for each dataSpan.
	iters := make(unionIterator, 0, len(sourceSpans))
	for name, span := range sourceSpans {
		sources = append(sources, name)
		iters = append(iters, span.newIterator())
	}

	// Iterate through all values in the iteratorSet, adding a datapoint to
	// the response for each value.
	var valueFn func() float64
	switch query.GetAggregator() {
	case TimeSeriesQueryAggregator_AVG:
		valueFn = iters.avg
	case TimeSeriesQueryAggregator_AVG_RATE:
		valueFn = iters.dAvg
	}

	iters.init()
	for iters.isValid() && iters.timestamp() <= endNanos {
		responseData = append(responseData, &TimeSeriesDatapoint{
			TimestampNanos: iters.timestamp(),
			Value:          valueFn(),
		})
		iters.advance()
	}

	return responseData, sources, nil
}