Exemple #1
// printRaftLogEntry decodes kv.Value as MVCC metadata, reads the raft entry
// stored in the metadata's raw bytes, and prints it to stdout. When the entry
// carries data, the embedded raft command is decoded and printed as well;
// otherwise the entry is printed with an "EMPTY" marker. Every return visible
// here yields false for the boolean result; decode failures are returned as
// the error.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func printRaftLogEntry(kv engine.MVCCKeyValue) (bool, error) {
	var meta engine.MVCCMetadata
	if err := meta.Unmarshal(kv.Value); err != nil {
		return false, err
	// Wrap the metadata's raw bytes in a roachpb.Value so GetProto can decode them.
	value := roachpb.Value{
		RawBytes: meta.RawBytes,
	var ent raftpb.Entry
	if err := value.GetProto(&ent); err != nil {
		return false, err
	if len(ent.Data) > 0 {
		// The first result of DecodeRaftCommand is discarded; only the
		// command payload is unmarshaled.
		_, cmdData := storage.DecodeRaftCommand(ent.Data)
		var cmd roachpb.RaftCommand
		if err := cmd.Unmarshal(cmdData); err != nil {
			return false, err
		// Clear the raw payload before printing the entry; the decoded
		// command is printed separately on the next line.
		ent.Data = nil
		fmt.Printf("%s\n", &ent)
		fmt.Printf("%s\n", &cmd)
	} else {
		fmt.Printf("%s: EMPTY\n", &ent)
	return false, nil
Exemple #2
// StoreData writes the supplied time series data to the cockroach server.
// Stored data will be sampled at the supplied resolution.
//
// Each input series is converted to internal format, one key/value pair is
// built per internal message, and the resulting batch is run through db.db.
// The first conversion or encoding error aborts the call and is returned.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func (db *DB) StoreData(r Resolution, data []tspb.TimeSeriesData) error {
	var kvs []roachpb.KeyValue

	// Process data collection: data is converted to internal format, and a key
	// is generated for each internal message.
	for _, d := range data {
		idatas, err := d.ToInternal(r.KeyDuration(), r.SampleDuration())
		if err != nil {
			return err
		for _, idata := range idatas {
			var value roachpb.Value
			if err := value.SetProto(&idata); err != nil {
				return err
			// The key is derived from the series name, source, resolution
			// and the start timestamp of the internal message.
			kvs = append(kvs, roachpb.KeyValue{
				Key:   MakeDataKey(d.Name, d.Source, r, idata.StartTimestampNanos),
				Value: value,

	// Send the individual internal merge requests.
	b := client.Batch{}
	for _, kv := range kvs {
			// NOTE(review): the statement that adds a request for kv to b
			// appears to be missing from this excerpt; only the request's
			// Span and Value fields remain.
			Span: roachpb.Span{
				Key: kv.Key,
			Value: kv.Value,

	return db.db.Run(&b)
// setAppliedIndex issues two engine.MVCCPut calls against eng with stats ms,
// returning early with the error if the first put fails and otherwise
// returning the result of the second.
// NOTE(review): the key, timestamp and value arguments of both puts are
// missing from this excerpt, as are the closing braces. Judging by the name
// and parameters it presumably persists applied-index state for rangeID,
// including leaseAppliedIndex — confirm against the full source.
func setAppliedIndex(
	ctx context.Context,
	eng engine.ReadWriter,
	ms *enginepb.MVCCStats,
	rangeID roachpb.RangeID,
	leaseAppliedIndex uint64,
) error {
	var value roachpb.Value

	if err := engine.MVCCPut(ctx, eng, ms,
		nil /* txn */); err != nil {
		return err
	return engine.MVCCPut(ctx, eng, ms,
		nil /* txn */)
Exemple #4
// setFrozenStatus writes val under keys.RangeFrozenStatusKey(rangeID) at
// hlc.ZeroTimestamp via engine.MVCCPut, using a background context, and
// returns the put's error.
// NOTE(review): as shown, val is still the zero roachpb.Value when written;
// the line that encodes frozen into val appears to be missing from this
// excerpt (as is the closing brace).
func setFrozenStatus(
	eng engine.ReadWriter, ms *enginepb.MVCCStats, rangeID roachpb.RangeID, frozen bool,
) error {
	var val roachpb.Value
	return engine.MVCCPut(context.Background(), eng, ms,
		keys.RangeFrozenStatusKey(rangeID), hlc.ZeroTimestamp, val, nil)
Exemple #5
// insertCPutFn is used by insertRow when conflicts should be respected.
// It adds a CPut of value at key to batch b, passing nil as the third CPut
// argument. When verbose logging at level 2 is enabled, the key and the
// pretty-printed value are logged first.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func insertCPutFn(b *client.Batch, key *roachpb.Key, value *roachpb.Value) {
	// TODO(dan): We want to do this V(2) log everywhere in sql. Consider making a
	// client.Batch wrapper instead of inlining it everywhere.
	if log.V(2) {
		// Depth 1 is passed to InfofDepth — presumably so the log line is
		// attributed to the caller rather than this helper; confirm.
		log.InfofDepth(1, "CPut %s -> %s", *key, value.PrettyPrint())
	b.CPut(key, value, nil)
// setLastIndex persists a new last index.
// The write goes through engine.MVCCPut against eng with nil stats, keyed by
// keys.RaftLastIndexKey(rangeID), outside any transaction.
// NOTE(review): the timestamp/value arguments of the put and the line that
// encodes lastIndex into value are missing from this excerpt, as is the
// closing brace.
func setLastIndex(eng engine.Engine, rangeID roachpb.RangeID, lastIndex uint64) error {
	var value roachpb.Value

	return engine.MVCCPut(eng, nil, keys.RaftLastIndexKey(rangeID),
		nil /* txn */)
Exemple #7
// setLastIndex writes to keys.RaftLastIndexKey(rangeID) through
// engine.MVCCPut, using a background context, nil stats and no transaction,
// and returns the put's error.
// NOTE(review): the timestamp/value arguments of the put and the line that
// encodes lastIndex into value are missing from this excerpt, as is the
// closing brace; presumably the function persists lastIndex — confirm.
func setLastIndex(eng engine.ReadWriter, rangeID roachpb.RangeID, lastIndex uint64) error {
	var value roachpb.Value

	return engine.MVCCPut(context.Background(), eng, nil, keys.RaftLastIndexKey(rangeID),
		nil /* txn */)
Exemple #8
// maybeUnmarshalInline unmarshals v as MVCC metadata and then decodes the
// metadata's RawBytes into dest via roachpb.Value.GetProto. It returns the
// first error from either step.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func maybeUnmarshalInline(v []byte, dest proto.Message) error {
	var meta enginepb.MVCCMetadata
	if err := meta.Unmarshal(v); err != nil {
		return err
	// Wrap the metadata's raw bytes in a roachpb.Value so GetProto can decode them.
	value := roachpb.Value{
		RawBytes: meta.RawBytes,
	return value.GetProto(dest)
// setAppliedIndex persists a new applied index.
// The write goes through engine.MVCCPut against eng with stats ms, outside
// any transaction.
// NOTE(review): the key, timestamp and value arguments of the put and the
// line that encodes appliedIndex into value are missing from this excerpt,
// as is the closing brace.
func setAppliedIndex(eng engine.Engine, ms *engine.MVCCStats, rangeID roachpb.RangeID, appliedIndex uint64) error {
	var value roachpb.Value

	return engine.MVCCPut(eng, ms,
		nil /* txn */)
// mustGetInt decodes an int64 value from the bytes field of the receiver
// and panics if the bytes field is not 0 or 8 bytes in length.
// A nil v yields 0 without attempting to decode.
// NOTE(review): the body of the error branch (presumably the panic described
// above) and the closing braces are missing from this excerpt.
func mustGetInt(v *roachpb.Value) int64 {
	if v == nil {
		return 0
	i, err := v.GetInt()
	if err != nil {
	return i
Exemple #11
// newInfo builds an Info stamped with the current time: the value's
// timestamp and OrigStamp are both set to now in Unix nanoseconds, and
// TTLStamp is set one millisecond later.
// NOTE(review): val is not used in the lines visible here; the statement
// that stores it in v appears to be missing from this excerpt, as are the
// closing braces.
func newInfo(val float64) Info {
	now := timeutil.Now()

	v := roachpb.Value{Timestamp: hlc.Timestamp{WallTime: now.UnixNano()}}

	return Info{
		Value:     v,
		OrigStamp: now.UnixNano(),
		TTLStamp:  now.Add(time.Millisecond).UnixNano(),
// writeRandomTimeSeriesDataToRange generates random time series data at
// ts.Resolution10s, converts it to internal format and sends one MergeRequest
// per internal message to the range identified by rangeID on store. Keys are
// keyPrefix followed by the varint-encoded start timestamp of each message.
// It returns a key built from keyPrefix and 100*r.KeyDuration(), intended as
// the approximate midpoint of the written keys.
// NOTE(review): error-branch bodies and closing braces are missing from this
// excerpt.
func writeRandomTimeSeriesDataToRange(
	t testing.TB,
	store *storage.Store,
	rangeID roachpb.RangeID,
	keyPrefix []byte,
) (midpoint []byte) {
	// The random source uses a fixed seed of 0.
	src := rand.New(rand.NewSource(0))
	r := ts.Resolution10s
	for i := 0; i < 20; i++ {
		var data []tspb.TimeSeriesData
		// Note that the loop bounds below call src.Int63n in the condition,
		// so a fresh bound is drawn on every iteration.
		for j := int64(0); j <= src.Int63n(5); j++ {
			d := tspb.TimeSeriesData{
				Name:   "test.random.metric",
				Source: "cpu01",
			for k := int64(0); k <= src.Int63n(10); k++ {
				// Timestamps are multiples of the key duration, with the
				// multiplier drawn from [0,200).
				d.Datapoints = append(d.Datapoints, tspb.TimeSeriesDatapoint{
					TimestampNanos: src.Int63n(200) * r.KeyDuration(),
					Value:          src.Float64(),
			data = append(data, d)
		for _, d := range data {
			idatas, err := d.ToInternal(r.KeyDuration(), r.SampleDuration())
			if err != nil {
			for _, idata := range idatas {
				var value roachpb.Value
				if err := value.SetProto(&idata); err != nil {
				mArgs := roachpb.MergeRequest{
					Span: roachpb.Span{
						Key: encoding.EncodeVarintAscending(keyPrefix, idata.StartTimestampNanos),
					Value: value,
				// The request is addressed to rangeID explicitly via the header.
				if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
					RangeID: rangeID,
				}, &mArgs); pErr != nil {
	// Return approximate midway point (100 is midway between random timestamps in range [0,200)).
	// keyPrefix is copied first so that encoding does not append into the
	// caller's slice.
	midKey := append([]byte(nil), keyPrefix...)
	midKey = encoding.EncodeVarintAscending(midKey, 100*r.KeyDuration())
	return keys.MakeRowSentinelKey(midKey)
Exemple #13
// createDefaultZoneConfig creates the key/value pairs for the default zone
// config entry. The returned slice holds a single pair: the zone key for
// keys.RootNamespaceID mapped to the marshaled config.DefaultZoneConfig().
// A marshaling failure is reported through log.Fatalf.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func createDefaultZoneConfig() []roachpb.KeyValue {
	var ret []roachpb.KeyValue
	value := roachpb.Value{}
	desc := config.DefaultZoneConfig()
	if err := value.SetProto(&desc); err != nil {
		log.Fatalf("could not marshal %v", desc)
	ret = append(ret, roachpb.KeyValue{
		Key:   MakeZoneKey(keys.RootNamespaceID),
		Value: value,
	return ret
Exemple #14
// newInfo builds an info wrapping an Info stamped with the current time: the
// value's timestamp is now in Unix nanoseconds and TTLStamp is one
// millisecond later.
// NOTE(review): val is not used in the lines visible here; the statement
// that stores it in v appears to be missing from this excerpt, as are the
// closing braces.
func newInfo(val float64) info {
	now := time.Now()

	v := roachpb.Value{Timestamp: &roachpb.Timestamp{WallTime: now.UnixNano()}}

	return info{
		Info: Info{
			Value:    v,
			TTLStamp: now.Add(time.Millisecond).UnixNano(),
Exemple #15
// setupMVCCScanData writes up to numVersions values at each of numKeys
// keys. The number of versions written for each key is chosen
// randomly according to a uniform distribution. Each successive
// version is written starting at 5ns and then in 5ns increments. This
// allows scans at various times, starting at t=5ns, and continuing to
// t=5ns*(numVersions+1). A version for each key will be read on every
// such scan, but the dynamics of the scan will change depending on
// the historical timestamp. Earlier timestamps mean scans which must
// skip more historical versions; later timestamps mean scans which
// skip fewer.
// The creation of the rocksdb database is time consuming, especially
// for larger numbers of versions. The database is persisted between
// runs and stored in the current directory as
// "mvcc_scan_<versions>_<keys>".
//
// It returns the opened database together with the stopper that was passed
// to NewRocksDB.
// NOTE(review): error-branch bodies and closing braces are missing from this
// excerpt.
func setupMVCCScanData(numVersions, numKeys int, b *testing.B) (*RocksDB, *stop.Stopper) {
	loc := fmt.Sprintf("mvcc_scan_%d_%d", numVersions, numKeys)

	// Remember whether the directory was already present so that data
	// generation can be skipped after the database is opened.
	exists := true
	if _, err := os.Stat(loc); os.IsNotExist(err) {
		exists = false

	log.Infof("creating mvcc data: %s", loc)
	const cacheSize = 8 << 30 // 8 GB
	stopper := stop.NewStopper()
	rocksdb := NewRocksDB(roachpb.Attributes{Attrs: []string{"ssd"}}, loc, cacheSize, stopper)
	if err := rocksdb.Open(); err != nil {
		b.Fatalf("could not create new rocksdb db instance at %s: %v", loc, err)

	if exists {
		return rocksdb, stopper

	rng, _ := randutil.NewPseudoRand()
	keys := make([]roachpb.Key, numKeys)
	nvs := make([]int, numKeys)
	// One batch is built per version; t doubles as the version number and,
	// multiplied by 5, as the wall time in nanoseconds.
	for t := 1; t <= numVersions; t++ {
		walltime := int64(5 * t)
		ts := makeTS(walltime, 0)
		batch := rocksdb.NewBatch()
		for i := 0; i < numKeys; i++ {
			// On the first pass, create the key and pick how many versions
			// it will receive (between 1 and numVersions inclusive).
			// NOTE(review): the version count is drawn from the package-level
			// rand rather than rng — confirm whether that is intended.
			if t == 1 {
				keys[i] = roachpb.Key(encoding.EncodeUvarint([]byte("key-"), uint64(i)))
				nvs[i] = int(rand.Int31n(int32(numVersions)) + 1)
			// Only write values if this iteration is at most the random
			// number of versions chosen for this key.
			if t <= nvs[i] {
				value := roachpb.Value{Bytes: randutil.RandBytes(rng, 1024)}
				if err := MVCCPut(batch, nil, keys[i], ts, value, nil); err != nil {
		if err := batch.Commit(); err != nil {
	rocksdb.CompactRange(nil, nil)

	return rocksdb, stopper
Exemple #16
// storeGossipUpdate is the gossip callback used to keep the StorePool up to date.
// It decodes content as a StoreDescriptor, looks up the detail record for the
// descriptor's StoreID and marks it alive as of sp.clock.Now(). Decode errors
// are logged.
// NOTE(review): the return after the logged error, the sp.mu.Lock() call
// matching the deferred Unlock, and the closing braces are missing from this
// excerpt.
func (sp *StorePool) storeGossipUpdate(_ string, content roachpb.Value) {
	var storeDesc roachpb.StoreDescriptor
	if err := content.GetProto(&storeDesc); err != nil {
		log.Error(context.TODO(), err)

	defer sp.mu.Unlock()
	// Does this storeDetail exist yet?
	detail := sp.getStoreDetailLocked(storeDesc.StoreID)
	detail.markAlive(sp.clock.Now(), &storeDesc)
Exemple #17
// tryMeta attempts to interpret kv as a meta1/meta2 entry. Keys without
// either meta prefix are rejected with a "not a meta key" error. Otherwise
// the value is decoded as a RangeDescriptor and its descStr rendering is
// returned.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func tryMeta(kv engine.MVCCKeyValue) (string, error) {
	if !bytes.HasPrefix(kv.Key.Key, keys.Meta1Prefix) && !bytes.HasPrefix(kv.Key.Key, keys.Meta2Prefix) {
		return "", errors.New("not a meta key")
	// Rebuild a roachpb.Value from the MVCC key's timestamp and the raw bytes
	// so GetProto can decode it.
	value := roachpb.Value{
		Timestamp: kv.Key.Timestamp,
		RawBytes:  kv.Value,
	var desc roachpb.RangeDescriptor
	if err := value.GetProto(&desc); err != nil {
		return "", err
	return descStr(desc), nil
// PutSequence writes a sequence number for the specified family.
// It returns errEmptyID when sequence is not positive or family is empty,
// and returns nil without writing when rc.shouldCacheError(err) is false.
// Otherwise it returns the result of the MVCC put.
// NOTE(review): as shown, v is still the zero roachpb.Value when written;
// the line that encodes the sequence (and possibly err) into v appears to be
// missing from this excerpt, as are the closing braces.
func (rc *ResponseCache) PutSequence(e engine.Engine, family []byte, sequence int64, err error) error {
	if sequence <= 0 || len(family) == 0 {
		return errEmptyID
	if !rc.shouldCacheError(err) {
		return nil

	// Write the response value to the engine.
	key := keys.ResponseCacheKey(rc.rangeID, family)
	var v roachpb.Value
	return engine.MVCCPut(e, nil /* ms */, key, roachpb.ZeroTimestamp, v, nil /* txn */)
// BenchmarkMVCCMergeTimeSeries computes performance of merging time series data.
// A single-sample InternalTimeSeriesData is marshaled into a roachpb.Value,
// which is then handed to runMVCCMerge together with the argument 1024.
// NOTE(review): the error-branch body and closing braces are missing from
// this excerpt.
func BenchmarkMVCCMergeTimeSeries(b *testing.B) {
	ts := &roachpb.InternalTimeSeriesData{
		StartTimestampNanos: 0,
		SampleDurationNanos: 1000,
		Samples: []*roachpb.InternalTimeSeriesSample{
			{Offset: 0, Count: 1, Sum: 5.0},
	var value roachpb.Value
	if err := value.SetProto(ts); err != nil {
	runMVCCMerge(&value, 1024, b)
// append the given entries to the raft log. Takes the previous values of
// r.mu.lastIndex and r.mu.raftLogSize, and returns new values. We do this
// rather than modifying them directly because these modifications need to be
// atomic with the commit of the batch.
//
// With no entries, the previous values are returned unchanged. On any write,
// delete or setLastIndex error, the result is (0, 0, err).
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func (r *Replica) append(
	ctx context.Context,
	batch engine.ReadWriter,
	prevLastIndex uint64,
	prevRaftLogSize int64,
	entries []raftpb.Entry,
) (uint64, int64, error) {
	if len(entries) == 0 {
		return prevLastIndex, prevRaftLogSize, nil
	// diff accumulates the stats changes of every put and delete below; its
	// SysBytes is what adjusts the raft log size at the end.
	var diff enginepb.MVCCStats
	var value roachpb.Value
	for i := range entries {
		ent := &entries[i]
		key := keys.RaftLogKey(r.RangeID, ent.Index)
		if err := value.SetProto(ent); err != nil {
			return 0, 0, err
		var err error
		// Entries beyond the previous last index use MVCCBlindPut; entries
		// at or below it use the regular MVCCPut.
		if ent.Index > prevLastIndex {
			err = engine.MVCCBlindPut(ctx, batch, &diff, key, hlc.ZeroTimestamp, value, nil /* txn */)
		} else {
			err = engine.MVCCPut(ctx, batch, &diff, key, hlc.ZeroTimestamp, value, nil /* txn */)
		if err != nil {
			return 0, 0, err

	// Delete any previously appended log entries which never committed.
	lastIndex := entries[len(entries)-1].Index
	for i := lastIndex + 1; i <= prevLastIndex; i++ {
		err := engine.MVCCDelete(ctx, batch, &diff, keys.RaftLogKey(r.RangeID, i),
			hlc.ZeroTimestamp, nil /* txn */)
		if err != nil {
			return 0, 0, err

	if err := setLastIndex(ctx, batch, r.RangeID, lastIndex); err != nil {
		return 0, 0, err

	raftLogSize := prevRaftLogSize + diff.SysBytes

	return lastIndex, raftLogSize, nil
Exemple #21
// deadReplicasGossipUpdate is the gossip callback used to keep the StorePool up to date.
// It decodes content as StoreDeadReplicas, groups the listed replicas by
// RangeID and stores the resulting map on the detail record of the reporting
// store, replacing whatever was there. Decode errors are logged.
// NOTE(review): the return after the logged error, the sp.mu.Lock() call
// matching the deferred Unlock, and the closing braces are missing from this
// excerpt.
func (sp *StorePool) deadReplicasGossipUpdate(_ string, content roachpb.Value) {
	var replicas roachpb.StoreDeadReplicas
	if err := content.GetProto(&replicas); err != nil {
		log.Error(context.TODO(), err)

	defer sp.mu.Unlock()
	detail := sp.getStoreDetailLocked(replicas.StoreID)
	// Build a fresh map rather than mutating the existing one.
	deadReplicas := make(map[roachpb.RangeID][]roachpb.ReplicaDescriptor)
	for _, r := range replicas.Replicas {
		deadReplicas[r.RangeID] = append(deadReplicas[r.RangeID], r.Replica)
	detail.deadReplicas = deadReplicas
Exemple #22
// updateNodeAddress is a gossip callback which fires with each
// update to the node address. This allows us to compute the
// total size of the gossip network (for determining max peers
// each gossip node is allowed to have), as well as to create
// new resolvers for each encountered host and to write the
// set of gossip node addresses to persistent storage when it
// changes.
// NOTE(review): several statements are missing from this excerpt: the bodies
// of the error and skip branches, the g.mu.Lock() call matching the deferred
// Unlock, the remainder of the deferred max-peers closure, and the closing
// braces.
func (g *Gossip) updateNodeAddress(_ string, content roachpb.Value) {
	var desc roachpb.NodeDescriptor
	if err := content.GetProto(&desc); err != nil {

	defer g.mu.Unlock()

	// Recompute max peers based on size of network and set the max
	// sizes for incoming and outgoing node sets.
	// This runs as a deferred closure, i.e. when the function returns.
	defer func() {
		maxPeers := g.maxPeers(len(g.nodeDescs))

	// Skip if the node has already been seen or it's our own address.
	if _, ok := g.nodeDescs[desc.NodeID]; ok || desc.Address == g.is.NodeAddr {
	g.nodeDescs[desc.NodeID] = &desc

	// Add this new node to our list of resolvers so we can keep
	// connecting to gossip if the original resolvers go offline.
	r, err := resolver.NewResolverFromUnresolvedAddr(desc.Address)
	if err != nil {
		log.Warningf("bad address from gossip node %s: %s", desc, err)
	if !g.haveResolver(r) {
		g.resolvers = append(g.resolvers, r)
	// Add new address to bootstrap info and persist if possible.
	if !g.haveBootstrapAddress(desc.Address) {
		g.bootstrapInfo.Addresses = append(g.bootstrapInfo.Addresses, desc.Address)
		// Persistence is skipped when no storage is configured.
		if g.storage != nil {
			// TODO(spencer): need to clean up ancient gossip nodes, which
			//   will otherwise stick around in the bootstrap info forever.
			if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
Exemple #23
// tryRangeDescriptor attempts to interpret kv as a range descriptor entry.
// The key is decoded with keys.DecodeRangeKey and must carry
// keys.LocalRangeDescriptorSuffix; any other suffix produces a
// "wrong suffix" error. The value is then decoded as a RangeDescriptor and
// its descStr rendering is returned.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func tryRangeDescriptor(kv engine.MVCCKeyValue) (string, error) {
	_, suffix, _, err := keys.DecodeRangeKey(kv.Key.Key)
	if err != nil {
		return "", err
	if !bytes.Equal(suffix, keys.LocalRangeDescriptorSuffix) {
		return "", fmt.Errorf("wrong suffix: %s", suffix)
	// Wrap the raw bytes in a roachpb.Value so GetProto can decode them.
	value := roachpb.Value{
		RawBytes: kv.Value,
	var desc roachpb.RangeDescriptor
	if err := value.GetProto(&desc); err != nil {
		return "", err
	return descStr(desc), nil
Exemple #24
// TestUncertaintyRestarts runs a transaction that writes a value directly to
// the engine and then reads the same key through the transaction, checking
// both the value read and the number of restarts.
// Indirectly this tests that the transaction remembers the NodeID of the node
// being read from correctly, at least in this simple case. Not remembering the
// node would lead to thousands of transaction restarts and almost certainly a
// test timeout.
// NOTE(review): several statements are missing from this excerpt, including
// the use of offset, the increment of i inside the transaction closure, the
// error-branch bodies, and the closing braces.
func TestUncertaintyRestarts(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()
	// Set a large offset so that a busy restart-loop
	// really shows. Also makes sure that the values
	// we write in the future below don't actually
	// wind up in the past.
	offset := 4000 * time.Millisecond
	key := roachpb.Key("key")
	value := roachpb.Value{
		Bytes: nil, // Set for each Put
	// With the correct restart behaviour, we see only one restart
	// and the value read is the very first one (as nothing else
	// has been written)
	wantedBytes := []byte("value-0")

	// i is declared outside the closure so the final assertions can inspect
	// it after the transaction finishes.
	i := -1
	tErr := s.DB.Txn(func(txn *client.Txn) error {
		futureTS := s.Clock.Now()
		value.Bytes = []byte(fmt.Sprintf("value-%d", i))
		// Write straight to the engine rather than through txn.
		if err := engine.MVCCPut(s.Eng, nil, key, futureTS, value, nil); err != nil {
		gr, err := txn.Get(key)
		if err != nil {
			return err
		if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), wantedBytes) {
			t.Fatalf("%d: read wrong value: %v, wanted %q", i, gr.Value, wantedBytes)
		return nil
	if i != 1 {
		t.Errorf("txn restarted %d times, expected only one restart", i)
	if tErr != nil {
Exemple #25
// StoreData writes the supplied time series data to the cockroach server.
// Stored data will be sampled at the supplied resolution.
//
// Each input series is converted to internal format and one key/value pair
// is built per internal message; each pair is then sent in its own
// single-request batch. The first error encountered is returned and stops
// further processing.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func (db *DB) StoreData(r Resolution, data []TimeSeriesData) error {
	var kvs []roachpb.KeyValue

	// Process data collection: data is converted to internal format, and a key
	// is generated for each internal message.
	for _, d := range data {
		idatas, err := d.ToInternal(r.KeyDuration(), r.SampleDuration())
		if err != nil {
			return err
		for _, idata := range idatas {
			var value roachpb.Value
			if err := value.SetProto(idata); err != nil {
				return err
			// The key is derived from the series name, source, resolution
			// and the start timestamp of the internal message.
			kvs = append(kvs, roachpb.KeyValue{
				Key:   MakeDataKey(d.Name, d.Source, r, idata.StartTimestampNanos),
				Value: value,

	// Send the individual internal merge requests.
	// TODO(mrtracy): In the likely event that there are multiple values to
	// merge, they should be batched together instead of being called
	// individually. However, BatchRequest currently does not support
	// MergeRequest, probably because it cannot be part of a
	// transaction. Look into batching this.
	for _, kv := range kvs {
		// Note, this looks like a batch, but isn't a batch because we only add a
		// single request to it.
		b := &client.Batch{}
			// NOTE(review): the statement that adds a request for kv to b
			// appears to be missing from this excerpt; only the request's
			// Span and Value fields remain.
			Span: roachpb.Span{
				Key: kv.Key,
			Value: kv.Value,
		if err := db.db.Run(b); err != nil {
			return err

	return nil
Exemple #26
// printRangeDescriptor prints kv to stdout as a range descriptor when its key
// carries keys.LocalRangeDescriptorSuffix. Keys with any other suffix are
// skipped without error. Every return visible here yields false for the
// boolean result; key or value decode failures are returned as the error.
// NOTE(review): this excerpt appears truncated (closing braces are missing).
func printRangeDescriptor(kv engine.MVCCKeyValue) (bool, error) {
	startKey, suffix, _, err := keys.DecodeRangeKey(kv.Key.Key)
	if err != nil {
		return false, err
	if !bytes.Equal(suffix, keys.LocalRangeDescriptorSuffix) {
		return false, nil
	// Wrap the raw bytes in a roachpb.Value so GetProto can decode them.
	value := roachpb.Value{
		RawBytes: kv.Value,
	var desc roachpb.RangeDescriptor
	if err := value.GetProto(&desc); err != nil {
		return false, err
	// The output includes the start key, the MVCC timestamp of the key
	// converted with GoTime, and the descriptor itself.
	fmt.Printf("Range descriptor with start key %s at time %s\n%s\n", startKey, kv.Key.Timestamp.GoTime(), &desc)
	return false, nil
Exemple #27
// updateSystemConfig is the raw gossip info callback.
// Unmarshal the system config, and if successful, update our
// copy and run the callbacks.
// A key other than KeySystemConfig is reported through log.Fatalf. Each
// registered callback is started in its own goroutine with the new config.
// NOTE(review): the return after the logged unmarshal error, the
// g.systemConfigMu.Lock() call matching the deferred Unlock, and the closing
// braces are missing from this excerpt.
func (g *Gossip) updateSystemConfig(key string, content roachpb.Value) {
	if key != KeySystemConfig {
		log.Fatalf("wrong key received on SystemConfig callback: %s", key)
	cfg := &config.SystemConfig{}
	if err := content.GetProto(cfg); err != nil {
		log.Errorf("could not unmarshal system config on callback: %s", err)

	defer g.systemConfigMu.Unlock()
	g.systemConfig = cfg
	for _, cb := range g.systemConfigCallbacks {
		go cb(cfg)
Exemple #28
// updateNodeAddress is a gossip callback which fires with each
// update to the node address. This allows us to compute the
// total size of the gossip network (for determining max peers
// each gossip node is allowed to have), as well as to create
// new resolvers for each encountered host and to write the
// set of gossip node addresses to persistent storage when it
// changes.
// NOTE(review): several statements are missing from this excerpt: the bodies
// of the error and skip branches, the g.mu.Lock() call matching the deferred
// Unlock, the use of maxPeers, the resolver-adding call, and the closing
// braces.
func (g *Gossip) updateNodeAddress(_ string, content roachpb.Value) {
	var desc roachpb.NodeDescriptor
	if err := content.GetProto(&desc); err != nil {

	defer g.mu.Unlock()

	// Skip if the node has already been seen.
	if _, ok := g.nodeDescs[desc.NodeID]; ok {

	g.nodeDescs[desc.NodeID] = &desc

	// Recompute max peers based on size of network and set the max
	// sizes for incoming and outgoing node sets.
	maxPeers := g.maxPeers(len(g.nodeDescs))

	// Skip if it's our own address.
	if desc.Address == g.is.NodeAddr {

	// Add this new node address (if it's not already there) to our list
	// of resolvers so we can keep connecting to gossip if the original
	// resolvers go offline.

	// Add new address (if it's not already there) to bootstrap info and
	// persist if possible.
	// Persistence is skipped when no storage is configured.
	if g.maybeAddBootstrapAddress(desc.Address) && g.storage != nil {
		// TODO(spencer): need to clean up ancient gossip nodes, which
		//   will otherwise stick around in the bootstrap info forever.
		if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
Exemple #29
// MergeInternalTimeSeriesData exports the engine's C++ merge logic for
// InternalTimeSeriesData to higher level packages. This is intended primarily
// for consumption by high level testing of time series functionality.
//
// The sources are merged in the order given. On any marshaling, merge or
// unmarshaling error, a zero InternalTimeSeriesData is returned together
// with the error.
// NOTE(review): this excerpt appears truncated (closing braces and
// parentheses are missing).
func MergeInternalTimeSeriesData(
	sources ...roachpb.InternalTimeSeriesData,
) (roachpb.InternalTimeSeriesData, error) {
	// Wrap each proto in an inlined MVCC value, and marshal each wrapped value
	// to bytes. This is the format required by the engine.
	srcBytes := make([][]byte, 0, len(sources))
	for _, src := range sources {
		var val roachpb.Value
		if err := val.SetProto(&src); err != nil {
			return roachpb.InternalTimeSeriesData{}, err
		bytes, err := protoutil.Marshal(&MVCCMetadata{
			RawBytes: val.RawBytes,
		if err != nil {
			return roachpb.InternalTimeSeriesData{}, err
		srcBytes = append(srcBytes, bytes)

	// Merge every element into a nil byte slice, one at a time.
	var (
		mergedBytes []byte
		err         error
	for _, bytes := range srcBytes {
		// Each step merges the next source into the accumulated result.
		mergedBytes, err = goMerge(mergedBytes, bytes)
		if err != nil {
			return roachpb.InternalTimeSeriesData{}, err

	// Unmarshal merged bytes and extract the time series value within.
	var meta MVCCMetadata
	if err := proto.Unmarshal(mergedBytes, &meta); err != nil {
		return roachpb.InternalTimeSeriesData{}, err
	mergedTS, err := meta.Value().GetTimeseries()
	if err != nil {
		return roachpb.InternalTimeSeriesData{}, err
	return mergedTS, nil
Exemple #30
// storeGossipUpdate is the gossip callback used to keep the StorePool up to date.
// It decodes content as a StoreDescriptor, creates a detail record for the
// store if sp.stores has none yet, and marks the record alive as of
// sp.clock.Now().
// NOTE(review): the body of the decode-error branch, the sp.mu.Lock() call
// matching the deferred Unlock, and the closing braces are missing from this
// excerpt.
func (sp *StorePool) storeGossipUpdate(_ string, content roachpb.Value) {
	var storeDesc roachpb.StoreDescriptor
	if err := content.GetProto(&storeDesc); err != nil {

	defer sp.mu.Unlock()
	// Does this storeDetail exist yet?
	detail, ok := sp.stores[storeDesc.StoreID]
	if !ok {
		// Setting index to -1 ensures this gets added to the queue.
		detail = &storeDetail{index: -1}
		sp.stores[storeDesc.StoreID] = detail
	detail.markAlive(sp.clock.Now(), storeDesc, true)