func printRaftLogEntry(kv engine.MVCCKeyValue) (bool, error) {
	var meta engine.MVCCMetadata
	if err := meta.Unmarshal(kv.Value); err != nil {
		return false, err
	}
	value := roachpb.Value{
		RawBytes: meta.RawBytes,
	}
	var ent raftpb.Entry
	if err := value.GetProto(&ent); err != nil {
		return false, err
	}
	if len(ent.Data) > 0 {
		_, cmdData := storage.DecodeRaftCommand(ent.Data)
		var cmd roachpb.RaftCommand
		if err := cmd.Unmarshal(cmdData); err != nil {
			return false, err
		}
		ent.Data = nil
		fmt.Printf("%s\n", &ent)
		fmt.Printf("%s\n", &cmd)
	} else {
		fmt.Printf("%s: EMPTY\n", &ent)
	}
	return false, nil
}
// StoreData writes the supplied time series data to the cockroach server.
// Stored data will be sampled at the supplied resolution.
func (db *DB) StoreData(r Resolution, data []tspb.TimeSeriesData) error {
	var kvs []roachpb.KeyValue

	// Process data collection: data is converted to internal format, and a key
	// is generated for each internal message.
	for _, d := range data {
		idatas, err := d.ToInternal(r.KeyDuration(), r.SampleDuration())
		if err != nil {
			return err
		}
		for _, idata := range idatas {
			var value roachpb.Value
			if err := value.SetProto(&idata); err != nil {
				return err
			}
			kvs = append(kvs, roachpb.KeyValue{
				Key:   MakeDataKey(d.Name, d.Source, r, idata.StartTimestampNanos),
				Value: value,
			})
		}
	}

	// Send the individual internal merge requests.
	b := client.Batch{}
	for _, kv := range kvs {
		b.AddRawRequest(&roachpb.MergeRequest{
			Span: roachpb.Span{
				Key: kv.Key,
			},
			Value: kv.Value,
		})
	}

	return db.db.Run(&b)
}
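// A minimal usage sketch for StoreData above, assuming the same package
// context and imports; the helper name, metric name, and values are
// illustrative only, and Resolution10s is the resolution constant used by the
// test helper further below.
func storeExampleDatapoint(tsdb *DB, nowNanos int64) error {
	data := []tspb.TimeSeriesData{
		{
			Name:   "example.metric",
			Source: "node01",
			Datapoints: []tspb.TimeSeriesDatapoint{
				{TimestampNanos: nowNanos, Value: 42.0},
			},
		},
	}
	// StoreData converts the datapoints to internal format and merges them
	// into the time series keyspace at the chosen resolution.
	return tsdb.StoreData(Resolution10s, data)
}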
func setAppliedIndex(
	ctx context.Context,
	eng engine.ReadWriter,
	ms *enginepb.MVCCStats,
	rangeID roachpb.RangeID,
	appliedIndex, leaseAppliedIndex uint64,
) error {
	var value roachpb.Value
	value.SetInt(int64(appliedIndex))

	if err := engine.MVCCPut(ctx, eng, ms,
		keys.RaftAppliedIndexKey(rangeID),
		hlc.ZeroTimestamp,
		value,
		nil /* txn */); err != nil {
		return err
	}

	value.SetInt(int64(leaseAppliedIndex))
	return engine.MVCCPut(ctx, eng, ms,
		keys.LeaseAppliedIndexKey(rangeID),
		hlc.ZeroTimestamp,
		value,
		nil /* txn */)
}
func setFrozenStatus(
	eng engine.ReadWriter, ms *enginepb.MVCCStats, rangeID roachpb.RangeID, frozen bool,
) error {
	var val roachpb.Value
	val.SetBool(frozen)
	return engine.MVCCPut(context.Background(), eng, ms,
		keys.RangeFrozenStatusKey(rangeID), hlc.ZeroTimestamp, val, nil)
}
// insertCPutFn is used by insertRow when conflicts should be respected.
// logValue is used for pretty printing.
func insertCPutFn(b *client.Batch, key *roachpb.Key, value *roachpb.Value) {
	// TODO(dan): We want to do this V(2) log everywhere in sql. Consider making
	// a client.Batch wrapper instead of inlining it everywhere.
	if log.V(2) {
		log.InfofDepth(1, "CPut %s -> %s", *key, value.PrettyPrint())
	}
	b.CPut(key, value, nil)
}
// setLastIndex persists a new last index.
func setLastIndex(eng engine.Engine, rangeID roachpb.RangeID, lastIndex uint64) error {
	var value roachpb.Value
	value.SetInt(int64(lastIndex))
	return engine.MVCCPut(eng, nil, keys.RaftLastIndexKey(rangeID),
		roachpb.ZeroTimestamp, value, nil /* txn */)
}
func setLastIndex(eng engine.ReadWriter, rangeID roachpb.RangeID, lastIndex uint64) error {
	var value roachpb.Value
	value.SetInt(int64(lastIndex))
	return engine.MVCCPut(context.Background(), eng, nil,
		keys.RaftLastIndexKey(rangeID), hlc.ZeroTimestamp, value, nil /* txn */)
}
func maybeUnmarshalInline(v []byte, dest proto.Message) error {
	var meta enginepb.MVCCMetadata
	if err := meta.Unmarshal(v); err != nil {
		return err
	}
	value := roachpb.Value{
		RawBytes: meta.RawBytes,
	}
	return value.GetProto(dest)
}
// setAppliedIndex persists a new applied index.
func setAppliedIndex(eng engine.Engine, ms *engine.MVCCStats, rangeID roachpb.RangeID, appliedIndex uint64) error {
	var value roachpb.Value
	value.SetInt(int64(appliedIndex))
	return engine.MVCCPut(eng, ms,
		keys.RaftAppliedIndexKey(rangeID),
		roachpb.ZeroTimestamp,
		value,
		nil /* txn */)
}
// mustGetInt decodes an int64 value from the bytes field of the receiver
// and panics if the bytes field is not 0 or 8 bytes in length.
func mustGetInt(v *roachpb.Value) int64 {
	if v == nil {
		return 0
	}
	i, err := v.GetInt()
	if err != nil {
		panic(err)
	}
	return i
}
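// A minimal round-trip sketch for the integer accessors used above: SetInt
// stores an int64 in the value's bytes field and mustGetInt (via GetInt)
// decodes it again. The function name is illustrative, not part of the
// original code.
func exampleIntRoundTrip() int64 {
	var v roachpb.Value
	v.SetInt(42)
	return mustGetInt(&v) // returns 42
}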
func newInfo(val float64) Info {
	now := timeutil.Now()

	v := roachpb.Value{Timestamp: hlc.Timestamp{WallTime: now.UnixNano()}}
	v.SetFloat(val)

	return Info{
		Value:     v,
		OrigStamp: now.UnixNano(),
		TTLStamp:  now.Add(time.Millisecond).UnixNano(),
	}
}
func writeRandomTimeSeriesDataToRange(
	t testing.TB,
	store *storage.Store,
	rangeID roachpb.RangeID,
	keyPrefix []byte,
) (midpoint []byte) {
	src := rand.New(rand.NewSource(0))
	r := ts.Resolution10s
	for i := 0; i < 20; i++ {
		var data []tspb.TimeSeriesData
		for j := int64(0); j <= src.Int63n(5); j++ {
			d := tspb.TimeSeriesData{
				Name:   "test.random.metric",
				Source: "cpu01",
			}
			for k := int64(0); k <= src.Int63n(10); k++ {
				d.Datapoints = append(d.Datapoints, tspb.TimeSeriesDatapoint{
					TimestampNanos: src.Int63n(200) * r.KeyDuration(),
					Value:          src.Float64(),
				})
			}
			data = append(data, d)
		}
		for _, d := range data {
			idatas, err := d.ToInternal(r.KeyDuration(), r.SampleDuration())
			if err != nil {
				t.Fatal(err)
			}
			for _, idata := range idatas {
				var value roachpb.Value
				if err := value.SetProto(&idata); err != nil {
					t.Fatal(err)
				}
				mArgs := roachpb.MergeRequest{
					Span: roachpb.Span{
						Key: encoding.EncodeVarintAscending(keyPrefix, idata.StartTimestampNanos),
					},
					Value: value,
				}
				if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{
					RangeID: rangeID,
				}, &mArgs); pErr != nil {
					t.Fatal(pErr)
				}
			}
		}
	}
	// Return approximate midway point (100 is midway between random timestamps
	// in range [0,200)).
	midKey := append([]byte(nil), keyPrefix...)
	midKey = encoding.EncodeVarintAscending(midKey, 100*r.KeyDuration())
	return keys.MakeRowSentinelKey(midKey)
}
// Create the key/value pairs for the default zone config entry.
func createDefaultZoneConfig() []roachpb.KeyValue {
	var ret []roachpb.KeyValue
	value := roachpb.Value{}
	desc := config.DefaultZoneConfig()
	if err := value.SetProto(&desc); err != nil {
		log.Fatalf("could not marshal %v", desc)
	}
	ret = append(ret, roachpb.KeyValue{
		Key:   MakeZoneKey(keys.RootNamespaceID),
		Value: value,
	})
	return ret
}
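// A minimal sketch of reading that entry back, assuming DefaultZoneConfig
// returns a config.ZoneConfig: the proto written with SetProto above can be
// recovered from the KeyValue with GetProto. The helper name is illustrative.
func decodeDefaultZoneConfig(kv roachpb.KeyValue) (config.ZoneConfig, error) {
	var zone config.ZoneConfig
	err := kv.Value.GetProto(&zone)
	return zone, err
}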
func newInfo(val float64) info {
	now := time.Now()

	v := roachpb.Value{Timestamp: &roachpb.Timestamp{WallTime: now.UnixNano()}}
	v.SetFloat(val)

	return info{
		Info: Info{
			Value:    v,
			TTLStamp: now.Add(time.Millisecond).UnixNano(),
		},
	}
}
// setupMVCCScanData writes up to numVersions values at each of numKeys
// keys. The number of versions written for each key is chosen
// randomly according to a uniform distribution. Each successive
// version is written starting at 5ns and then in 5ns increments. This
// allows scans at various times, starting at t=5ns, and continuing to
// t=5ns*(numVersions+1). A version for each key will be read on every
// such scan, but the dynamics of the scan will change depending on
// the historical timestamp. Earlier timestamps mean scans which must
// skip more historical versions; later timestamps mean scans which
// skip fewer.
//
// The creation of the rocksdb database is time consuming, especially
// for larger numbers of versions. The database is persisted between
// runs and stored in the current directory as
// "mvcc_scan_<versions>_<keys>".
func setupMVCCScanData(numVersions, numKeys int, b *testing.B) (*RocksDB, *stop.Stopper) {
	loc := fmt.Sprintf("mvcc_scan_%d_%d", numVersions, numKeys)

	exists := true
	if _, err := os.Stat(loc); os.IsNotExist(err) {
		exists = false
	}

	log.Infof("creating mvcc data: %s", loc)
	const cacheSize = 8 << 30 // 8 GB
	stopper := stop.NewStopper()
	rocksdb := NewRocksDB(roachpb.Attributes{Attrs: []string{"ssd"}}, loc, cacheSize, stopper)
	if err := rocksdb.Open(); err != nil {
		b.Fatalf("could not create new rocksdb db instance at %s: %v", loc, err)
	}

	if exists {
		return rocksdb, stopper
	}

	rng, _ := randutil.NewPseudoRand()
	keys := make([]roachpb.Key, numKeys)
	nvs := make([]int, numKeys)
	for t := 1; t <= numVersions; t++ {
		walltime := int64(5 * t)
		ts := makeTS(walltime, 0)
		batch := rocksdb.NewBatch()
		for i := 0; i < numKeys; i++ {
			if t == 1 {
				keys[i] = roachpb.Key(encoding.EncodeUvarint([]byte("key-"), uint64(i)))
				nvs[i] = int(rand.Int31n(int32(numVersions)) + 1)
			}
			// Only write values if this iteration is less than the random
			// number of versions chosen for this key.
			if t <= nvs[i] {
				value := roachpb.Value{Bytes: randutil.RandBytes(rng, 1024)}
				value.InitChecksum(keys[i])
				if err := MVCCPut(batch, nil, keys[i], ts, value, nil); err != nil {
					b.Fatal(err)
				}
			}
		}
		if err := batch.Commit(); err != nil {
			b.Fatal(err)
		}
		batch.Close()
	}
	rocksdb.CompactRange(nil, nil)

	return rocksdb, stopper
}
// storeGossipUpdate is the gossip callback used to keep the StorePool up to date.
func (sp *StorePool) storeGossipUpdate(_ string, content roachpb.Value) {
	var storeDesc roachpb.StoreDescriptor
	if err := content.GetProto(&storeDesc); err != nil {
		log.Error(context.TODO(), err)
		return
	}

	sp.mu.Lock()
	defer sp.mu.Unlock()
	// Does this storeDetail exist yet?
	detail := sp.getStoreDetailLocked(storeDesc.StoreID)
	detail.markAlive(sp.clock.Now(), &storeDesc)
	sp.mu.queue.enqueue(detail)
}
func tryMeta(kv engine.MVCCKeyValue) (string, error) {
	if !bytes.HasPrefix(kv.Key.Key, keys.Meta1Prefix) && !bytes.HasPrefix(kv.Key.Key, keys.Meta2Prefix) {
		return "", errors.New("not a meta key")
	}
	value := roachpb.Value{
		Timestamp: kv.Key.Timestamp,
		RawBytes:  kv.Value,
	}
	var desc roachpb.RangeDescriptor
	if err := value.GetProto(&desc); err != nil {
		return "", err
	}
	return descStr(desc), nil
}
// PutSequence writes a sequence number for the specified family.
func (rc *ResponseCache) PutSequence(e engine.Engine, family []byte, sequence int64, err error) error {
	if sequence <= 0 || len(family) == 0 {
		return errEmptyID
	}
	if !rc.shouldCacheError(err) {
		return nil
	}

	// Write the response value to the engine.
	key := keys.ResponseCacheKey(rc.rangeID, family)
	var v roachpb.Value
	v.SetInt(sequence)
	return engine.MVCCPut(e, nil /* ms */, key, roachpb.ZeroTimestamp, v, nil /* txn */)
}
// BenchmarkMVCCMergeTimeSeries computes performance of merging time series data.
func BenchmarkMVCCMergeTimeSeries(b *testing.B) {
	ts := &roachpb.InternalTimeSeriesData{
		StartTimestampNanos: 0,
		SampleDurationNanos: 1000,
		Samples: []*roachpb.InternalTimeSeriesSample{
			{Offset: 0, Count: 1, Sum: 5.0},
		},
	}
	var value roachpb.Value
	if err := value.SetProto(ts); err != nil {
		b.Fatal(err)
	}
	runMVCCMerge(&value, 1024, b)
}
// append the given entries to the raft log. Takes the previous values of
// r.mu.lastIndex and r.mu.raftLogSize, and returns new values. We do this
// rather than modifying them directly because these modifications need to be
// atomic with the commit of the batch.
func (r *Replica) append(
	ctx context.Context,
	batch engine.ReadWriter,
	prevLastIndex uint64,
	prevRaftLogSize int64,
	entries []raftpb.Entry,
) (uint64, int64, error) {
	if len(entries) == 0 {
		return prevLastIndex, prevRaftLogSize, nil
	}
	var diff enginepb.MVCCStats
	var value roachpb.Value
	for i := range entries {
		ent := &entries[i]
		key := keys.RaftLogKey(r.RangeID, ent.Index)
		if err := value.SetProto(ent); err != nil {
			return 0, 0, err
		}
		value.InitChecksum(key)
		var err error
		if ent.Index > prevLastIndex {
			err = engine.MVCCBlindPut(ctx, batch, &diff, key, hlc.ZeroTimestamp,
				value, nil /* txn */)
		} else {
			err = engine.MVCCPut(ctx, batch, &diff, key, hlc.ZeroTimestamp,
				value, nil /* txn */)
		}
		if err != nil {
			return 0, 0, err
		}
	}

	// Delete any previously appended log entries which never committed.
	lastIndex := entries[len(entries)-1].Index
	for i := lastIndex + 1; i <= prevLastIndex; i++ {
		err := engine.MVCCDelete(ctx, batch, &diff,
			keys.RaftLogKey(r.RangeID, i), hlc.ZeroTimestamp, nil /* txn */)
		if err != nil {
			return 0, 0, err
		}
	}

	if err := setLastIndex(ctx, batch, r.RangeID, lastIndex); err != nil {
		return 0, 0, err
	}

	raftLogSize := prevRaftLogSize + diff.SysBytes

	return lastIndex, raftLogSize, nil
}
// deadReplicasGossipUpdate is the gossip callback used to keep the StorePool up to date.
func (sp *StorePool) deadReplicasGossipUpdate(_ string, content roachpb.Value) {
	var replicas roachpb.StoreDeadReplicas
	if err := content.GetProto(&replicas); err != nil {
		log.Error(context.TODO(), err)
		return
	}

	sp.mu.Lock()
	defer sp.mu.Unlock()
	detail := sp.getStoreDetailLocked(replicas.StoreID)
	deadReplicas := make(map[roachpb.RangeID][]roachpb.ReplicaDescriptor)
	for _, r := range replicas.Replicas {
		deadReplicas[r.RangeID] = append(deadReplicas[r.RangeID], r.Replica)
	}
	detail.deadReplicas = deadReplicas
}
// updateNodeAddress is a gossip callback which fires with each
// update to the node address. This allows us to compute the
// total size of the gossip network (for determining max peers
// each gossip node is allowed to have), as well as to create
// new resolvers for each encountered host and to write the
// set of gossip node addresses to persistent storage when it
// changes.
func (g *Gossip) updateNodeAddress(_ string, content roachpb.Value) {
	var desc roachpb.NodeDescriptor
	if err := content.GetProto(&desc); err != nil {
		log.Error(err)
		return
	}

	g.mu.Lock()
	defer g.mu.Unlock()

	// Recompute max peers based on size of network and set the max
	// sizes for incoming and outgoing node sets.
	defer func() {
		maxPeers := g.maxPeers(len(g.nodeDescs))
		g.incoming.setMaxSize(maxPeers)
		g.outgoing.setMaxSize(maxPeers)
	}()

	// Skip if the node has already been seen or it's our own address.
	if _, ok := g.nodeDescs[desc.NodeID]; ok || desc.Address == g.is.NodeAddr {
		return
	}
	g.nodeDescs[desc.NodeID] = &desc

	// Add this new node to our list of resolvers so we can keep
	// connecting to gossip if the original resolvers go offline.
	r, err := resolver.NewResolverFromUnresolvedAddr(desc.Address)
	if err != nil {
		log.Warningf("bad address from gossip node %s: %s", desc, err)
		return
	}
	if !g.haveResolver(r) {
		g.resolvers = append(g.resolvers, r)
	}
	// Add new address to bootstrap info and persist if possible.
	if !g.haveBootstrapAddress(desc.Address) {
		g.bootstrapInfo.Addresses = append(g.bootstrapInfo.Addresses, desc.Address)
		if g.storage != nil {
			// TODO(spencer): need to clean up ancient gossip nodes, which
			// will otherwise stick around in the bootstrap info forever.
			if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
				log.Error(err)
			}
		}
	}
}
func tryRangeDescriptor(kv engine.MVCCKeyValue) (string, error) {
	_, suffix, _, err := keys.DecodeRangeKey(kv.Key.Key)
	if err != nil {
		return "", err
	}
	if !bytes.Equal(suffix, keys.LocalRangeDescriptorSuffix) {
		return "", fmt.Errorf("wrong suffix: %s", suffix)
	}
	value := roachpb.Value{
		RawBytes: kv.Value,
	}
	var desc roachpb.RangeDescriptor
	if err := value.GetProto(&desc); err != nil {
		return "", err
	}
	return descStr(desc), nil
}
// Indirectly this tests that the transaction remembers the NodeID of the node
// being read from correctly, at least in this simple case. Not remembering the
// node would lead to thousands of transaction restarts and almost certainly a
// test timeout.
func TestUncertaintyRestarts(t *testing.T) {
	defer leaktest.AfterTest(t)
	s := createTestDB(t)
	defer s.Stop()

	// Set a large offset so that a busy restart-loop
	// really shows. Also makes sure that the values
	// we write in the future below don't actually
	// wind up in the past.
	offset := 4000 * time.Millisecond
	s.Clock.SetMaxOffset(offset)

	key := roachpb.Key("key")
	value := roachpb.Value{
		Bytes: nil, // Set for each Put
	}

	// With the correct restart behaviour, we see only one restart
	// and the value read is the very first one (as nothing else
	// has been written).
	wantedBytes := []byte("value-0")

	i := -1
	tErr := s.DB.Txn(func(txn *client.Txn) error {
		i++
		s.Manual.Increment(1)
		futureTS := s.Clock.Now()
		futureTS.WallTime++
		value.Bytes = []byte(fmt.Sprintf("value-%d", i))
		if err := engine.MVCCPut(s.Eng, nil, key, futureTS, value, nil); err != nil {
			t.Fatal(err)
		}
		gr, err := txn.Get(key)
		if err != nil {
			return err
		}
		if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), wantedBytes) {
			t.Fatalf("%d: read wrong value: %v, wanted %q", i, gr.Value, wantedBytes)
		}
		return nil
	})
	if i != 1 {
		t.Errorf("txn restarted %d times, expected only one restart", i)
	}
	if tErr != nil {
		t.Fatal(tErr)
	}
}
// StoreData writes the supplied time series data to the cockroach server.
// Stored data will be sampled at the supplied resolution.
func (db *DB) StoreData(r Resolution, data []TimeSeriesData) error {
	var kvs []roachpb.KeyValue

	// Process data collection: data is converted to internal format, and a key
	// is generated for each internal message.
	for _, d := range data {
		idatas, err := d.ToInternal(r.KeyDuration(), r.SampleDuration())
		if err != nil {
			return err
		}
		for _, idata := range idatas {
			var value roachpb.Value
			if err := value.SetProto(idata); err != nil {
				return err
			}
			kvs = append(kvs, roachpb.KeyValue{
				Key:   MakeDataKey(d.Name, d.Source, r, idata.StartTimestampNanos),
				Value: value,
			})
		}
	}

	// Send the individual internal merge requests.
	// TODO(mrtracy): In the likely event that there are multiple values to
	// merge, they should be batched together instead of being called
	// individually. However, BatchRequest currently does not support
	// MergeRequest, probably because it cannot be part of a
	// transaction. Look into batching this.
	for _, kv := range kvs {
		// Note, this looks like a batch, but isn't a batch because we only add
		// a single request to it.
		b := &client.Batch{}
		b.InternalAddRequest(&roachpb.MergeRequest{
			Span: roachpb.Span{
				Key: kv.Key,
			},
			Value: kv.Value,
		})
		if err := db.db.Run(b); err != nil {
			return err
		}
	}
	return nil
}
func printRangeDescriptor(kv engine.MVCCKeyValue) (bool, error) {
	startKey, suffix, _, err := keys.DecodeRangeKey(kv.Key.Key)
	if err != nil {
		return false, err
	}
	if !bytes.Equal(suffix, keys.LocalRangeDescriptorSuffix) {
		return false, nil
	}
	value := roachpb.Value{
		RawBytes: kv.Value,
	}
	var desc roachpb.RangeDescriptor
	if err := value.GetProto(&desc); err != nil {
		return false, err
	}
	fmt.Printf("Range descriptor with start key %s at time %s\n%s\n",
		startKey, kv.Key.Timestamp.GoTime(), &desc)
	return false, nil
}
// updateSystemConfig is the raw gossip info callback.
// Unmarshal the system config, and if successful, update our
// copy and run the callbacks.
func (g *Gossip) updateSystemConfig(key string, content roachpb.Value) {
	if key != KeySystemConfig {
		log.Fatalf("wrong key received on SystemConfig callback: %s", key)
		return
	}
	cfg := &config.SystemConfig{}
	if err := content.GetProto(cfg); err != nil {
		log.Errorf("could not unmarshal system config on callback: %s", err)
		return
	}

	g.systemConfigMu.Lock()
	defer g.systemConfigMu.Unlock()
	g.systemConfig = cfg
	for _, cb := range g.systemConfigCallbacks {
		go cb(cfg)
	}
}
// updateNodeAddress is a gossip callback which fires with each
// update to the node address. This allows us to compute the
// total size of the gossip network (for determining max peers
// each gossip node is allowed to have), as well as to create
// new resolvers for each encountered host and to write the
// set of gossip node addresses to persistent storage when it
// changes.
func (g *Gossip) updateNodeAddress(_ string, content roachpb.Value) {
	var desc roachpb.NodeDescriptor
	if err := content.GetProto(&desc); err != nil {
		log.Error(err)
		return
	}

	g.mu.Lock()
	defer g.mu.Unlock()

	// Skip if the node has already been seen.
	if _, ok := g.nodeDescs[desc.NodeID]; ok {
		return
	}
	g.nodeDescs[desc.NodeID] = &desc

	// Recompute max peers based on size of network and set the max
	// sizes for incoming and outgoing node sets.
	maxPeers := g.maxPeers(len(g.nodeDescs))
	g.incoming.setMaxSize(maxPeers)
	g.outgoing.setMaxSize(maxPeers)

	// Skip if it's our own address.
	if desc.Address == g.is.NodeAddr {
		return
	}

	// Add this new node address (if it's not already there) to our list
	// of resolvers so we can keep connecting to gossip if the original
	// resolvers go offline.
	g.maybeAddResolver(desc.Address)

	// Add new address (if it's not already there) to bootstrap info and
	// persist if possible.
	if g.maybeAddBootstrapAddress(desc.Address) && g.storage != nil {
		// TODO(spencer): need to clean up ancient gossip nodes, which
		// will otherwise stick around in the bootstrap info forever.
		if err := g.storage.WriteBootstrapInfo(&g.bootstrapInfo); err != nil {
			log.Error(err)
		}
	}
}
// MergeInternalTimeSeriesData exports the engine's C++ merge logic for
// InternalTimeSeriesData to higher level packages. This is intended primarily
// for consumption by high level testing of time series functionality.
func MergeInternalTimeSeriesData(
	sources ...roachpb.InternalTimeSeriesData,
) (roachpb.InternalTimeSeriesData, error) {
	// Wrap each proto in an inlined MVCC value, and marshal each wrapped value
	// to bytes. This is the format required by the engine.
	srcBytes := make([][]byte, 0, len(sources))
	for _, src := range sources {
		var val roachpb.Value
		if err := val.SetProto(&src); err != nil {
			return roachpb.InternalTimeSeriesData{}, err
		}
		bytes, err := protoutil.Marshal(&MVCCMetadata{
			RawBytes: val.RawBytes,
		})
		if err != nil {
			return roachpb.InternalTimeSeriesData{}, err
		}
		srcBytes = append(srcBytes, bytes)
	}

	// Merge every element into a nil byte slice, one at a time.
	var (
		mergedBytes []byte
		err         error
	)
	for _, bytes := range srcBytes {
		mergedBytes, err = goMerge(mergedBytes, bytes)
		if err != nil {
			return roachpb.InternalTimeSeriesData{}, err
		}
	}

	// Unmarshal merged bytes and extract the time series value within.
	var meta MVCCMetadata
	if err := proto.Unmarshal(mergedBytes, &meta); err != nil {
		return roachpb.InternalTimeSeriesData{}, err
	}
	mergedTS, err := meta.Value().GetTimeseries()
	if err != nil {
		return roachpb.InternalTimeSeriesData{}, err
	}
	return mergedTS, nil
}
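// A minimal usage sketch for MergeInternalTimeSeriesData, merging two slabs
// that share a start timestamp and sample duration; the values are
// illustrative and the sample fields mirror BenchmarkMVCCMergeTimeSeries
// above.
func exampleMergeTwoSlabs() (roachpb.InternalTimeSeriesData, error) {
	a := roachpb.InternalTimeSeriesData{
		StartTimestampNanos: 0,
		SampleDurationNanos: 1000,
		Samples: []*roachpb.InternalTimeSeriesSample{
			{Offset: 0, Count: 1, Sum: 5.0},
		},
	}
	b := roachpb.InternalTimeSeriesData{
		StartTimestampNanos: 0,
		SampleDurationNanos: 1000,
		Samples: []*roachpb.InternalTimeSeriesSample{
			{Offset: 1, Count: 1, Sum: 10.0},
		},
	}
	// The merged result should contain the samples at both offsets.
	return MergeInternalTimeSeriesData(a, b)
}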
// storeGossipUpdate is the gossip callback used to keep the StorePool up to date.
func (sp *StorePool) storeGossipUpdate(_ string, content roachpb.Value) {
	var storeDesc roachpb.StoreDescriptor
	if err := content.GetProto(&storeDesc); err != nil {
		log.Error(err)
		return
	}

	sp.mu.Lock()
	defer sp.mu.Unlock()
	// Does this storeDetail exist yet?
	detail, ok := sp.stores[storeDesc.StoreID]
	if !ok {
		// Setting index to -1 ensures this gets added to the queue.
		detail = &storeDetail{index: -1}
		sp.stores[storeDesc.StoreID] = detail
	}
	detail.markAlive(sp.clock.Now(), storeDesc, true)
	sp.queue.enqueue(detail)
}