// mustGetInteger decodes an int64 value from the bytes field of the receiver // and panics if the bytes field is not 0 or 8 bytes in length. func mustGetInteger(v *proto.Value) int64 { i, err := v.GetInteger() if err != nil { panic(err) } return i }
// ConditionalPut sets the value for a specified key only if // the expected value matches. If not, the return value contains // the actual value. func (mvcc *MVCC) ConditionalPut(key Key, timestamp proto.Timestamp, value proto.Value, expValue *proto.Value, txn *proto.Transaction) (*proto.Value, error) { // Handle check for non-existence of key. In order to detect // the potential write intent by another concurrent transaction // with a newer timestamp, we need to use the max timestamp // while reading. existVal, err := mvcc.Get(key, proto.MaxTimestamp, txn) if err != nil { return nil, err } if expValue == nil && existVal != nil { return existVal, util.Errorf("key %q already exists", key) } else if expValue != nil { // Handle check for existence when there is no key. if existVal == nil { return nil, util.Errorf("key %q does not exist", key) } else if expValue.Bytes != nil && !bytes.Equal(expValue.Bytes, existVal.Bytes) { return existVal, util.Errorf("key %q does not match existing", key) } else if expValue.Integer != nil && (existVal.Integer == nil || expValue.GetInteger() != existVal.GetInteger()) { return existVal, util.Errorf("key %q does not match existing", key) } } return nil, mvcc.Put(key, timestamp, value, txn) }
// putInternal writes the specified value to key. func (kv *KV) putInternal(key proto.Key, value proto.Value) error { value.InitChecksum(key) return kv.Call(proto.Put, &proto.PutRequest{ RequestHeader: proto.RequestHeader{Key: key}, Value: value, }, &proto.PutResponse{}) }
// PutProto sets the given key to the protobuf-serialized byte string // of msg and the provided timestamp. func (mvcc *MVCC) PutProto(key Key, timestamp proto.Timestamp, txn *proto.Transaction, msg gogoproto.Message) error { data, err := gogoproto.Marshal(msg) if err != nil { return err } value := proto.Value{Bytes: data} value.InitChecksum(key) return mvcc.Put(key, timestamp, value, txn) }
// mustGetInt decodes an int64 value from the bytes field of the receiver // and panics if the bytes field is not 0 or 8 bytes in length. func mustGetInt(v *proto.Value) int64 { if v == nil { return 0 } i, err := v.GetInt() if err != nil { panic(err) } return i }
// Indirectly this tests that the transaction remembers the NodeID of the node // being read from correctly, at least in this simple case. Not remembering the // node would lead to thousands of transaction restarts and almost certainly a // test timeout. func TestUncertaintyRestarts(t *testing.T) { { db, eng, clock, mClock, _, transport, err := createTestDB() if err != nil { t.Fatal(err) } defer transport.Close() // Set a large offset so that a busy restart-loop // really shows. Also makes sure that the values // we write in the future below don't actually // wind up in the past. offset := 4000 * time.Millisecond clock.SetMaxOffset(offset) key := proto.Key("key") value := proto.Value{ Bytes: nil, // Set for each Put } // With the correct restart behaviour, we see only one restart // and the value read is the very first one (as nothing else // has been written) wantedBytes := []byte("value-0") txnOpts := &client.TransactionOptions{ Name: "uncertainty", } gr := &proto.GetResponse{} i := -1 tErr := db.RunTransaction(txnOpts, func(txn *client.KV) error { i++ mClock.Increment(1) futureTS := clock.Now() futureTS.WallTime++ value.Bytes = []byte(fmt.Sprintf("value-%d", i)) err = engine.MVCCPut(eng, nil, key, futureTS, value, nil) if err != nil { t.Fatal(err) } gr.Reset() if err := txn.Call(proto.Get, proto.GetArgs(key), gr); err != nil { return err } if gr.Value == nil || !bytes.Equal(gr.Value.Bytes, wantedBytes) { t.Fatalf("%d: read wrong value: %v, wanted %q", i, gr.Value, wantedBytes) } return nil }) if i != 1 { t.Errorf("txn restarted %d times, expected only one restart", i) } if tErr != nil { t.Fatal(tErr) } } }
// putInternal writes the specified value to key. func putInternal(db DB, key engine.Key, value proto.Value, timestamp proto.Timestamp) error { value.InitChecksum(key) pr := <-db.Put(&proto.PutRequest{ RequestHeader: proto.RequestHeader{ Key: key, User: UserRoot, Timestamp: timestamp, }, Value: value, }) return pr.GoError() }
// PreparePutProto sets the given key to the protobuf-serialized byte // string of msg. The resulting Put call is buffered and will not be // sent until a subsequent call to Flush. Returns marshalling errors // if encountered. func (kv *KV) PreparePutProto(key proto.Key, msg gogoproto.Message) error { data, err := gogoproto.Marshal(msg) if err != nil { return err } value := proto.Value{Bytes: data} value.InitChecksum(key) kv.Prepare(proto.Put, &proto.PutRequest{ RequestHeader: proto.RequestHeader{Key: key}, Value: value, }, &proto.PutResponse{}) return nil }
// setupMVCCData writes up to numVersions values at each of numKeys // keys. The number of versions written for each key is chosen // randomly according to a uniform distribution. Each successive // version is written starting at 5ns and then in 5ns increments. This // allows scans at various times, starting at t=5ns, and continuing to // t=5ns*(numVersions+1). A version for each key will be read on every // such scan, but the dynamics of the scan will change depending on // the historical timestamp. Earlier timestamps mean scans which must // skip more historical versions; later timestamps mean scans which // skip fewer. // // The creation of the rocksdb database is time consuming, especially // for larger numbers of versions. The database is persisted between // runs and stored in the current directory as // "mvcc_scan_<versions>_<keys>". func setupMVCCScanData(numVersions, numKeys int, b *testing.B) (*RocksDB, *stop.Stopper) { loc := fmt.Sprintf("mvcc_scan_%d_%d", numVersions, numKeys) exists := true if _, err := os.Stat(loc); os.IsNotExist(err) { exists = false } log.Infof("creating mvcc data: %s", loc) const cacheSize = 8 << 30 // 8 GB stopper := stop.NewStopper() rocksdb := NewRocksDB(proto.Attributes{Attrs: []string{"ssd"}}, loc, cacheSize, stopper) if err := rocksdb.Open(); err != nil { b.Fatalf("could not create new rocksdb db instance at %s: %v", loc, err) } if exists { return rocksdb, stopper } rng, _ := randutil.NewPseudoRand() keys := make([]proto.Key, numKeys) nvs := make([]int, numKeys) for t := 1; t <= numVersions; t++ { walltime := int64(5 * t) ts := makeTS(walltime, 0) batch := rocksdb.NewBatch() for i := 0; i < numKeys; i++ { if t == 1 { keys[i] = proto.Key(encoding.EncodeUvarint([]byte("key-"), uint64(i))) nvs[i] = int(rand.Int31n(int32(numVersions)) + 1) } // Only write values if this iteration is less than the random // number of versions chosen for this key. if t <= nvs[i] { value := proto.Value{Bytes: randutil.RandBytes(rng, 1024)} value.InitChecksum(keys[i]) if err := MVCCPut(batch, nil, keys[i], ts, value, nil); err != nil { b.Fatal(err) } } } if err := batch.Commit(); err != nil { b.Fatal(err) } batch.Close() } rocksdb.CompactRange(nil, nil) return rocksdb, stopper }
// Indirectly this tests that the transaction remembers the NodeID of the node // being read from correctly, at least in this simple case. Not remembering the // node would lead to thousands of transaction restarts and almost certainly a // test timeout. func TestUncertaintyRestarts(t *testing.T) { defer leaktest.AfterTest(t) s := createTestDB(t) defer s.Stop() // Set a large offset so that a busy restart-loop // really shows. Also makes sure that the values // we write in the future below don't actually // wind up in the past. offset := 4000 * time.Millisecond s.Clock.SetMaxOffset(offset) key := proto.Key("key") value := proto.Value{ Bytes: nil, // Set for each Put } // With the correct restart behaviour, we see only one restart // and the value read is the very first one (as nothing else // has been written) wantedBytes := []byte("value-0") i := -1 tErr := s.DB.Txn(func(txn *client.Txn) error { i++ s.Manual.Increment(1) futureTS := s.Clock.Now() futureTS.WallTime++ value.Bytes = []byte(fmt.Sprintf("value-%d", i)) if err := engine.MVCCPut(s.Eng, nil, key, futureTS, value, nil); err != nil { t.Fatal(err) } gr, err := txn.Get(key) if err != nil { return err } if !gr.Exists() || !bytes.Equal(gr.ValueBytes(), wantedBytes) { t.Fatalf("%d: read wrong value: %v, wanted %q", i, gr.Value, wantedBytes) } return nil }) if i != 1 { t.Errorf("txn restarted %d times, expected only one restart", i) } if tErr != nil { t.Fatal(tErr) } }
// GetInitialSystemValues returns a list of key/value pairs. // They are written at cluster bootstrap time (see storage/node.go:BootstrapCLuster). func GetInitialSystemValues() []proto.KeyValue { systemData := []struct { parentID ID desc descriptorProto }{ {keys.RootNamespaceID, &SystemDB}, {SystemDB.ID, &NamespaceTable}, {SystemDB.ID, &DescriptorTable}, {SystemDB.ID, &UsersTable}, {SystemDB.ID, &ZonesTable}, } // Initial kv pairs: // - ID generator // - 2 per table/database numEntries := 1 + len(systemData)*2 ret := make([]proto.KeyValue, numEntries, numEntries) i := 0 // Descriptor ID generator. value := proto.Value{} value.SetInt(int64(keys.MaxReservedDescID + 1)) ret[i] = proto.KeyValue{ Key: keys.DescIDGenerator, Value: value, } i++ // System database and tables. for _, d := range systemData { value = proto.Value{} value.SetInt(int64(d.desc.GetID())) ret[i] = proto.KeyValue{ Key: MakeNameMetadataKey(d.parentID, d.desc.GetName()), Value: value, } i++ value = proto.Value{} if err := value.SetProto(d.desc); err != nil { log.Fatalf("could not marshal %v", d.desc) } ret[i] = proto.KeyValue{ Key: MakeDescMetadataKey(d.desc.GetID()), Value: value, } i++ } return ret }
// marshalValue returns a proto.Value initialized from the source // reflect.Value, returning an error if the types are not compatible. func marshalValue(v interface{}) (proto.Value, error) { var r proto.Value if v == nil { return r, nil } switch t := v.(type) { case nil: return r, nil case string: r.SetBytes([]byte(t)) return r, nil case []byte: r.SetBytes(t) return r, nil case proto.Key: r.SetBytes([]byte(t)) return r, nil case time.Time: err := r.SetTime(t) return r, err case gogoproto.Message: err := r.SetProto(t) return r, err } switch v := reflect.ValueOf(v); v.Kind() { case reflect.Bool: i := int64(0) if v.Bool() { i = 1 } r.SetInt(i) return r, nil case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: r.SetInt(v.Int()) return r, nil case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: r.SetInt(int64(v.Uint())) return r, nil case reflect.Float32, reflect.Float64: r.SetFloat(v.Float()) return r, nil case reflect.String: r.SetBytes([]byte(v.String())) return r, nil } return r, fmt.Errorf("unable to marshal value: %s", v) }
// marshalValue returns a proto.Value initialized from the source // reflect.Value, returning an error if the types are not compatible. func marshalValue(v reflect.Value) (proto.Value, error) { var r proto.Value if !v.IsValid() { return r, nil } switch t := v.Interface().(type) { case nil: return r, nil case string: r.Bytes = []byte(t) return r, nil case []byte: r.Bytes = t return r, nil case proto.Key: r.Bytes = []byte(t) return r, nil case gogoproto.Message: var err error r.Bytes, err = gogoproto.Marshal(t) return r, err case encoding.BinaryMarshaler: var err error r.Bytes, err = t.MarshalBinary() return r, err } switch v.Kind() { case reflect.Bool: i := int64(0) if v.Bool() { i = 1 } r.SetInteger(i) return r, nil case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: r.SetInteger(v.Int()) return r, nil case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: r.SetInteger(int64(v.Uint())) return r, nil case reflect.Float32, reflect.Float64: r.SetInteger(int64(math.Float64bits(v.Float()))) return r, nil case reflect.String: r.Bytes = []byte(v.String()) return r, nil } return r, fmt.Errorf("unable to marshal value: %s", v) }
// unmarshalValue sets the destination reflect.Value contents from the source // proto.Value, returning an error if the types are not compatible. func unmarshalValue(src *proto.Value, dest reflect.Value) error { if src == nil { dest.Set(reflect.Zero(dest.Type())) return nil } switch d := dest.Addr().Interface().(type) { case *string: if src.Bytes != nil { *d = string(src.Bytes) } else { *d = "" } return nil case *[]byte: if src.Bytes != nil { *d = src.Bytes } else { *d = nil } return nil case *gogoproto.Message: panic("TODO(pmattis): unimplemented") case *encoding.BinaryUnmarshaler: return (*d).UnmarshalBinary(src.Bytes) } switch dest.Kind() { case reflect.Bool: i, err := src.GetInteger() if err != nil { return err } dest.SetBool(i != 0) return nil case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: i, err := src.GetInteger() if err != nil { return err } dest.SetInt(i) return nil case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: i, err := src.GetInteger() if err != nil { return err } dest.SetUint(uint64(i)) return nil case reflect.Float32, reflect.Float64: i, err := src.GetInteger() if err != nil { return err } dest.SetFloat(math.Float64frombits(uint64(i))) return nil case reflect.String: if src == nil || src.Bytes == nil { dest.SetString("") return nil } dest.SetString(string(src.Bytes)) return nil } return fmt.Errorf("unable to unmarshal value: %s", dest.Type()) }
// unmarshalColumnValue decodes the value from a key-value pair using the type // expected by the column. An error is returned if the value's type does not // match the column's type. func unmarshalColumnValue(kind ColumnType_Kind, value *proto.Value) (parser.Datum, error) { if value == nil { return parser.DNull, nil } switch kind { case ColumnType_BOOL: v, err := value.GetInt() if err != nil { return nil, err } return parser.DBool(v != 0), nil case ColumnType_INT: v, err := value.GetInt() if err != nil { return nil, err } return parser.DInt(v), nil case ColumnType_FLOAT: v, err := value.GetFloat() if err != nil { return nil, err } return parser.DFloat(v), nil case ColumnType_STRING: v, err := value.GetBytesChecked() if err != nil { return nil, err } return parser.DString(v), nil case ColumnType_BYTES: v, err := value.GetBytesChecked() if err != nil { return nil, err } return parser.DBytes(v), nil case ColumnType_DATE: v, err := value.GetTime() if err != nil { return nil, err } return parser.DDate{Time: v}, nil case ColumnType_TIMESTAMP: v, err := value.GetTime() if err != nil { return nil, err } return parser.DTimestamp{Time: v}, nil case ColumnType_INTERVAL: v, err := value.GetInt() if err != nil { return nil, err } return parser.DInterval{Duration: time.Duration(v)}, nil default: return nil, util.Errorf("unsupported column type: %s", kind) } }
// marshalValue returns a proto.Value initialized from the source // interface{}, returning an error if the types are not compatible. func marshalValue(v interface{}) (proto.Value, error) { var r proto.Value // Handle a few common types via a type switch. switch t := v.(type) { case nil: return r, nil case bool: i := int64(0) if t { i = 1 } r.SetInt(i) return r, nil case string: r.SetBytes([]byte(t)) return r, nil case []byte: r.SetBytes(t) return r, nil case proto.Key: r.SetBytes([]byte(t)) return r, nil case time.Time: r.SetTime(t) return r, nil case gogoproto.Message: err := r.SetProto(t) return r, err } // Handle all of the Go primitive types besides struct and pointers. This // switch also handles types based on a primitive type (e.g. "type MyInt // int"). switch v := reflect.ValueOf(v); v.Kind() { case reflect.Bool: i := int64(0) if v.Bool() { i = 1 } r.SetInt(i) return r, nil case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: r.SetInt(v.Int()) return r, nil case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: r.SetInt(int64(v.Uint())) return r, nil case reflect.Float32, reflect.Float64: r.SetFloat(v.Float()) return r, nil case reflect.String: r.SetBytes([]byte(v.String())) return r, nil } return r, fmt.Errorf("unable to marshal value: %s", v) }