// fillRange writes keys with the given prefix and associated values // until bytes bytes have been written or the given range has split. func fillRange(store *storage.Store, rangeID roachpb.RangeID, prefix roachpb.Key, bytes int64, t *testing.T) { src := rand.New(rand.NewSource(0)) for { var ms engine.MVCCStats if err := engine.MVCCGetRangeStats(store.Engine(), rangeID, &ms); err != nil { t.Fatal(err) } keyBytes, valBytes := ms.KeyBytes, ms.ValBytes if keyBytes+valBytes >= bytes { return } key := append(append([]byte(nil), prefix...), randutil.RandBytes(src, 100)...) key = keys.MakeNonColumnKey(key) val := randutil.RandBytes(src, int(src.Int31n(1<<8))) pArgs := putArgs(key, val) _, err := client.SendWrappedWith(store, nil, roachpb.Header{ RangeID: rangeID, }, &pArgs) // When the split occurs in the background, our writes may start failing. // We know we can stop writing when this happens. if _, ok := err.(*roachpb.RangeKeyMismatchError); ok { return } else if err != nil { t.Fatal(err) } } }
// EncodeSecondaryIndexes encodes key/values for the secondary indexes. colMap // maps ColumnIDs to indices in `values`. func EncodeSecondaryIndexes( tableID ID, indexes []IndexDescriptor, colMap map[ColumnID]int, values []parser.Datum, ) ([]IndexEntry, error) { var secondaryIndexEntries []IndexEntry for _, secondaryIndex := range indexes { secondaryIndexKeyPrefix := MakeIndexKeyPrefix(tableID, secondaryIndex.ID) secondaryIndexKey, containsNull, err := EncodeIndexKey( &secondaryIndex, colMap, values, secondaryIndexKeyPrefix) if err != nil { return nil, err } // Add the implicit columns - they are encoded ascendingly. implicitDirs := make([]encoding.Direction, 0, len(secondaryIndex.ImplicitColumnIDs)) for range secondaryIndex.ImplicitColumnIDs { implicitDirs = append(implicitDirs, encoding.Ascending) } extraKey, _, err := EncodeColumns(secondaryIndex.ImplicitColumnIDs, implicitDirs, colMap, values, nil) if err != nil { return nil, err } entry := IndexEntry{Key: secondaryIndexKey} if !secondaryIndex.Unique || containsNull { // If the index is not unique or it contains a NULL value, append // extraKey to the key in order to make it unique. entry.Key = append(entry.Key, extraKey...) } // Index keys are considered "sentinel" keys in that they do not have a // column ID suffix. entry.Key = keys.MakeNonColumnKey(entry.Key) if secondaryIndex.Unique { // Note that a unique secondary index that contains a NULL column value // will have extraKey appended to the key and stored in the value. We // require extraKey to be appended to the key in order to make the key // unique. We could potentially get rid of the duplication here but at // the expense of complicating scanNode when dealing with unique // secondary indexes. entry.Value = extraKey } secondaryIndexEntries = append(secondaryIndexEntries, entry) } return secondaryIndexEntries, nil }
// TestStoreRangeSplitAtTablePrefix verifies a range can be split at // UserTableDataMin and still gossip the SystemConfig properly. func TestStoreRangeSplitAtTablePrefix(t *testing.T) { defer leaktest.AfterTest(t) defer config.TestingDisableTableSplits()() store, stopper := createTestStore(t) defer stopper.Stop() key := keys.MakeNonColumnKey(append([]byte(nil), keys.UserTableDataMin...)) args := adminSplitArgs(key, key) _, err := client.SendWrapped(rg1(store), nil, &args) if err != nil { t.Fatalf("%q: split unexpected error: %s", key, err) } desc := &sql.TableDescriptor{} descBytes, err := desc.Marshal() if err != nil { t.Fatal(err) } // Update SystemConfig to trigger gossip. if err := store.DB().Txn(func(txn *client.Txn) error { txn.SetSystemConfigTrigger() // We don't care about the values, just the keys. k := sql.MakeDescMetadataKey(sql.ID(keys.MaxReservedDescID + 1)) return txn.Put(k, desc) }); err != nil { t.Fatal(err) } successChan := make(chan struct{}, 1) store.Gossip().RegisterCallback(gossip.KeySystemConfig, func(_ string, content roachpb.Value) { contentBytes, err := content.GetBytes() if err != nil { t.Fatal(err) } if bytes.Contains(contentBytes, descBytes) { select { case successChan <- struct{}{}: default: } } }) select { case <-time.After(time.Second): t.Errorf("expected a schema gossip containing %q, but did not see one", descBytes) case <-successChan: } }
func writeRandomDataToRange(t testing.TB, store *storage.Store, rangeID roachpb.RangeID, keyPrefix []byte) { src := rand.New(rand.NewSource(0)) for i := 0; i < 100; i++ { key := append([]byte(nil), keyPrefix...) key = append(key, randutil.RandBytes(src, int(src.Int31n(1<<7)))...) key = keys.MakeNonColumnKey(key) val := randutil.RandBytes(src, int(src.Int31n(1<<8))) pArgs := putArgs(key, val) if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: rangeID, }, &pArgs); pErr != nil { t.Fatal(pErr) } } }
// EncodeSecondaryIndex encodes key/values for a secondary index. colMap maps // ColumnIDs to indices in `values`. func EncodeSecondaryIndex( tableID ID, secondaryIndex *IndexDescriptor, colMap map[ColumnID]int, values []parser.Datum, ) (IndexEntry, error) { secondaryIndexKeyPrefix := MakeIndexKeyPrefix(tableID, secondaryIndex.ID) secondaryIndexKey, containsNull, err := EncodeIndexKey( secondaryIndex, colMap, values, secondaryIndexKeyPrefix) if err != nil { return IndexEntry{}, err } // Add the implicit columns - they are encoded ascendingly which is done by // passing nil for the encoding directions. extraKey, _, err := EncodeColumns(secondaryIndex.ImplicitColumnIDs, nil, colMap, values, nil) if err != nil { return IndexEntry{}, err } entry := IndexEntry{Key: secondaryIndexKey} if !secondaryIndex.Unique || containsNull { // If the index is not unique or it contains a NULL value, append // extraKey to the key in order to make it unique. entry.Key = append(entry.Key, extraKey...) } // Index keys are considered "sentinel" keys in that they do not have a // column ID suffix. entry.Key = keys.MakeNonColumnKey(entry.Key) if secondaryIndex.Unique { // Note that a unique secondary index that contains a NULL column value // will have extraKey appended to the key and stored in the value. We // require extraKey to be appended to the key in order to make the key // unique. We could potentially get rid of the duplication here but at // the expense of complicating scanNode when dealing with unique // secondary indexes. entry.Value.SetBytes(extraKey) } else { // The zero value for an index-key is a 0-length bytes value. entry.Value.SetBytes([]byte{}) } return entry, nil }
func encodeSecondaryIndexes(tableID ID, indexes []IndexDescriptor, colMap map[ColumnID]int, values []parser.Datum) ([]indexEntry, *roachpb.Error) { var secondaryIndexEntries []indexEntry for _, secondaryIndex := range indexes { secondaryIndexKeyPrefix := MakeIndexKeyPrefix(tableID, secondaryIndex.ID) secondaryIndexKey, containsNull, pErr := encodeIndexKey( secondaryIndex.ColumnIDs, colMap, values, secondaryIndexKeyPrefix) if pErr != nil { return nil, pErr } extraKey, _, pErr := encodeIndexKey(secondaryIndex.ImplicitColumnIDs, colMap, values, nil) if pErr != nil { return nil, pErr } entry := indexEntry{key: secondaryIndexKey} if !secondaryIndex.Unique || containsNull { // If the index is not unique or it contains a NULL value, append // extraKey to the key in order to make it unique. entry.key = append(entry.key, extraKey...) } // Index keys are considered "sentinel" keys in that they do not have a // column ID suffix. entry.key = keys.MakeNonColumnKey(entry.key) if secondaryIndex.Unique { // Note that a unique secondary index that contains a NULL column value // will have extraKey appended to the key and stored in the value. We // require extraKey to be appended to the key in order to make the key // unique. We could potentially get rid of the duplication here but at // the expense of complicating scanNode when dealing with unique // secondary indexes. entry.value = extraKey } secondaryIndexEntries = append(secondaryIndexEntries, entry) } return secondaryIndexEntries, nil }
func BenchmarkReplicaSnapshot(b *testing.B) { defer tracing.Disable()() defer config.TestingDisableTableSplits()() store, stopper, _ := createTestStore(b) // We want to manually control the size of the raft log. store.DisableRaftLogQueue(true) defer stopper.Stop() const rangeID = 1 const keySize = 1 << 7 // 128 B const valSize = 1 << 10 // 1 KiB const snapSize = 1 << 25 // 32 MiB rep, err := store.GetReplica(rangeID) if err != nil { b.Fatal(err) } src := rand.New(rand.NewSource(0)) for i := 0; i < snapSize/(keySize+valSize); i++ { key := keys.MakeNonColumnKey(randutil.RandBytes(src, keySize)) val := randutil.RandBytes(src, valSize) pArgs := putArgs(key, val) if _, pErr := client.SendWrappedWith(rep, nil, roachpb.Header{ RangeID: rangeID, }, &pArgs); pErr != nil { b.Fatal(pErr) } } b.ResetTimer() for i := 0; i < b.N; i++ { if _, err := rep.GetSnapshot(); err != nil { b.Fatal(err) } } }
// TestStoreRangeSplitStats starts by splitting the system keys from user-space // keys and verifying that the user space side of the split (which is empty), // has all zeros for stats. It then writes random data to the user space side, // splits it halfway and verifies the two splits have stats exactly equaling // the pre-split. func TestStoreRangeSplitStats(t *testing.T) { defer leaktest.AfterTest(t) store, stopper := createTestStore(t) defer stopper.Stop() // Split the range after the last table data key. keyPrefix := keys.MakeTablePrefix(keys.MaxReservedDescID + 1) keyPrefix = keys.MakeNonColumnKey(keyPrefix) args := adminSplitArgs(roachpb.KeyMin, keyPrefix) if _, err := client.SendWrapped(rg1(store), nil, &args); err != nil { t.Fatal(err) } // Verify empty range has empty stats. rng := store.LookupReplica(keyPrefix, nil) // NOTE that this value is expected to change over time, depending on what // we store in the sys-local keyspace. Update it accordingly for this test. if err := verifyRangeStats(store.Engine(), rng.Desc().RangeID, engine.MVCCStats{}); err != nil { t.Fatal(err) } // Write random data. src := rand.New(rand.NewSource(0)) for i := 0; i < 100; i++ { key := append([]byte(nil), keyPrefix...) key = append(key, randutil.RandBytes(src, int(src.Int31n(1<<7)))...) key = keys.MakeNonColumnKey(key) val := randutil.RandBytes(src, int(src.Int31n(1<<8))) pArgs := putArgs(key, val) if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: rng.Desc().RangeID, }, &pArgs); err != nil { t.Fatal(err) } } // Get the range stats now that we have data. var ms engine.MVCCStats if err := engine.MVCCGetRangeStats(store.Engine(), rng.Desc().RangeID, &ms); err != nil { t.Fatal(err) } // Split the range at approximate halfway point ("Z" in string "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"). midKey := append([]byte(nil), keyPrefix...) midKey = append(midKey, []byte("Z")...) 
midKey = keys.MakeNonColumnKey(midKey) args = adminSplitArgs(keyPrefix, midKey) if _, err := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: rng.Desc().RangeID, }, &args); err != nil { t.Fatal(err) } var msLeft, msRight engine.MVCCStats if err := engine.MVCCGetRangeStats(store.Engine(), rng.Desc().RangeID, &msLeft); err != nil { t.Fatal(err) } rngRight := store.LookupReplica(midKey, nil) if err := engine.MVCCGetRangeStats(store.Engine(), rngRight.Desc().RangeID, &msRight); err != nil { t.Fatal(err) } // The stats should be exactly equal when added. expMS := engine.MVCCStats{ LiveBytes: msLeft.LiveBytes + msRight.LiveBytes, KeyBytes: msLeft.KeyBytes + msRight.KeyBytes, ValBytes: msLeft.ValBytes + msRight.ValBytes, IntentBytes: msLeft.IntentBytes + msRight.IntentBytes, LiveCount: msLeft.LiveCount + msRight.LiveCount, KeyCount: msLeft.KeyCount + msRight.KeyCount, ValCount: msLeft.ValCount + msRight.ValCount, IntentCount: msLeft.IntentCount + msRight.IntentCount, } ms.SysBytes, ms.SysCount = 0, 0 if !reflect.DeepEqual(expMS, ms) { t.Errorf("expected left and right ranges to equal original: %+v + %+v != %+v", msLeft, msRight, ms) } }
func TestComputeSplits(t *testing.T) { defer leaktest.AfterTest(t) const ( start = keys.MaxReservedDescID + 1 reservedStart = keys.MaxSystemConfigDescID + 1 ) schema := sql.MakeMetadataSchema() // Real SQL system tables only. baseSql := schema.GetInitialValues() // Real SQL system tables plus some user stuff. userSql := append(schema.GetInitialValues(), descriptor(start), descriptor(start+1), descriptor(start+5)) // Real SQL system with reserved non-system tables. schema.AddTable(reservedStart+1, "CREATE TABLE system.test1 (i INT PRIMARY KEY)", privilege.List{privilege.ALL}) schema.AddTable(reservedStart+2, "CREATE TABLE system.test2 (i INT PRIMARY KEY)", privilege.List{privilege.ALL}) reservedSql := schema.GetInitialValues() // Real SQL system with reserved non-system and user database. allSql := append(schema.GetInitialValues(), descriptor(start), descriptor(start+1), descriptor(start+5)) allUserSplits := []uint32{start, start + 1, start + 2, start + 3, start + 4, start + 5} allReservedSplits := []uint32{reservedStart, reservedStart + 1, reservedStart + 2} allSplits := append(allReservedSplits, allUserSplits...) testCases := []struct { values []roachpb.KeyValue start, end roachpb.RKey // Use ints in the testcase definitions, more readable. splits []uint32 }{ // No data. {nil, roachpb.RKeyMin, roachpb.RKeyMax, nil}, {nil, keys.MakeTablePrefix(start), roachpb.RKeyMax, nil}, {nil, keys.MakeTablePrefix(start), keys.MakeTablePrefix(start + 10), nil}, {nil, roachpb.RKeyMin, keys.MakeTablePrefix(start + 10), nil}, // No user data. {baseSql, roachpb.RKeyMin, roachpb.RKeyMax, allReservedSplits[:1]}, {baseSql, keys.MakeTablePrefix(start), roachpb.RKeyMax, nil}, {baseSql, keys.MakeTablePrefix(start), keys.MakeTablePrefix(start + 10), nil}, {baseSql, roachpb.RKeyMin, keys.MakeTablePrefix(start + 10), allReservedSplits[:1]}, // User descriptors. 
{userSql, keys.MakeTablePrefix(start - 1), roachpb.RKeyMax, allUserSplits}, {userSql, keys.MakeTablePrefix(start), roachpb.RKeyMax, allUserSplits[1:]}, {userSql, keys.MakeTablePrefix(start), keys.MakeTablePrefix(start + 10), allUserSplits[1:]}, {userSql, keys.MakeTablePrefix(start - 1), keys.MakeTablePrefix(start + 10), allUserSplits}, {userSql, keys.MakeTablePrefix(start + 4), keys.MakeTablePrefix(start + 10), allUserSplits[5:]}, {userSql, keys.MakeTablePrefix(start + 5), keys.MakeTablePrefix(start + 10), nil}, {userSql, keys.MakeTablePrefix(start + 6), keys.MakeTablePrefix(start + 10), nil}, {userSql, keys.MakeKey(keys.MakeTablePrefix(start), roachpb.RKey("foo")), keys.MakeTablePrefix(start + 10), allUserSplits[1:]}, {userSql, keys.MakeKey(keys.MakeTablePrefix(start), roachpb.RKey("foo")), keys.MakeTablePrefix(start + 5), allUserSplits[1:5]}, {userSql, keys.MakeKey(keys.MakeTablePrefix(start), roachpb.RKey("foo")), keys.MakeKey(keys.MakeTablePrefix(start+5), roachpb.RKey("bar")), allUserSplits[1:5]}, {userSql, keys.MakeKey(keys.MakeTablePrefix(start), roachpb.RKey("foo")), keys.MakeKey(keys.MakeTablePrefix(start), roachpb.RKey("morefoo")), nil}, // Reserved descriptors. 
{reservedSql, roachpb.RKeyMin, roachpb.RKeyMax, allReservedSplits}, {reservedSql, keys.MakeTablePrefix(reservedStart), roachpb.RKeyMax, allReservedSplits[1:]}, {reservedSql, keys.MakeTablePrefix(start), roachpb.RKeyMax, nil}, {reservedSql, keys.MakeTablePrefix(reservedStart), keys.MakeTablePrefix(start + 10), allReservedSplits[1:]}, {reservedSql, roachpb.RKeyMin, keys.MakeTablePrefix(reservedStart + 2), allReservedSplits[:2]}, {reservedSql, roachpb.RKeyMin, keys.MakeTablePrefix(reservedStart + 10), allReservedSplits}, {reservedSql, keys.MakeTablePrefix(reservedStart), keys.MakeTablePrefix(reservedStart + 2), allReservedSplits[1:2]}, {reservedSql, keys.MakeKey(keys.MakeTablePrefix(reservedStart), roachpb.RKey("foo")), keys.MakeKey(keys.MakeTablePrefix(start+10), roachpb.RKey("foo")), allReservedSplits[1:]}, // Reserved/User mix. {allSql, roachpb.RKeyMin, roachpb.RKeyMax, allSplits}, {allSql, keys.MakeTablePrefix(reservedStart + 1), roachpb.RKeyMax, allSplits[2:]}, {allSql, keys.MakeTablePrefix(start), roachpb.RKeyMax, allSplits[4:]}, {allSql, keys.MakeTablePrefix(reservedStart), keys.MakeTablePrefix(start + 10), allSplits[1:]}, {allSql, roachpb.RKeyMin, keys.MakeTablePrefix(start + 2), allSplits[:5]}, {allSql, keys.MakeKey(keys.MakeTablePrefix(reservedStart), roachpb.RKey("foo")), keys.MakeKey(keys.MakeTablePrefix(start+5), roachpb.RKey("foo")), allSplits[1:8]}, } cfg := config.SystemConfig{} for tcNum, tc := range testCases { cfg.Values = tc.values splits := cfg.ComputeSplitKeys(tc.start, tc.end) if len(splits) == 0 && len(tc.splits) == 0 { continue } // Convert ints to actual keys. expected := []roachpb.RKey{} for _, s := range tc.splits { expected = append(expected, keys.MakeNonColumnKey(keys.MakeTablePrefix(s))) } if !reflect.DeepEqual(splits, expected) { t.Errorf("#%d: bad splits:\ngot: %v\nexpected: %v", tcNum, splits, expected) } } }
// ComputeSplitKeys takes a start and end key and returns an array of keys // at which to split the span [start, end). // The only required splits are at each user table prefix. func (s SystemConfig) ComputeSplitKeys(startKey, endKey roachpb.RKey) []roachpb.RKey { if TestingTableSplitsDisabled() { return nil } tableStart := roachpb.RKey(keys.ReservedTableDataMin) if !tableStart.Less(endKey) { // This range is before the user tables span: no required splits. return nil } startID, ok := ObjectIDForKey(startKey) if !ok || startID <= keys.MaxSystemDescID { // The start key is either: // - not part of the structured data span // - part of the system span // In either case, start looking for splits at the first ID usable // by the user data span. startID = keys.MaxSystemDescID + 1 } else { // The start key is either already a split key, or after the split // key for its ID. We can skip straight to the next one. startID++ } // Build key prefixes for sequential table IDs until we reach endKey. Note // that there are two disjoint sets of sequential keys: non-system reserved // tables have sequential IDs, as do user tables, but the two ranges contain a // gap. var splitKeys []roachpb.RKey var key roachpb.RKey // appendSplitKeys generates all possible split keys between the given range // of IDs and adds them to splitKeys. appendSplitKeys := func(startID, endID uint32) { // endID could be smaller than startID if we don't have user tables. for id := startID; id <= endID; id++ { key = keys.MakeNonColumnKey(keys.MakeTablePrefix(id)) // Skip if this ID matches the startKey passed to ComputeSplitKeys. if !startKey.Less(key) { continue } // Handle the case where EndKey is already a table prefix. if !key.Less(endKey) { break } splitKeys = append(splitKeys, key) } } // If the startKey falls within the non-system reserved range, compute those // keys first. 
if startID <= keys.MaxReservedDescID { endID, err := s.GetLargestObjectID(keys.MaxReservedDescID) if err != nil { log.Errorf("unable to determine largest reserved object ID from system config: %s", err) return nil } appendSplitKeys(startID, endID) startID = keys.MaxReservedDescID + 1 } // Append keys in the user space. endID, err := s.GetLargestObjectID(0) if err != nil { log.Errorf("unable to determine largest object ID from system config: %s", err) return nil } appendSplitKeys(startID, endID) return splitKeys }
// TestStoreRangeSplitStats starts by splitting the system keys from user-space // keys and verifying that the user space side of the split (which is empty), // has all zeros for stats. It then writes random data to the user space side, // splits it halfway and verifies the two splits have stats exactly equaling // the pre-split. func TestStoreRangeSplitStats(t *testing.T) { defer leaktest.AfterTest(t)() defer config.TestingDisableTableSplits()() store, stopper, manual := createTestStore(t) defer stopper.Stop() // Split the range after the last table data key. keyPrefix := keys.MakeTablePrefix(keys.MaxReservedDescID + 1) keyPrefix = keys.MakeNonColumnKey(keyPrefix) args := adminSplitArgs(roachpb.KeyMin, keyPrefix) if _, pErr := client.SendWrapped(rg1(store), nil, &args); pErr != nil { t.Fatal(pErr) } // Verify empty range has empty stats. rng := store.LookupReplica(keyPrefix, nil) // NOTE that this value is expected to change over time, depending on what // we store in the sys-local keyspace. Update it accordingly for this test. if err := verifyRangeStats(store.Engine(), rng.RangeID, engine.MVCCStats{LastUpdateNanos: manual.UnixNano()}); err != nil { t.Fatal(err) } // Write random data. writeRandomDataToRange(t, store, rng.RangeID, keyPrefix) // Get the range stats now that we have data. snap := store.Engine().NewSnapshot() defer snap.Close() var ms engine.MVCCStats if err := engine.MVCCGetRangeStats(context.Background(), snap, rng.RangeID, &ms); err != nil { t.Fatal(err) } if err := verifyRecomputedStats(snap, rng.Desc(), ms, manual.UnixNano()); err != nil { t.Fatalf("failed to verify range's stats before split: %v", err) } manual.Increment(100) // Split the range at approximate halfway point ("Z" in string "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"). midKey := append([]byte(nil), keyPrefix...) midKey = append(midKey, []byte("Z")...) 
midKey = keys.MakeNonColumnKey(midKey) args = adminSplitArgs(keyPrefix, midKey) if _, pErr := client.SendWrappedWith(rg1(store), nil, roachpb.Header{ RangeID: rng.RangeID, }, &args); pErr != nil { t.Fatal(pErr) } snap = store.Engine().NewSnapshot() defer snap.Close() var msLeft, msRight engine.MVCCStats if err := engine.MVCCGetRangeStats(context.Background(), snap, rng.RangeID, &msLeft); err != nil { t.Fatal(err) } rngRight := store.LookupReplica(midKey, nil) if err := engine.MVCCGetRangeStats(context.Background(), snap, rngRight.RangeID, &msRight); err != nil { t.Fatal(err) } // The stats should be exactly equal when added. expMS := engine.MVCCStats{ LiveBytes: msLeft.LiveBytes + msRight.LiveBytes, KeyBytes: msLeft.KeyBytes + msRight.KeyBytes, ValBytes: msLeft.ValBytes + msRight.ValBytes, IntentBytes: msLeft.IntentBytes + msRight.IntentBytes, LiveCount: msLeft.LiveCount + msRight.LiveCount, KeyCount: msLeft.KeyCount + msRight.KeyCount, ValCount: msLeft.ValCount + msRight.ValCount, IntentCount: msLeft.IntentCount + msRight.IntentCount, } ms.SysBytes, ms.SysCount = 0, 0 ms.LastUpdateNanos = 0 if expMS != ms { t.Errorf("expected left and right ranges to equal original: %+v + %+v != %+v", msLeft, msRight, ms) } // Stats should both have the new timestamp. now := manual.UnixNano() if lTs := msLeft.LastUpdateNanos; lTs != now { t.Errorf("expected left range stats to have new timestamp, want %d, got %d", now, lTs) } if rTs := msRight.LastUpdateNanos; rTs != now { t.Errorf("expected right range stats to have new timestamp, want %d, got %d", now, rTs) } // Stats should agree with recomputation. if err := verifyRecomputedStats(snap, rng.Desc(), msLeft, now); err != nil { t.Fatalf("failed to verify left range's stats after split: %v", err) } if err := verifyRecomputedStats(snap, rngRight.Desc(), msRight, now); err != nil { t.Fatalf("failed to verify right range's stats after split: %v", err) } }
// insertRow adds to the batch the kv operations necessary to insert a table row // with the given values. func (ri *rowInserter) insertRow(b *client.Batch, values []parser.Datum) error { if len(values) != len(ri.insertCols) { return util.Errorf("got %d values but expected %d", len(values), len(ri.insertCols)) } // Encode the values to the expected column type. This needs to // happen before index encoding because certain datum types (i.e. tuple) // cannot be used as index values. for i, val := range values { // Make sure the value can be written to the column before proceeding. var err error if ri.marshalled[i], err = sqlbase.MarshalColumnValue(ri.insertCols[i], val); err != nil { return err } } primaryIndexKey, secondaryIndexEntries, err := ri.helper.encodeIndexes(ri.insertColIDtoRowIndex, values) if err != nil { return err } // Write the row sentinel. We want to write the sentinel first in case // we are trying to insert a duplicate primary key: if we write the // secondary indexes first, we may get an error that looks like a // uniqueness violation on a non-unique index. ri.key = keys.MakeNonColumnKey(primaryIndexKey) if log.V(2) { log.Infof("CPut %s -> NULL", ri.key) } // Each sentinel value needs a distinct RawBytes field as the computed // checksum includes the key the value is associated with. ri.sentinelValue.SetBytes([]byte{}) b.CPut(&ri.key, &ri.sentinelValue, nil) ri.key = nil for _, secondaryIndexEntry := range secondaryIndexEntries { if log.V(2) { log.Infof("CPut %s -> %v", secondaryIndexEntry.Key, secondaryIndexEntry.Value) } ri.key = secondaryIndexEntry.Key b.CPut(&ri.key, secondaryIndexEntry.Value, nil) } ri.key = nil // Write the row columns. for i, val := range values { col := ri.insertCols[i] if ri.helper.columnInPK(col.ID) { // Skip primary key columns as their values are encoded in the row // sentinel key which is guaranteed to exist for as long as the row // exists. 
continue } if ri.marshalled[i].RawBytes != nil { // We only output non-NULL values. Non-existent column keys are // considered NULL during scanning and the row sentinel ensures we know // the row exists. ri.key = keys.MakeColumnKey(primaryIndexKey, uint32(col.ID)) if log.V(2) { log.Infof("CPut %s -> %v", ri.key, val) } b.CPut(&ri.key, &ri.marshalled[i], nil) ri.key = nil } } return nil }
// Insert inserts rows into the database. // Privileges: INSERT on table // Notes: postgres requires INSERT. No "on duplicate key update" option. // mysql requires INSERT. Also requires UPDATE on "ON DUPLICATE KEY UPDATE". func (p *planner) Insert(n *parser.Insert, autoCommit bool) (planNode, *roachpb.Error) { // TODO(marcb): We can't use the cached descriptor here because a recent // update of the schema (e.g. the addition of an index) might not be // reflected in the cached version (yet). Perhaps schema modification // routines such as CREATE INDEX should not return until the schema change // has been pushed everywhere. tableDesc, pErr := p.getTableLease(n.Table) if pErr != nil { return nil, pErr } if err := p.checkPrivilege(&tableDesc, privilege.INSERT); err != nil { return nil, roachpb.NewError(err) } var cols []ColumnDescriptor // Determine which columns we're inserting into. if n.DefaultValues() { cols = tableDesc.Columns } else { var err error if cols, err = p.processColumns(&tableDesc, n.Columns); err != nil { return nil, roachpb.NewError(err) } } // Number of columns expecting an input. This doesn't include the // columns receiving a default value. numInputColumns := len(cols) // Construct a map from column ID to the index the value appears at within a // row. colIDtoRowIndex := map[ColumnID]int{} for i, c := range cols { colIDtoRowIndex[c.ID] = i } // Add the column if it has a DEFAULT expression. addIfDefault := func(col ColumnDescriptor) { if col.DefaultExpr != nil { if _, ok := colIDtoRowIndex[col.ID]; !ok { colIDtoRowIndex[col.ID] = len(cols) cols = append(cols, col) } } } // Add any column that has a DEFAULT expression. for _, col := range tableDesc.Columns { addIfDefault(col) } // Also add any column in a mutation that is WRITE_ONLY and has // a DEFAULT expression. 
for _, m := range tableDesc.Mutations { if m.State != DescriptorMutation_WRITE_ONLY { continue } if col := m.GetColumn(); col != nil { addIfDefault(*col) } } // Verify we have at least the columns that are part of the primary key. primaryKeyCols := map[ColumnID]struct{}{} for i, id := range tableDesc.PrimaryIndex.ColumnIDs { if _, ok := colIDtoRowIndex[id]; !ok { return nil, roachpb.NewUErrorf("missing %q primary key column", tableDesc.PrimaryIndex.ColumnNames[i]) } primaryKeyCols[id] = struct{}{} } // Construct the default expressions. The returned slice will be nil if no // column in the table has a default expression. defaultExprs, err := p.makeDefaultExprs(cols) if err != nil { return nil, roachpb.NewError(err) } // Replace any DEFAULT markers with the corresponding default expressions. insertRows := p.fillDefaults(defaultExprs, cols, n) // Transform the values into a rows object. This expands SELECT statements or // generates rows from the values contained within the query. rows, pErr := p.makePlan(insertRows, false) if pErr != nil { return nil, pErr } if expressions := len(rows.Columns()); expressions > numInputColumns { return nil, roachpb.NewUErrorf("INSERT has more expressions than target columns: %d/%d", expressions, numInputColumns) } primaryIndex := tableDesc.PrimaryIndex primaryIndexKeyPrefix := MakeIndexKeyPrefix(tableDesc.ID, primaryIndex.ID) marshalled := make([]interface{}, len(cols)) b := p.txn.NewBatch() rh, err := makeReturningHelper(p, n.Returning, tableDesc.Name, cols) if err != nil { return nil, roachpb.NewError(err) } for rows.Next() { rowVals := rows.Values() // The values for the row may be shorter than the number of columns being // inserted into. Generate default values for those columns using the // default expressions. 
for i := len(rowVals); i < len(cols); i++ { if defaultExprs == nil { rowVals = append(rowVals, parser.DNull) continue } d, err := defaultExprs[i].Eval(p.evalCtx) if err != nil { return nil, roachpb.NewError(err) } rowVals = append(rowVals, d) } // Check to see if NULL is being inserted into any non-nullable column. for _, col := range tableDesc.Columns { if !col.Nullable { if i, ok := colIDtoRowIndex[col.ID]; !ok || rowVals[i] == parser.DNull { return nil, roachpb.NewUErrorf("null value in column %q violates not-null constraint", col.Name) } } } // Check that the row value types match the column types. This needs to // happen before index encoding because certain datum types (i.e. tuple) // cannot be used as index values. for i, val := range rowVals { // Make sure the value can be written to the column before proceeding. var mErr error if marshalled[i], mErr = marshalColumnValue(cols[i], val, p.evalCtx.Args); mErr != nil { return nil, roachpb.NewError(mErr) } } if p.prepareOnly { continue } primaryIndexKey, _, eErr := encodeIndexKey( &primaryIndex, colIDtoRowIndex, rowVals, primaryIndexKeyPrefix) if eErr != nil { return nil, roachpb.NewError(eErr) } // Write the secondary indexes. indexes := tableDesc.Indexes // Also include the secondary indexes in mutation state WRITE_ONLY. for _, m := range tableDesc.Mutations { if m.State == DescriptorMutation_WRITE_ONLY { if index := m.GetIndex(); index != nil { indexes = append(indexes, *index) } } } secondaryIndexEntries, eErr := encodeSecondaryIndexes( tableDesc.ID, indexes, colIDtoRowIndex, rowVals) if eErr != nil { return nil, roachpb.NewError(eErr) } for _, secondaryIndexEntry := range secondaryIndexEntries { if log.V(2) { log.Infof("CPut %s -> %v", secondaryIndexEntry.key, secondaryIndexEntry.value) } b.CPut(secondaryIndexEntry.key, secondaryIndexEntry.value, nil) } // Write the row sentinel. 
sentinelKey := keys.MakeNonColumnKey(primaryIndexKey) if log.V(2) { log.Infof("CPut %s -> NULL", roachpb.Key(sentinelKey)) } // This is subtle: An interface{}(nil) deletes the value, so we pass in // []byte{} as a non-nil value. b.CPut(sentinelKey, []byte{}, nil) // Write the row columns. for i, val := range rowVals { col := cols[i] if _, ok := primaryKeyCols[col.ID]; ok { // Skip primary key columns as their values are encoded in the row // sentinel key which is guaranteed to exist for as long as the row // exists. continue } if marshalled[i] != nil { // We only output non-NULL values. Non-existent column keys are // considered NULL during scanning and the row sentinel ensures we know // the row exists. key := keys.MakeColumnKey(primaryIndexKey, uint32(col.ID)) if log.V(2) { log.Infof("CPut %s -> %v", roachpb.Key(key), val) } b.CPut(key, marshalled[i], nil) } } if err := rh.append(rowVals); err != nil { return nil, roachpb.NewError(err) } } if pErr := rows.PErr(); pErr != nil { return nil, pErr } if p.prepareOnly { // Return the result column types. return rh.getResults(), nil } if isSystemConfigID(tableDesc.GetID()) { // Mark transaction as operating on the system DB. p.txn.SetSystemConfigTrigger() } if autoCommit { // An auto-txn can commit the transaction with the batch. This is an // optimization to avoid an extra round-trip to the transaction // coordinator. pErr = p.txn.CommitInBatch(b) } else { pErr = p.txn.Run(b) } if pErr != nil { return nil, convertBatchError(&tableDesc, *b, pErr) } return rh.getResults(), nil }
// Update updates columns for a selection of rows from a table.
// Privileges: UPDATE and SELECT on table. We currently always use a select statement.
//   Notes: postgres requires UPDATE. Requires SELECT with WHERE clause with table.
//          mysql requires UPDATE. Also requires SELECT with WHERE clause with table.
// TODO(guanqun): need to support CHECK in UPDATE
//
// The statement is executed in the following phases:
//  1. Flatten the SET expressions into a list of target column names and
//     resolve those names to column descriptors (cols).
//  2. Run a select that returns every table column followed by one rendered
//     value per updated column.
//  3. For each selected row, queue the key/value deletions and writes for the
//     primary index, the affected secondary indexes and the updated columns
//     in a single batch.
//  4. Run the batch, committing the transaction along with it when autoCommit
//     is true.
//
// When p.evalCtx.PrepareOnly is set, only the type checks of phase 2 are
// performed and the result of rh.getResults() is returned without queueing
// any writes.
func (p *planner) Update(n *parser.Update, autoCommit bool) (planNode, *roachpb.Error) {
	tracing.AnnotateTrace()
	tableDesc, pErr := p.getAliasedTableLease(n.Table)
	if pErr != nil {
		return nil, pErr
	}

	if err := p.checkPrivilege(tableDesc, privilege.UPDATE); err != nil {
		return nil, roachpb.NewError(err)
	}

	// Set of column IDs that make up the primary key.
	// TODO(dan): Consider caching this on the TableDescriptor.
	primaryKeyCols := map[ColumnID]struct{}{}
	for _, id := range tableDesc.PrimaryIndex.ColumnIDs {
		primaryKeyCols[id] = struct{}{}
	}

	// Work on a copy of the update expressions; exprs[i].Expr is overwritten
	// below with the result of p.expandSubqueries.
	exprs := make([]parser.UpdateExpr, len(n.Exprs))
	for i, expr := range n.Exprs {
		exprs[i] = *expr
	}

	// Determine which columns we're updating.
	var names parser.QualifiedNames
	for i, expr := range exprs {
		newExpr, epErr := p.expandSubqueries(expr.Expr, len(expr.Names))
		if epErr != nil {
			return nil, epErr
		}
		exprs[i].Expr = newExpr

		if expr.Tuple {
			// TODO(pmattis): The distinction between Tuple and DTuple here is
			// irritating. We'll see a DTuple if the expression was a subquery that
			// has been evaluated. We'll see a Tuple in other cases.
			//
			// Note: this local n (the number of values in the tuple) shadows the
			// *parser.Update parameter n for the remainder of this if block.
			n := 0
			switch t := newExpr.(type) {
			case *parser.Tuple:
				n = len(t.Exprs)
			case parser.DTuple:
				n = len(t)
			default:
				return nil, roachpb.NewErrorf("unsupported tuple assignment: %T", newExpr)
			}
			if len(expr.Names) != n {
				return nil, roachpb.NewUErrorf("number of columns (%d) does not match number of values (%d)",
					len(expr.Names), n)
			}
		}
		names = append(names, expr.Names...)
	}

	cols, err := p.processColumns(tableDesc, names)
	if err != nil {
		return nil, roachpb.NewError(err)
	}

	// Set of columns being updated. primaryKeyColChange records whether any of
	// them is part of the primary key.
	var primaryKeyColChange bool
	colIDSet := map[ColumnID]struct{}{}
	for _, c := range cols {
		colIDSet[c.ID] = struct{}{}
		if _, ok := primaryKeyCols[c.ID]; ok {
			primaryKeyColChange = true
		}
	}

	defaultExprs, err := makeDefaultExprs(cols, &p.parser, p.evalCtx)
	if err != nil {
		return nil, roachpb.NewError(err)
	}

	// Generate the list of select targets. We need to select all of the columns
	// plus we select all of the update expressions in case those expressions
	// reference columns (e.g. "UPDATE t SET v = v + 1"). Note that we flatten
	// expressions for tuple assignments just as we flattened the column names
	// above. So "UPDATE t SET (a, b) = (1, 2)" translates into select targets of
	// "*, 1, 2", not "*, (1, 2)".
	// TODO(radu): we only need to select columns necessary to generate primary and
	// secondary indexes keys, and columns needed by returningHelper.
	targets := tableDesc.allColumnsSelector()
	// i counts the flattened update expressions appended so far; it is the
	// position passed to fillDefault alongside defaultExprs.
	i := 0
	// Remember the index where the targets for exprs start.
	exprTargetIdx := len(targets)
	for _, expr := range exprs {
		if expr.Tuple {
			switch t := expr.Expr.(type) {
			case *parser.Tuple:
				for _, e := range t.Exprs {
					e = fillDefault(e, i, defaultExprs)
					targets = append(targets, parser.SelectExpr{Expr: e})
					i++
				}
			case parser.DTuple:
				// Elements of a DTuple are appended as-is, without going through
				// fillDefault.
				for _, e := range t {
					targets = append(targets, parser.SelectExpr{Expr: e})
					i++
				}
			}
		} else {
			e := fillDefault(expr.Expr, i, defaultExprs)
			targets = append(targets, parser.SelectExpr{Expr: e})
			i++
		}
	}

	tracing.AnnotateTrace()

	// Query the rows that need updating.
	rows, pErr := p.SelectClause(&parser.SelectClause{
		Exprs: targets,
		From:  []parser.TableExpr{n.Table},
		Where: n.Where,
	})
	if pErr != nil {
		return nil, pErr
	}

	rh, err := makeReturningHelper(p, n.Returning, tableDesc.Name, tableDesc.Columns)
	if err != nil {
		return nil, roachpb.NewError(err)
	}

	// ValArgs have their types populated in the above Select if they are part
	// of an expression ("SET a = 2 + $1") in the type check step where those
	// types are inferred. For the simpler case ("SET a = $1"), populate them
	// using marshalColumnValue. This step also verifies that the expression
	// types match the column types.
	if p.evalCtx.PrepareOnly {
		// The render slice is offset by exprTargetIdx so that i lines up with
		// the corresponding entry in cols.
		for i, target := range rows.(*selectNode).render[exprTargetIdx:] {
			// DefaultVal doesn't implement TypeCheck
			if _, ok := target.(parser.DefaultVal); ok {
				continue
			}
			d, err := target.TypeCheck(p.evalCtx.Args)
			if err != nil {
				return nil, roachpb.NewError(err)
			}
			if _, err := marshalColumnValue(cols[i], d, p.evalCtx.Args); err != nil {
				return nil, roachpb.NewError(err)
			}
		}
		// Return the result column types.
		return rh.getResults()
	}

	// Construct a map from column ID to the index the value appears at within a
	// row.
	colIDtoRowIndex := map[ColumnID]int{}
	for i, col := range tableDesc.Columns {
		colIDtoRowIndex[col.ID] = i
	}

	primaryIndex := tableDesc.PrimaryIndex
	primaryIndexKeyPrefix := MakeIndexKeyPrefix(tableDesc.ID, primaryIndex.ID)

	// Secondary indexes needing updating.
	needsUpdate := func(index IndexDescriptor) bool {
		// If the primary key changed, we need to update all of them.
		if primaryKeyColChange {
			return true
		}
		// Otherwise only indexes covering at least one updated column.
		for _, id := range index.ColumnIDs {
			if _, ok := colIDSet[id]; ok {
				return true
			}
		}
		return false
	}

	indexes := make([]IndexDescriptor, 0, len(tableDesc.Indexes)+len(tableDesc.Mutations))
	// deleteOnlyIndex holds the positions within indexes of those index
	// mutations that are in the DELETE_ONLY state. It stays nil when there are
	// none; lookups on the nil map below simply report "not present".
	var deleteOnlyIndex map[int]struct{}

	for _, index := range tableDesc.Indexes {
		if needsUpdate(index) {
			indexes = append(indexes, index)
		}
	}
	// Index mutations are appended after the regular indexes regardless of
	// their state; only DELETE_ONLY ones are additionally recorded.
	for _, m := range tableDesc.Mutations {
		if index := m.GetIndex(); index != nil {
			if needsUpdate(*index) {
				indexes = append(indexes, *index)

				switch m.State {
				case DescriptorMutation_DELETE_ONLY:
					if deleteOnlyIndex == nil {
						// Allocate at most once.
						deleteOnlyIndex = make(map[int]struct{}, len(tableDesc.Mutations))
					}
					deleteOnlyIndex[len(indexes)-1] = struct{}{}

				case DescriptorMutation_WRITE_ONLY:
				}
			}
		}
	}

	// marshalled[i] holds the marshalled form of the new value for cols[i]. The
	// slice is allocated once and overwritten for every row.
	marshalled := make([]interface{}, len(cols))

	b := p.txn.NewBatch()
	tracing.AnnotateTrace()
	for rows.Next() {
		tracing.AnnotateTrace()

		rowVals := rows.Values()

		// Encode the primary index key from the row's current (pre-update)
		// values.
		primaryIndexKey, _, err := encodeIndexKey(
			&primaryIndex, colIDtoRowIndex, rowVals, primaryIndexKeyPrefix)
		if err != nil {
			return nil, roachpb.NewError(err)
		}
		// Compute the current secondary index key:value pairs for this row.
		secondaryIndexEntries, err := encodeSecondaryIndexes(
			tableDesc.ID, indexes, colIDtoRowIndex, rowVals)
		if err != nil {
			return nil, roachpb.NewError(err)
		}

		// Our updated value expressions occur immediately after the plain
		// columns in the output.
		newVals := rowVals[len(tableDesc.Columns):]

		// Ensure that the values honor the specified column widths.
		for i := range newVals {
			if err := checkValueWidth(cols[i], newVals[i]); err != nil {
				return nil, roachpb.NewError(err)
			}
		}

		// Update the row values. From here on rowVals[:len(tableDesc.Columns)]
		// reflects the row as it will look after the update.
		for i, col := range cols {
			val := newVals[i]
			if !col.Nullable && val == parser.DNull {
				return nil, roachpb.NewUErrorf("null value in column %q violates not-null constraint", col.Name)
			}
			rowVals[colIDtoRowIndex[col.ID]] = val
		}

		// Check that the new value types match the column types. This needs to
		// happen before index encoding because certain datum types (i.e. tuple)
		// cannot be used as index values.
		for i, val := range newVals {
			var mErr error
			if marshalled[i], mErr = marshalColumnValue(cols[i], val, p.evalCtx.Args); mErr != nil {
				return nil, roachpb.NewError(mErr)
			}
		}

		// Compute the new primary index key for this row.
		newPrimaryIndexKey := primaryIndexKey
		var rowPrimaryKeyChanged bool
		if primaryKeyColChange {
			newPrimaryIndexKey, _, err = encodeIndexKey(
				&primaryIndex, colIDtoRowIndex, rowVals, primaryIndexKeyPrefix)
			if err != nil {
				return nil, roachpb.NewError(err)
			}
			// Note that even if primaryKeyColChange is true, it's possible that
			// primary key fields in this particular row didn't change.
			rowPrimaryKeyChanged = !bytes.Equal(primaryIndexKey, newPrimaryIndexKey)
		}

		// Compute the new secondary index key:value pairs for this row.
		newSecondaryIndexEntries, eErr := encodeSecondaryIndexes(
			tableDesc.ID, indexes, colIDtoRowIndex, rowVals)
		if eErr != nil {
			return nil, roachpb.NewError(eErr)
		}

		if rowPrimaryKeyChanged {
			// Delete all the data stored under the old primary key.
			rowStartKey := roachpb.Key(primaryIndexKey)
			rowEndKey := rowStartKey.PrefixEnd()
			if log.V(2) {
				log.Infof("DelRange %s - %s", rowStartKey, rowEndKey)
			}
			b.DelRange(rowStartKey, rowEndKey, false)

			// Delete all the old secondary indexes.
			for _, secondaryIndexEntry := range secondaryIndexEntries {
				if log.V(2) {
					log.Infof("Del %s", secondaryIndexEntry.key)
				}
				b.Del(secondaryIndexEntry.key)
			}

			// Write the new row sentinel. We want to write the sentinel first in case
			// we are trying to insert a duplicate primary key: if we write the
			// secondary indexes first, we may get an error that looks like a
			// uniqueness violation on a non-unique index.
			sentinelKey := keys.MakeNonColumnKey(newPrimaryIndexKey)
			if log.V(2) {
				log.Infof("CPut %s -> NULL", roachpb.Key(sentinelKey))
			}
			// This is subtle: An interface{}(nil) deletes the value, so we pass in
			// []byte{} as a non-nil value.
			b.CPut(sentinelKey, []byte{}, nil)

			// Write any fields from the old row that were not modified by the UPDATE.
			for i, col := range tableDesc.Columns {
				if _, ok := colIDSet[col.ID]; ok {
					continue
				}
				if _, ok := primaryKeyCols[col.ID]; ok {
					continue
				}
				key := keys.MakeColumnKey(newPrimaryIndexKey, uint32(col.ID))
				val := rowVals[i]
				marshalledVal, mErr := marshalColumnValue(col, val, p.evalCtx.Args)
				if mErr != nil {
					return nil, roachpb.NewError(mErr)
				}

				if log.V(2) {
					log.Infof("Put %s -> %v", roachpb.Key(key), val)
				}
				// NOTE(review): unlike the "Add the new values" loop below, this
				// Put is issued without checking marshalledVal against nil.
				// Confirm that this is the intended handling for columns whose
				// old value is NULL.
				b.Put(key, marshalledVal)
			}
			// At this point, we've deleted the old row and associated index data and
			// written the sentinel keys and column keys for non-updated columns. Fall
			// through to below where the index keys and updated column keys will be
			// written.
		}

		// Update secondary indexes. secondaryIndexEntries and
		// newSecondaryIndexEntries were both encoded from indexes, so position i
		// is looked up in both and in deleteOnlyIndex.
		for i, newSecondaryIndexEntry := range newSecondaryIndexEntries {
			secondaryIndexEntry := secondaryIndexEntries[i]
			secondaryKeyChanged := !bytes.Equal(newSecondaryIndexEntry.key, secondaryIndexEntry.key)
			if secondaryKeyChanged {
				// When rowPrimaryKeyChanged is true this key was already queued for
				// deletion above, so the Del is issued a second time here.
				if log.V(2) {
					log.Infof("Del %s", secondaryIndexEntry.key)
				}
				b.Del(secondaryIndexEntry.key)
			}
			if rowPrimaryKeyChanged || secondaryKeyChanged {
				// Do not update Indexes in the DELETE_ONLY state.
				if _, ok := deleteOnlyIndex[i]; !ok {
					if log.V(2) {
						log.Infof("CPut %s -> %v", newSecondaryIndexEntry.key,
							newSecondaryIndexEntry.value)
					}
					b.CPut(newSecondaryIndexEntry.key, newSecondaryIndexEntry.value, nil)
				}
			}
		}

		// Add the new values.
		for i, val := range newVals {
			col := cols[i]

			if _, ok := primaryKeyCols[col.ID]; ok {
				// Skip primary key columns as their values are encoded in the row
				// sentinel key which is guaranteed to exist for as long as the row
				// exists.
				continue
			}

			key := keys.MakeColumnKey(newPrimaryIndexKey, uint32(col.ID))
			if marshalled[i] != nil {
				// We only output non-NULL values. Non-existent column keys are
				// considered NULL during scanning and the row sentinel ensures we know
				// the row exists.
				if log.V(2) {
					log.Infof("Put %s -> %v", roachpb.Key(key), val)
				}

				b.Put(key, marshalled[i])
			} else {
				// The column might have already existed but is being set to NULL, so
				// delete it.
				if log.V(2) {
					log.Infof("Del %s", key)
				}

				b.Del(key)
			}
		}

		// rowVals[:len(tableDesc.Columns)] have been updated with the new values above.
		if err := rh.append(rowVals[:len(tableDesc.Columns)]); err != nil {
			return nil, roachpb.NewError(err)
		}
	}
	tracing.AnnotateTrace()

	// Surface any error the row source reported while iterating.
	if pErr := rows.PErr(); pErr != nil {
		return nil, pErr
	}

	if isSystemConfigID(tableDesc.GetID()) {
		// Mark transaction as operating on the system DB.
		p.txn.SetSystemConfigTrigger()
	}

	if autoCommit {
		// An auto-txn can commit the transaction with the batch. This is an
		// optimization to avoid an extra round-trip to the transaction
		// coordinator.
		pErr = p.txn.CommitInBatch(b)
	} else {
		pErr = p.txn.Run(b)
	}
	if pErr != nil {
		return nil, convertBatchError(tableDesc, *b, pErr)
	}

	tracing.AnnotateTrace()
	return rh.getResults()
}