// CopyShardReplications will create the ShardReplication objects in
// the destination topo.
func CopyShardReplications(fromTS, toTS topo.Server) {
	keyspaces, err := fromTS.GetKeyspaces()
	if err != nil {
		log.Fatalf("fromTS.GetKeyspaces: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			shards, err := fromTS.GetShardNames(keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardNames(%v): %v", keyspace, err))
				return
			}

			for _, shard := range shards {
				wg.Add(1)
				go func(keyspace, shard string) {
					defer wg.Done()

					// read the source shard to get the cells
					si, err := fromTS.GetShard(keyspace, shard)
					if err != nil {
						rec.RecordError(fmt.Errorf("GetShard(%v, %v): %v", keyspace, shard, err))
						return
					}

					for _, cell := range si.Cells {
						sri, err := fromTS.GetShardReplication(cell, keyspace, shard)
						if err != nil {
							rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v): %v", cell, keyspace, shard, err))
							continue
						}

						// overwrite the destination object with the source data
						if err := toTS.UpdateShardReplicationFields(cell, keyspace, shard, func(oldSR *topo.ShardReplication) error {
							*oldSR = *sri.ShardReplication
							return nil
						}); err != nil {
							rec.RecordError(fmt.Errorf("UpdateShardReplicationFields(%v, %v, %v): %v", cell, keyspace, shard, err))
						}
					}
				}(keyspace, shard)
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("CopyShardReplications failed: %v", rec.Error())
	}
}
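// A minimal usage sketch for CopyShardReplications. The backend names and
// the topo.GetServerByName wiring below are assumptions modeled on how
// topo-copy style tools select their source and destination servers; they
// are not taken from this file.
func copyReplicationGraphExample() {
	fromTS := topo.GetServerByName("zookeeper") // hypothetical source backend name
	toTS := topo.GetServerByName("etcd")        // hypothetical destination backend name

	// CopyShardReplications calls log.Fatalf internally on failure,
	// so there is no error value to check here.
	CopyShardReplications(fromTS, toTS)
}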
// CheckShard verifies the Shard operations work correctly.
func CheckShard(t *testing.T, ts topo.Server) {
	if err := ts.CreateKeyspace("test_keyspace", &topo.Keyspace{}); err != nil {
		t.Fatalf("CreateKeyspace: %v", err)
	}

	if err := topo.CreateShard(ts, "test_keyspace", "b0-c0"); err != nil {
		t.Fatalf("CreateShard: %v", err)
	}
	if err := topo.CreateShard(ts, "test_keyspace", "b0-c0"); err != topo.ErrNodeExists {
		t.Errorf("CreateShard called second time, got: %v", err)
	}

	if _, err := ts.GetShard("test_keyspace", "666"); err != topo.ErrNoNode {
		t.Errorf("GetShard(666): %v", err)
	}

	shardInfo, err := ts.GetShard("test_keyspace", "b0-c0")
	if err != nil {
		t.Errorf("GetShard: %v", err)
	}
	if want := newKeyRange("b0-c0"); shardInfo.KeyRange != want {
		t.Errorf("shardInfo.KeyRange: want %v, got %v", want, shardInfo.KeyRange)
	}

	master := topo.TabletAlias{Cell: "ny", Uid: 1}
	shardInfo.MasterAlias = master
	shardInfo.KeyRange = newKeyRange("b0-c0")
	shardInfo.ServedTypesMap = map[topo.TabletType]*topo.ShardServedType{
		topo.TYPE_MASTER:  &topo.ShardServedType{},
		topo.TYPE_REPLICA: &topo.ShardServedType{Cells: []string{"c1"}},
		topo.TYPE_RDONLY:  &topo.ShardServedType{},
	}
	shardInfo.SourceShards = []topo.SourceShard{
		topo.SourceShard{
			Uid:      1,
			Keyspace: "source_ks",
			Shard:    "b8-c0",
			KeyRange: newKeyRange("b8-c0"),
			Tables:   []string{"table1", "table2"},
		},
	}
	shardInfo.TabletControlMap = map[topo.TabletType]*topo.TabletControl{
		topo.TYPE_MASTER: &topo.TabletControl{
			Cells:             []string{"c1", "c2"},
			BlacklistedTables: []string{"black1", "black2"},
		},
		topo.TYPE_REPLICA: &topo.TabletControl{
			DisableQueryService: true,
		},
	}
	if err := topo.UpdateShard(context.TODO(), ts, shardInfo); err != nil {
		t.Errorf("UpdateShard: %v", err)
	}

	updatedShardInfo, err := ts.GetShard("test_keyspace", "b0-c0")
	if err != nil {
		t.Fatalf("GetShard: %v", err)
	}

	if eq, err := shardEqual(shardInfo.Shard, updatedShardInfo.Shard); err != nil {
		t.Errorf("cannot compare shards: %v", err)
	} else if !eq {
		t.Errorf("put and got shards are not identical:\n%#v\n%#v", shardInfo.Shard, updatedShardInfo.Shard)
	}

	// test GetShardNames
	shards, err := ts.GetShardNames("test_keyspace")
	if err != nil {
		t.Errorf("GetShardNames: %v", err)
	}
	if len(shards) != 1 || shards[0] != "b0-c0" {
		t.Errorf(`GetShardNames: want [ "b0-c0" ], got %v`, shards)
	}

	if _, err := ts.GetShardNames("test_keyspace666"); err != topo.ErrNoNode {
		t.Errorf("GetShardNames(666): %v", err)
	}
}
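// A hedged sketch of how a topo backend would run this shared conformance
// check from its own test suite. newServerForTest is a hypothetical
// constructor; real implementations build their concrete topo.Server
// (e.g. an in-memory or fake backend) directly.
func TestShardAgainstBackend(t *testing.T) {
	ts := newServerForTest(t) // hypothetical: returns a fresh topo.Server for the backend under test
	defer ts.Close()          // assuming this implementation needs explicit cleanup
	CheckShard(t, ts)
}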
// RebuildShard updates the serving graph data (SrvShard objects) for
// the shard with the new master, replicas, etc.
//
// It re-reads from the topology server to make sure it is using the
// side effects of all actions.
//
// This function locks individual SrvShard paths, so it doesn't need a
// lock on the shard.
func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, timeout time.Duration, interrupted chan struct{}) (*topo.ShardInfo, error) {
	log.Infof("RebuildShard %v/%v", keyspace, shard)

	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("topotools.RebuildShard")
	defer span.Finish()
	ctx = trace.NewContext(ctx, span)

	// read the existing shard info. It has to exist.
	shardInfo, err := ts.GetShard(keyspace, shard)
	if err != nil {
		return nil, err
	}

	// rebuild all cells in parallel
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, cell := range shardInfo.Cells {
		// skip this cell if we shouldn't rebuild it
		if !topo.InCellList(cell, cells) {
			continue
		}

		// start with the master if it's in the current cell
		tabletsAsMap := make(map[topo.TabletAlias]bool)
		if shardInfo.MasterAlias.Cell == cell {
			tabletsAsMap[shardInfo.MasterAlias] = true
		}

		wg.Add(1)
		go func(cell string) {
			defer wg.Done()

			// Lock the SrvShard so we don't race with other rebuilds of the same
			// shard in the same cell (e.g. from our peer tablets).
			actionNode := actionnode.RebuildSrvShard()
			lockPath, err := actionNode.LockSrvShard(ctx, ts, cell, keyspace, shard, timeout, interrupted)
			if err != nil {
				rec.RecordError(err)
				return
			}

			// read the ShardReplication object to find tablets
			sri, err := ts.GetShardReplication(cell, keyspace, shard)
			if err != nil {
				err = fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err)
				rec.RecordError(err)
				// release the SrvShard lock on this early exit too
				if unlockErr := actionNode.UnlockSrvShard(ctx, ts, cell, keyspace, shard, lockPath, err); unlockErr != nil {
					rec.RecordError(unlockErr)
				}
				return
			}

			// add all relevant tablets to the map
			for _, rl := range sri.ReplicationLinks {
				tabletsAsMap[rl.TabletAlias] = true
				if rl.Parent.Cell == cell {
					tabletsAsMap[rl.Parent] = true
				}
			}

			// convert the map to a list
			aliases := make([]topo.TabletAlias, 0, len(tabletsAsMap))
			for a := range tabletsAsMap {
				aliases = append(aliases, a)
			}

			// read all the Tablet records
			tablets, err := topo.GetTabletMap(ctx, ts, aliases)
			switch err {
			case nil:
				// keep going, we're good
			case topo.ErrPartialResult:
				log.Warningf("Got ErrPartialResult from topo.GetTabletMap in cell %v, some tablets may not be added properly to serving graph", cell)
			default:
				err = fmt.Errorf("GetTabletMap in cell %v failed: %v", cell, err)
				rec.RecordError(err)
				// release the SrvShard lock on this early exit too
				if unlockErr := actionNode.UnlockSrvShard(ctx, ts, cell, keyspace, shard, lockPath, err); unlockErr != nil {
					rec.RecordError(unlockErr)
				}
				return
			}

			// write the data we need to
			rebuildErr := rebuildCellSrvShard(ctx, log, ts, shardInfo, cell, tablets)

			// and unlock
			if err := actionNode.UnlockSrvShard(ctx, ts, cell, keyspace, shard, lockPath, rebuildErr); err != nil {
				rec.RecordError(err)
			}
		}(cell)
	}
	wg.Wait()

	return shardInfo, rec.Error()
}
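// A hedged call-site sketch for RebuildShard. Passing nil for cells rebuilds
// every cell listed in the shard record, since topo.InCellList treats an
// empty list as "all cells". The logger choice and timeout value are
// illustrative, not prescribed by this package.
func rebuildOneShardExample(ctx context.Context, ts topo.Server) error {
	interrupted := make(chan struct{}) // never closed here; wire to signal handling in real use
	_, err := RebuildShard(ctx, logutil.NewConsoleLogger(), ts,
		"test_keyspace", "b0-c0", nil /* cells: all */, 30*time.Second, interrupted)
	return err
}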
// CopyShards will create the shards in the destination topo.
func CopyShards(fromTS, toTS topo.Server, deleteKeyspaceShards bool) {
	keyspaces, err := fromTS.GetKeyspaces()
	if err != nil {
		log.Fatalf("fromTS.GetKeyspaces: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			shards, err := fromTS.GetShardNames(keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetShardNames(%v): %v", keyspace, err))
				return
			}

			if deleteKeyspaceShards {
				if err := toTS.DeleteKeyspaceShards(keyspace); err != nil {
					rec.RecordError(fmt.Errorf("DeleteKeyspaceShards(%v): %v", keyspace, err))
					return
				}
			}

			for _, shard := range shards {
				wg.Add(1)
				go func(keyspace, shard string) {
					defer wg.Done()
					if err := topo.CreateShard(toTS, keyspace, shard); err != nil {
						if err == topo.ErrNodeExists {
							log.Warningf("shard %v/%v already exists", keyspace, shard)
						} else {
							rec.RecordError(fmt.Errorf("CreateShard(%v, %v): %v", keyspace, shard, err))
							return
						}
					}

					si, err := fromTS.GetShard(keyspace, shard)
					if err != nil {
						rec.RecordError(fmt.Errorf("GetShard(%v, %v): %v", keyspace, shard, err))
						return
					}

					toSi, err := toTS.GetShard(keyspace, shard)
					if err != nil {
						rec.RecordError(fmt.Errorf("toTS.GetShard(%v, %v): %v", keyspace, shard, err))
						return
					}

					if _, err := toTS.UpdateShard(si, toSi.Version()); err != nil {
						rec.RecordError(fmt.Errorf("UpdateShard(%v, %v): %v", keyspace, shard, err))
					}
				}(keyspace, shard)
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		log.Fatalf("copyShards failed: %v", rec.Error())
	}
}
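// A hedged sketch combining the copy helpers in dependency order: shards
// must exist in the destination before their replication graphs can be
// copied. CopyKeyspaces is assumed to be the sibling helper that precedes
// both; it is not shown in this file.
func copyTopologyExample(fromTS, toTS topo.Server) {
	CopyKeyspaces(fromTS, toTS) // assumed sibling helper, same error handling style
	CopyShards(fromTS, toTS, true /* deleteKeyspaceShards: start from a clean slate */)
	CopyShardReplications(fromTS, toTS)
}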
// ChangeType changes the type of the tablet and possibly also updates
// the health information for it. Make this external, since these
// transitions need to be forced from time to time.
//
// - if health is nil, we don't touch the Tablet's Health record.
// - if health is an empty map, we clear the Tablet's Health record.
// - if health has values, we overwrite the Tablet's Health record.
func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, health map[string]string, runHooks bool) error {
	tablet, err := ts.GetTablet(tabletAlias)
	if err != nil {
		return err
	}

	if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) {
		return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias)
	}

	if runHooks {
		// Only run the preflight_serving_type hook when
		// transitioning from non-serving to serving.
		if !topo.IsInServingGraph(tablet.Type) && topo.IsInServingGraph(newType) {
			if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil {
				return err
			}
		}
	}

	tablet.Type = newType
	if newType == topo.TYPE_IDLE {
		if tablet.Parent.IsZero() {
			// make sure no cell still has a ReplicationLink
			// pointing at this tablet before wiping its record
			si, err := ts.GetShard(tablet.Keyspace, tablet.Shard)
			if err != nil {
				return err
			}
			rec := concurrency.AllErrorRecorder{}
			wg := sync.WaitGroup{}
			for _, cell := range si.Cells {
				wg.Add(1)
				go func(cell string) {
					defer wg.Done()
					sri, err := ts.GetShardReplication(cell, tablet.Keyspace, tablet.Shard)
					if err != nil {
						log.Warningf("Cannot check cell %v for extra replication paths, assuming it's good", cell)
						return
					}
					for _, rl := range sri.ReplicationLinks {
						if rl.Parent == tabletAlias {
							rec.RecordError(fmt.Errorf("Still have a ReplicationLink in cell %v", cell))
						}
					}
				}(cell)
			}
			wg.Wait()
			if rec.HasErrors() {
				return rec.Error()
			}
		}
		tablet.Parent = topo.TabletAlias{}
		tablet.Keyspace = ""
		tablet.Shard = ""
		tablet.KeyRange = key.KeyRange{}
		// going idle resets the health record along with the rest
		// of the record, even when health is nil
		tablet.Health = health
	}
	if health != nil {
		if len(health) == 0 {
			tablet.Health = nil
		} else {
			tablet.Health = health
		}
	}
	return topo.UpdateTablet(context.TODO(), ts, tablet)
}
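// A hedged call-site sketch for ChangeType, following the health semantics
// documented above (nil health leaves the record alone). The alias value is
// illustrative.
func changeTypeExample(ts topo.Server) error {
	alias := topo.TabletAlias{Cell: "ny", Uid: 1}
	// Promote a tablet into the serving graph as a replica, running the
	// preflight_serving_type hook, without touching the health map.
	return ChangeType(ts, alias, topo.TYPE_REPLICA, nil /* health */, true /* runHooks */)
}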