// CopyShardReplications will create the ShardReplication objects in // the destination topo func CopyShardReplications(fromTS, toTS topo.Server) { keyspaces, err := fromTS.GetKeyspaces() if err != nil { log.Fatalf("fromTS.GetKeyspaces: %v", err) } wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() shards, err := fromTS.GetShardNames(keyspace) if err != nil { rec.RecordError(fmt.Errorf("GetShardNames(%v): %v", keyspace, err)) return } for _, shard := range shards { wg.Add(1) go func(keyspace, shard string) { defer wg.Done() // read the source shard to get the cells si, err := fromTS.GetShard(keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetShard(%v, %v): %v", keyspace, shard, err)) return } for _, cell := range si.Cells { sri, err := fromTS.GetShardReplication(cell, keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v): %v", cell, keyspace, shard, err)) continue } if err := toTS.UpdateShardReplicationFields(cell, keyspace, shard, func(oldSR *topo.ShardReplication) error { *oldSR = *sri.ShardReplication return nil }); err != nil { rec.RecordError(fmt.Errorf("UpdateShardReplicationFields(%v, %v, %v): %v", cell, keyspace, shard, err)) } } }(keyspace, shard) } }(keyspace) } wg.Wait() if rec.HasErrors() { log.Fatalf("copyShards failed: %v", rec.Error()) } }
// Update shard file with new master, replicas, etc. // // Re-read from TopologyServer to make sure we are using the side // effects of all actions. // // This function locks individual SvrShard paths, so it doesn't need a lock // on the shard. func RebuildShard(ctx context.Context, log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, timeout time.Duration, interrupted chan struct{}) (*topo.ShardInfo, error) { log.Infof("RebuildShard %v/%v", keyspace, shard) span := trace.NewSpanFromContext(ctx) span.StartLocal("topotools.RebuildShard") defer span.Finish() ctx = trace.NewContext(ctx, span) // read the existing shard info. It has to exist. shardInfo, err := ts.GetShard(keyspace, shard) if err != nil { return nil, err } // rebuild all cells in parallel wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, cell := range shardInfo.Cells { // skip this cell if we shouldn't rebuild it if !topo.InCellList(cell, cells) { continue } // start with the master if it's in the current cell tabletsAsMap := make(map[topo.TabletAlias]bool) if shardInfo.MasterAlias.Cell == cell { tabletsAsMap[shardInfo.MasterAlias] = true } wg.Add(1) go func(cell string) { defer wg.Done() // Lock the SrvShard so we don't race with other rebuilds of the same // shard in the same cell (e.g. from our peer tablets). actionNode := actionnode.RebuildSrvShard() lockPath, err := actionNode.LockSrvShard(ctx, ts, cell, keyspace, shard, timeout, interrupted) if err != nil { rec.RecordError(err) return } // read the ShardReplication object to find tablets sri, err := ts.GetShardReplication(cell, keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err)) return } // add all relevant tablets to the map for _, rl := range sri.ReplicationLinks { tabletsAsMap[rl.TabletAlias] = true if rl.Parent.Cell == cell { tabletsAsMap[rl.Parent] = true } } // convert the map to a list aliases := make([]topo.TabletAlias, 0, len(tabletsAsMap)) for a := range tabletsAsMap { aliases = append(aliases, a) } // read all the Tablet records tablets, err := topo.GetTabletMap(ctx, ts, aliases) switch err { case nil: // keep going, we're good case topo.ErrPartialResult: log.Warningf("Got ErrPartialResult from topo.GetTabletMap in cell %v, some tablets may not be added properly to serving graph", cell) default: rec.RecordError(fmt.Errorf("GetTabletMap in cell %v failed: %v", cell, err)) return } // write the data we need to rebuildErr := rebuildCellSrvShard(ctx, log, ts, shardInfo, cell, tablets) // and unlock if err := actionNode.UnlockSrvShard(ctx, ts, cell, keyspace, shard, lockPath, rebuildErr); err != nil { rec.RecordError(err) } }(cell) } wg.Wait() return shardInfo, rec.Error() }
// ChangeType changes the type of the tablet and possibly also updates // the health informaton for it. Make this external, since these // transitions need to be forced from time to time. // // - if health is nil, we don't touch the Tablet's Health record. // - if health is an empty map, we clear the Tablet's Health record. // - if health has values, we overwrite the Tablet's Health record. func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, health map[string]string, runHooks bool) error { tablet, err := ts.GetTablet(tabletAlias) if err != nil { return err } if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) { return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias) } if runHooks { // Only run the preflight_serving_type hook when // transitioning from non-serving to serving. if !topo.IsInServingGraph(tablet.Type) && topo.IsInServingGraph(newType) { if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil { return err } } } tablet.Type = newType if newType == topo.TYPE_IDLE { if tablet.Parent.IsZero() { si, err := ts.GetShard(tablet.Keyspace, tablet.Shard) if err != nil { return err } rec := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, cell := range si.Cells { wg.Add(1) go func(cell string) { defer wg.Done() sri, err := ts.GetShardReplication(cell, tablet.Keyspace, tablet.Shard) if err != nil { log.Warningf("Cannot check cell %v for extra replication paths, assuming it's good", cell) return } for _, rl := range sri.ReplicationLinks { if rl.Parent == tabletAlias { rec.RecordError(fmt.Errorf("Still have a ReplicationLink in cell %v", cell)) } } }(cell) } wg.Wait() if rec.HasErrors() { return rec.Error() } } tablet.Parent = topo.TabletAlias{} tablet.Keyspace = "" tablet.Shard = "" tablet.KeyRange = key.KeyRange{} tablet.Health = health } if health != nil { if len(health) == 0 { tablet.Health = nil } else { tablet.Health = health } } return topo.UpdateTablet(context.TODO(), ts, tablet) }
func CheckShardReplication(t *testing.T, ts topo.Server) { cell := getLocalCell(t, ts) if _, err := ts.GetShardReplication(cell, "test_keyspace", "-10"); err != topo.ErrNoNode { t.Errorf("GetShardReplication(not there): %v", err) } sr := &topo.ShardReplication{ ReplicationLinks: []topo.ReplicationLink{ topo.ReplicationLink{ TabletAlias: topo.TabletAlias{ Cell: "c1", Uid: 1, }, Parent: topo.TabletAlias{ Cell: "c2", Uid: 2, }, }, }, } if err := ts.UpdateShardReplicationFields(cell, "test_keyspace", "-10", func(oldSr *topo.ShardReplication) error { *oldSr = *sr return nil }); err != nil { t.Fatalf("UpdateShardReplicationFields() failed: %v", err) } if sri, err := ts.GetShardReplication(cell, "test_keyspace", "-10"); err != nil { t.Errorf("GetShardReplication(new guy) failed: %v", err) } else { if len(sri.ReplicationLinks) != 1 || sri.ReplicationLinks[0].TabletAlias.Cell != "c1" || sri.ReplicationLinks[0].TabletAlias.Uid != 1 || sri.ReplicationLinks[0].Parent.Cell != "c2" || sri.ReplicationLinks[0].Parent.Uid != 2 { t.Errorf("GetShardReplication(new guy) returned wrong value: %v", *sri) } } if err := ts.UpdateShardReplicationFields(cell, "test_keyspace", "-10", func(sr *topo.ShardReplication) error { sr.ReplicationLinks = append(sr.ReplicationLinks, topo.ReplicationLink{ TabletAlias: topo.TabletAlias{ Cell: "c3", Uid: 3, }, Parent: topo.TabletAlias{ Cell: "c4", Uid: 4, }, }) return nil }); err != nil { t.Errorf("UpdateShardReplicationFields() failed: %v", err) } if sri, err := ts.GetShardReplication(cell, "test_keyspace", "-10"); err != nil { t.Errorf("GetShardReplication(after append) failed: %v", err) } else { if len(sri.ReplicationLinks) != 2 || sri.ReplicationLinks[0].TabletAlias.Cell != "c1" || sri.ReplicationLinks[0].TabletAlias.Uid != 1 || sri.ReplicationLinks[0].Parent.Cell != "c2" || sri.ReplicationLinks[0].Parent.Uid != 2 || sri.ReplicationLinks[1].TabletAlias.Cell != "c3" || sri.ReplicationLinks[1].TabletAlias.Uid != 3 || sri.ReplicationLinks[1].Parent.Cell != "c4" || sri.ReplicationLinks[1].Parent.Uid != 4 { t.Errorf("GetShardReplication(new guy) returned wrong value: %v", *sri) } } if err := ts.DeleteShardReplication(cell, "test_keyspace", "-10"); err != nil { t.Errorf("DeleteShardReplication(existing) failed: %v", err) } if err := ts.DeleteShardReplication(cell, "test_keyspace", "-10"); err != topo.ErrNoNode { t.Errorf("DeleteShardReplication(again) returned: %v", err) } }