// Make this external, since these transitions need to be forced from time to time. func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, runHooks bool) error { tablet, err := ts.GetTablet(tabletAlias) if err != nil { return err } if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) { return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias) } if runHooks { // Only run the preflight_serving_type hook when // transitioning from non-serving to serving. if !topo.IsInServingGraph(tablet.Type) && topo.IsInServingGraph(newType) { if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil { return err } } } tablet.Type = newType if newType == topo.TYPE_IDLE { if tablet.Parent.IsZero() { si, err := ts.GetShard(tablet.Keyspace, tablet.Shard) if err != nil { return err } rec := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, cell := range si.Cells { wg.Add(1) go func(cell string) { defer wg.Done() sri, err := ts.GetShardReplication(cell, tablet.Keyspace, tablet.Shard) if err != nil { log.Warningf("Cannot check cell %v for extra replication paths, assuming it's good", cell) return } for _, rl := range sri.ReplicationLinks { if rl.Parent == tabletAlias { rec.RecordError(fmt.Errorf("Still have a ReplicationLink in cell %v", cell)) } } }(cell) } wg.Wait() if rec.HasErrors() { return rec.Error() } } tablet.Parent = topo.TabletAlias{} tablet.Keyspace = "" tablet.Shard = "" tablet.KeyRange = key.KeyRange{} } return topo.UpdateTablet(ts, tablet) }
// CopyShardReplications will create the ShardReplication objects in // the destination topo func CopyShardReplications(ctx context.Context, fromTS, toTS topo.Server) { keyspaces, err := fromTS.GetKeyspaces(ctx) if err != nil { log.Fatalf("fromTS.GetKeyspaces: %v", err) } wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() shards, err := fromTS.GetShardNames(ctx, keyspace) if err != nil { rec.RecordError(fmt.Errorf("GetShardNames(%v): %v", keyspace, err)) return } for _, shard := range shards { wg.Add(1) go func(keyspace, shard string) { defer wg.Done() // read the source shard to get the cells si, err := fromTS.GetShard(ctx, keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetShard(%v, %v): %v", keyspace, shard, err)) return } for _, cell := range si.Cells { sri, err := fromTS.GetShardReplication(ctx, cell, keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v): %v", cell, keyspace, shard, err)) continue } if err := toTS.UpdateShardReplicationFields(ctx, cell, keyspace, shard, func(oldSR *pb.ShardReplication) error { *oldSR = *sri.ShardReplication return nil }); err != nil { rec.RecordError(fmt.Errorf("UpdateShardReplicationFields(%v, %v, %v): %v", cell, keyspace, shard, err)) } } }(keyspace, shard) } }(keyspace) } wg.Wait() if rec.HasErrors() { log.Fatalf("copyShards failed: %v", rec.Error()) } }
func newCellShardTabletsCache(ts topo.Server) *VersionedObjectCacheMap { return NewVersionedObjectCacheMap(func(key string) *VersionedObjectCache { return NewVersionedObjectCache(func(ctx context.Context) (VersionedObject, error) { parts := strings.Split(key, "/") if len(parts) != 3 { return nil, fmt.Errorf("Invalid shard tablets path: %v", key) } sr, err := ts.GetShardReplication(ctx, parts[0], parts[1], parts[2]) if err != nil { return nil, err } result := &CellShardTablets{ Cell: parts[0], KeyspaceName: parts[1], ShardName: parts[2], TabletAliases: make([]*topodatapb.TabletAlias, len(sr.Nodes)), } for i, node := range sr.Nodes { result.TabletAliases[i] = node.TabletAlias } return result, nil }) }) }
// CopyShardReplications will create the ShardReplication objects in // the destination topo func CopyShardReplications(fromTS, toTS topo.Server) { keyspaces, err := fromTS.GetKeyspaces() if err != nil { log.Fatalf("fromTS.GetKeyspaces failed: %v", err) } wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, keyspace := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() shards, err := fromTS.GetShardNames(keyspace) if err != nil { rec.RecordError(err) return } for _, shard := range shards { wg.Add(1) go func(keyspace, shard string) { defer wg.Done() // read the source shard to get the cells si, err := fromTS.GetShard(keyspace, shard) if err != nil { rec.RecordError(err) return } for _, cell := range si.Cells { sri, err := fromTS.GetShardReplication(cell, keyspace, shard) if err != nil { rec.RecordError(err) continue } err = toTS.CreateShardReplication(cell, keyspace, shard, sri.ShardReplication) switch err { case nil: // good case topo.ErrNodeExists: if err := toTS.UpdateShardReplicationFields(cell, keyspace, shard, func(oldSR *topo.ShardReplication) error { *oldSR = *sri.ShardReplication return nil }); err != nil { rec.RecordError(err) } default: rec.RecordError(err) } } }(keyspace, shard) } }(keyspace) } wg.Wait() if rec.HasErrors() { log.Fatalf("copyShards failed: %v", rec.Error()) } }
// CheckShardReplication tests ShardReplication objects func CheckShardReplication(ctx context.Context, t *testing.T, ts topo.Server) { cell := getLocalCell(ctx, t, ts) if _, err := ts.GetShardReplication(ctx, cell, "test_keyspace", "-10"); err != topo.ErrNoNode { t.Errorf("GetShardReplication(not there): %v", err) } sr := &topo.ShardReplication{ ReplicationLinks: []topo.ReplicationLink{ topo.ReplicationLink{ TabletAlias: topo.TabletAlias{ Cell: "c1", Uid: 1, }, }, }, } if err := ts.UpdateShardReplicationFields(ctx, cell, "test_keyspace", "-10", func(oldSr *topo.ShardReplication) error { *oldSr = *sr return nil }); err != nil { t.Fatalf("UpdateShardReplicationFields() failed: %v", err) } if sri, err := ts.GetShardReplication(ctx, cell, "test_keyspace", "-10"); err != nil { t.Errorf("GetShardReplication(new guy) failed: %v", err) } else { if len(sri.ReplicationLinks) != 1 || sri.ReplicationLinks[0].TabletAlias.Cell != "c1" || sri.ReplicationLinks[0].TabletAlias.Uid != 1 { t.Errorf("GetShardReplication(new guy) returned wrong value: %v", *sri) } } if err := ts.UpdateShardReplicationFields(ctx, cell, "test_keyspace", "-10", func(sr *topo.ShardReplication) error { sr.ReplicationLinks = append(sr.ReplicationLinks, topo.ReplicationLink{ TabletAlias: topo.TabletAlias{ Cell: "c3", Uid: 3, }, }) return nil }); err != nil { t.Errorf("UpdateShardReplicationFields() failed: %v", err) } if sri, err := ts.GetShardReplication(ctx, cell, "test_keyspace", "-10"); err != nil { t.Errorf("GetShardReplication(after append) failed: %v", err) } else { if len(sri.ReplicationLinks) != 2 || sri.ReplicationLinks[0].TabletAlias.Cell != "c1" || sri.ReplicationLinks[0].TabletAlias.Uid != 1 || sri.ReplicationLinks[1].TabletAlias.Cell != "c3" || sri.ReplicationLinks[1].TabletAlias.Uid != 3 { t.Errorf("GetShardReplication(new guy) returned wrong value: %v", *sri) } } if err := ts.DeleteShardReplication(ctx, cell, "test_keyspace", "-10"); err != nil { t.Errorf("DeleteShardReplication(existing) failed: %v", err) } if err := ts.DeleteShardReplication(ctx, cell, "test_keyspace", "-10"); err != topo.ErrNoNode { t.Errorf("DeleteShardReplication(again) returned: %v", err) } }
// Update shard file with new master, replicas, etc. // // Re-read from TopologyServer to make sure we are using the side // effects of all actions. // // This function locks individual SvrShard paths, so it doesn't need a lock // on the shard. func RebuildShard(log logutil.Logger, ts topo.Server, keyspace, shard string, cells []string, timeout time.Duration, interrupted chan struct{}) error { log.Infof("RebuildShard %v/%v", keyspace, shard) // read the existing shard info. It has to exist. shardInfo, err := ts.GetShard(keyspace, shard) if err != nil { return err } // rebuild all cells in parallel wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, cell := range shardInfo.Cells { // skip this cell if we shouldn't rebuild it if !topo.InCellList(cell, cells) { continue } // start with the master if it's in the current cell tabletsAsMap := make(map[topo.TabletAlias]bool) if shardInfo.MasterAlias.Cell == cell { tabletsAsMap[shardInfo.MasterAlias] = true } wg.Add(1) go func(cell string) { defer wg.Done() // read the ShardReplication object to find tablets sri, err := ts.GetShardReplication(cell, keyspace, shard) if err != nil { rec.RecordError(fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err)) return } // add all relevant tablets to the map for _, rl := range sri.ReplicationLinks { tabletsAsMap[rl.TabletAlias] = true if rl.Parent.Cell == cell { tabletsAsMap[rl.Parent] = true } } // convert the map to a list aliases := make([]topo.TabletAlias, 0, len(tabletsAsMap)) for a := range tabletsAsMap { aliases = append(aliases, a) } // read all the Tablet records tablets, err := topo.GetTabletMap(ts, aliases) switch err { case nil: // keep going, we're good case topo.ErrPartialResult: log.Warningf("Got ErrPartialResult from topo.GetTabletMap in cell %v, some tablets may not be added properly to serving graph", cell) default: rec.RecordError(fmt.Errorf("GetTabletMap in cell %v failed: %v", cell, err)) return } // Lock the SrvShard so we write a consistent data set. actionNode := actionnode.RebuildSrvShard() lockPath, err := actionNode.LockSrvShard(ts, cell, keyspace, shard, timeout, interrupted) if err != nil { rec.RecordError(err) return } // write the data we need to rebuildErr := rebuildCellSrvShard(log, ts, shardInfo, cell, tablets) // and unlock if err := actionNode.UnlockSrvShard(ts, cell, keyspace, shard, lockPath, rebuildErr); err != nil { rec.RecordError(err) } }(cell) } wg.Wait() return rec.Error() }