// TabletExternallyReparented updates all topo records so the current
// tablet is the new master for this shard.
// Should be called under RPCWrapLock.
func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error {
	startTime := time.Now()

	// If there is a finalize step running, wait for it to finish or time out
	// before checking the global shard record again.
	if agent.finalizeReparentCtx != nil {
		select {
		case <-agent.finalizeReparentCtx.Done():
			agent.finalizeReparentCtx = nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	tablet := agent.Tablet()

	// Check the global shard record.
	si, err := agent.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard)
	if err != nil {
		log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		return err
	}
	if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) {
		// We may get called on the current master even when nothing has changed.
		// If the global shard record is already updated, it means we successfully
		// finished a previous reparent to this tablet.
		return nil
	}

	// Remember when we were first told we're the master.
	// If another tablet claims to be master and offers a more recent time,
	// that tablet will be trusted over us.
	agent.mutex.Lock()
	agent._tabletExternallyReparentedTime = startTime
	agent._replicationDelay = 0
	agent.mutex.Unlock()

	// Create a reusable Reparent event with available info.
	ev := &events.Reparent{
		ShardInfo: *si,
		NewMaster: *tablet,
		OldMaster: topodatapb.Tablet{
			Alias: si.MasterAlias,
			Type:  topodatapb.TabletType_MASTER,
		},
		ExternalID: externalID,
	}
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()
	event.DispatchUpdate(ev, "starting external from tablet (fast)")

	var wg sync.WaitGroup
	var errs concurrency.AllErrorRecorder

	// Execute state change to master by force-updating only the local copy of the
	// tablet record. The actual record in topo will be updated later.
	log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER")
	oldTablet := proto.Clone(tablet).(*topodatapb.Tablet)
	tablet.Type = topodatapb.TabletType_MASTER
	tablet.HealthMap = nil
	agent.setTablet(tablet)

	wg.Add(1)
	go func() {
		defer wg.Done()
		// This is where updateState will block for gracePeriod, while it gives
		// vtgate a chance to stop sending replica queries.
		if err := agent.updateState(ctx, oldTablet, "fastTabletExternallyReparented"); err != nil {
			errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err))
		}
	}()

	wg.Add(1)
	go func() {
		defer wg.Done()
		// Directly write the new master endpoint in the serving graph.
		// We will do a true rebuild in the background soon, but in the meantime,
		// this will be enough for clients to re-resolve the new master.
		event.DispatchUpdate(ev, "writing new master endpoint")
		log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph")
		ep, err := topo.TabletEndPoint(tablet)
		if err != nil {
			errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err))
			return
		}
		err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell,
			si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER,
			&topodatapb.EndPoints{Entries: []*topodatapb.EndPoint{ep}}, -1)
		if err != nil {
			errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err))
			return
		}
		externalReparentStats.Record("NewMasterVisible", startTime)
	}()

	// Wait for the serving state grace period and the serving graph update.
	wg.Wait()
	if errs.HasErrors() {
		return errs.Error()
	}

	// Start the finalize stage with a background context, but connect the trace.
	bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout)
	bgCtx = trace.CopySpan(bgCtx, ctx)
	agent.finalizeReparentCtx = bgCtx
	go func() {
		err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev)
		cancel()

		if err != nil {
			log.Warningf("finalizeTabletExternallyReparented error: %v", err)
			event.DispatchUpdate(ev, "failed: "+err.Error())
			return
		}
		externalReparentStats.Record("FullRebuild", startTime)
	}()

	return nil
}
// CheckWatchEndPoints makes sure WatchEndPoints works as expected.
func CheckWatchEndPoints(ctx context.Context, t *testing.T, ts topo.Server) {
	cell := getLocalCell(ctx, t, ts)
	keyspace := "test_keyspace"
	shard := "-10"
	tabletType := topo.TYPE_MASTER

	// Start watching; the first notification should be nil.
	notifications, stopWatching, err := ts.WatchEndPoints(ctx, cell, keyspace, shard, tabletType)
	if err != nil {
		t.Fatalf("WatchEndPoints failed: %v", err)
	}
	ep, ok := <-notifications
	if !ok || ep != nil {
		t.Fatalf("first value is wrong: %v %v", ep, ok)
	}

	// Update the endpoints; we should get a notification.
	endPoints := &pb.EndPoints{
		Entries: []*pb.EndPoint{
			&pb.EndPoint{
				Uid:  1,
				Host: "host1",
				PortMap: map[string]int32{
					"vt":    1234,
					"mysql": 1235,
					"grpc":  1236,
				},
			},
		},
	}
	if err := topo.UpdateEndPoints(ctx, ts, cell, keyspace, shard, tabletType, endPoints, -1); err != nil {
		t.Fatalf("UpdateEndPoints failed: %v", err)
	}
	for {
		ep, ok := <-notifications
		if !ok {
			t.Fatalf("watch channel is closed???")
		}
		if ep == nil {
			// Duplicate notification of the first value, that's OK.
			continue
		}
		// Non-empty value, that one should be ours.
		if !reflect.DeepEqual(endPoints, ep) {
			t.Fatalf("notified value is wrong: %v %v", ep, ok)
		}
		break
	}

	// Delete the endpoints; we should get a notification.
	if err := ts.DeleteEndPoints(ctx, cell, keyspace, shard, tabletType, -1); err != nil {
		t.Fatalf("DeleteEndPoints failed: %v", err)
	}
	for {
		ep, ok := <-notifications
		if !ok {
			t.Fatalf("watch channel is closed???")
		}
		if ep == nil {
			break
		}
		// Duplicate notification of the previous value, that's OK,
		// but the value better be good.
		if !reflect.DeepEqual(endPoints, ep) {
			t.Fatalf("duplicate notification value is bad: %v", ep)
		}
	}

	// Re-create the value, a bit different; we should get a notification.
	endPoints.Entries[0].Uid = 2
	if err := topo.UpdateEndPoints(ctx, ts, cell, keyspace, shard, tabletType, endPoints, -1); err != nil {
		t.Fatalf("UpdateEndPoints failed: %v", err)
	}
	for {
		ep, ok := <-notifications
		if !ok {
			t.Fatalf("watch channel is closed???")
		}
		if ep == nil {
			// Duplicate notification of the deleted value, that's OK.
			continue
		}
		// Non-empty value, that one should be ours.
		if !reflect.DeepEqual(endPoints, ep) {
			t.Fatalf("value after delete / re-create is wrong: %v %v", ep, ok)
		}
		break
	}

	// Close the stopWatching channel; the notifications channel should
	// eventually be closed too.
	close(stopWatching)
	for {
		ep, ok := <-notifications
		if !ok {
			break
		}
		if !reflect.DeepEqual(endPoints, ep) {
			t.Fatalf("duplicate notification value is bad: %v", ep)
		}
	}
}
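// The contract exercised above: the first notification carries the current
// value (nil when unset), every update or delete produces a notification
// (nil on delete, possibly with duplicates), and closing stopWatching
// eventually closes the notifications channel. A minimal, standalone
// in-memory sketch of that contract follows; watchedValue is hypothetical,
// not part of the topo API, and slow watchers are ignored for brevity
// (a real implementation must not block on a full watcher buffer).
// Requires: import "sync"

type watchedValue struct {
	mu       sync.Mutex
	value    *pb.EndPoints
	watchers map[chan *pb.EndPoints]bool
}

func (w *watchedValue) Watch() (notifications chan *pb.EndPoints, stopWatching chan struct{}) {
	notifications = make(chan *pb.EndPoints, 10)
	stopWatching = make(chan struct{})
	w.mu.Lock()
	if w.watchers == nil {
		w.watchers = make(map[chan *pb.EndPoints]bool)
	}
	w.watchers[notifications] = true
	notifications <- w.value // first notification: current value, nil if unset
	w.mu.Unlock()
	go func() {
		// Closing stopWatching unregisters the watcher and closes the
		// notifications channel, which the end of the test drains.
		<-stopWatching
		w.mu.Lock()
		delete(w.watchers, notifications)
		close(notifications)
		w.mu.Unlock()
	}()
	return notifications, stopWatching
}

// Set stores a new value and notifies all watchers; a delete is Set(nil).
func (w *watchedValue) Set(ep *pb.EndPoints) {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.value = ep
	for c := range w.watchers {
		c <- ep
	}
}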
// TabletExternallyReparented updates all topo records so the current
// tablet is the new master for this shard.
// Should be called under RPCWrapLock.
func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error {
	startTime := time.Now()

	// If there is a finalize step running, wait for it to finish or time out
	// before checking the global shard record again.
	if agent.finalizeReparentCtx != nil {
		select {
		case <-agent.finalizeReparentCtx.Done():
			agent.finalizeReparentCtx = nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	tablet := agent.Tablet()

	// Check the global shard record.
	si, err := topo.GetShard(ctx, agent.TopoServer, tablet.Keyspace, tablet.Shard)
	if err != nil {
		log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		return err
	}
	if si.MasterAlias == tablet.Alias {
		// We may get called on the current master even when nothing has changed.
		// If the global shard record is already updated, it means we successfully
		// finished a previous reparent to this tablet.
		return nil
	}

	// Create a reusable Reparent event with available info.
	ev := &events.Reparent{
		ShardInfo:  *si,
		NewMaster:  *tablet.Tablet,
		OldMaster:  topo.Tablet{Alias: si.MasterAlias, Type: topo.TYPE_MASTER},
		ExternalID: externalID,
	}
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()
	event.DispatchUpdate(ev, "starting external from tablet (fast)")

	// Execute state change to master by force-updating only the local copy of the
	// tablet record. The actual record in topo will be updated later.
	log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER")
	oldTablet := *tablet.Tablet
	newTablet := oldTablet
	newTablet.Type = topo.TYPE_MASTER
	newTablet.Health = nil
	agent.setTablet(topo.NewTabletInfo(&newTablet, -1))
	if err := agent.updateState(ctx, &oldTablet, "fastTabletExternallyReparented"); err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err)
	}

	// Directly write the new master endpoint in the serving graph.
	// We will do a true rebuild in the background soon, but in the meantime,
	// this will be enough for clients to re-resolve the new master.
	event.DispatchUpdate(ev, "writing new master endpoint")
	log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph")
	ep, err := tablet.EndPoint()
	if err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err)
	}
	err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell,
		si.Keyspace(), si.ShardName(), topo.TYPE_MASTER,
		&topo.EndPoints{Entries: []topo.EndPoint{*ep}}, -1)
	if err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err)
	}
	externalReparentStats.Record("NewMasterVisible", startTime)

	// Start the finalize stage with a background context, but connect the trace.
	bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout)
	bgCtx = trace.CopySpan(bgCtx, ctx)
	agent.finalizeReparentCtx = bgCtx
	go func() {
		err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev)
		cancel()

		if err != nil {
			log.Warningf("finalizeTabletExternallyReparented error: %v", err)
			event.DispatchUpdate(ev, "failed: "+err.Error())
			return
		}
		externalReparentStats.Record("FullRebuild", startTime)
	}()

	return nil
}
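// Both variants of TabletExternallyReparented above end the same way: the RPC
// returns once the new master endpoint is visible, and the full topo rebuild
// runs in the background with a bounded lifetime. A minimal, standalone
// sketch of that hand-off follows; startFinalize and its parameters are
// hypothetical names, not the agent's actual API, and the trace.CopySpan
// step used above is omitted to keep the sketch dependency-free.
// Requires: import ("time"; "golang.org/x/net/context")

// startFinalize derives a bounded background context from a long-lived
// parent context (the agent's batchCtx above), runs finalize in its own
// goroutine, and releases the timeout timer once finalize returns.
func startFinalize(parent context.Context, timeout time.Duration, finalize func(context.Context) error) {
	bgCtx, cancel := context.WithTimeout(parent, timeout)
	go func() {
		defer cancel()
		if err := finalize(bgCtx); err != nil {
			log.Warningf("finalize error: %v", err)
		}
	}()
}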
// CheckServingGraph makes sure the serving graph functions work properly.
func CheckServingGraph(ctx context.Context, t *testing.T, ts topo.Server) {
	cell := getLocalCell(ctx, t, ts)

	// Test individual cell/keyspace/shard/type entries.
	if _, err := ts.GetSrvTabletTypesPerShard(ctx, cell, "test_keyspace", "-10"); err != topo.ErrNoNode {
		t.Errorf("GetSrvTabletTypesPerShard(invalid): %v", err)
	}
	if _, _, err := ts.GetEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER); err != topo.ErrNoNode {
		t.Errorf("GetEndPoints(invalid): %v", err)
	}

	endPoints := &pb.EndPoints{
		Entries: []*pb.EndPoint{
			&pb.EndPoint{
				Uid:  1,
				Host: "host1",
				PortMap: map[string]int32{
					"vt":    1234,
					"mysql": 1235,
					"grpc":  1236,
				},
			},
		},
	}

	if err := ts.CreateEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER, endPoints); err != nil {
		t.Fatalf("CreateEndPoints(master): %v", err)
	}
	// Try to create again.
	if err := ts.CreateEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER, endPoints); err != topo.ErrNodeExists {
		t.Fatalf("CreateEndPoints(master): err = %v, want topo.ErrNodeExists", err)
	}

	// Get version.
	_, version, err := ts.GetEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER)
	if err != nil {
		t.Fatalf("GetEndPoints(master): %v", err)
	}

	// Make a change.
	tmp := endPoints.Entries[0].Uid
	endPoints.Entries[0].Uid = tmp + 1
	if err := topo.UpdateEndPoints(ctx, ts, cell, "test_keyspace", "-10", topo.TYPE_MASTER, endPoints, -1); err != nil {
		t.Fatalf("UpdateEndPoints(master): %v", err)
	}
	endPoints.Entries[0].Uid = tmp

	// Try to delete with the wrong version.
	if err := ts.DeleteEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER, version); err != topo.ErrBadVersion {
		t.Fatalf("DeleteEndPoints: err = %v, want topo.ErrBadVersion", err)
	}

	// Delete with the correct version.
	_, version, err = ts.GetEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER)
	if err != nil {
		t.Fatalf("GetEndPoints(master): %v", err)
	}
	if err := ts.DeleteEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER, version); err != nil {
		t.Fatalf("DeleteEndPoints: %v", err)
	}

	// Recreate it with an unconditional update.
	if err := topo.UpdateEndPoints(ctx, ts, cell, "test_keyspace", "-10", topo.TYPE_MASTER, endPoints, -1); err != nil {
		t.Fatalf("UpdateEndPoints(master): %v", err)
	}
	if types, err := ts.GetSrvTabletTypesPerShard(ctx, cell, "test_keyspace", "-10"); err != nil || len(types) != 1 || types[0] != topo.TYPE_MASTER {
		t.Errorf("GetSrvTabletTypesPerShard(1): %v %v", err, types)
	}

	// Delete it unconditionally.
	if err := ts.DeleteEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER, -1); err != nil {
		t.Fatalf("DeleteEndPoints: %v", err)
	}

	// Delete the SrvShard.
	if err := ts.DeleteSrvShard(ctx, cell, "test_keyspace", "-10"); err != nil {
		t.Fatalf("DeleteSrvShard: %v", err)
	}
	if _, err := ts.GetSrvShard(ctx, cell, "test_keyspace", "-10"); err != topo.ErrNoNode {
		t.Errorf("GetSrvShard(deleted) got %v, want ErrNoNode", err)
	}

	// Re-add endpoints.
	if err := topo.UpdateEndPoints(ctx, ts, cell, "test_keyspace", "-10", topo.TYPE_MASTER, endPoints, -1); err != nil {
		t.Fatalf("UpdateEndPoints(master): %v", err)
	}
	addrs, version, err := ts.GetEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER)
	if err != nil {
		t.Errorf("GetEndPoints: %v", err)
	}
	if len(addrs.Entries) != 1 || addrs.Entries[0].Uid != 1 {
		t.Errorf("GetEndPoints(1): %v", addrs)
	}
	if pm := addrs.Entries[0].PortMap; pm["vt"] != 1234 || pm["mysql"] != 1235 || pm["grpc"] != 1236 {
		t.Errorf("GetEndPoints(1).PortMap: want %v, got %v", endPoints.Entries[0].PortMap, pm)
	}

	// Update with the wrong version.
	if err := topo.UpdateEndPoints(ctx, ts, cell, "test_keyspace", "-10", topo.TYPE_MASTER, endPoints, version+1); err != topo.ErrBadVersion {
		t.Fatalf("UpdateEndPoints(master): err = %v, want topo.ErrBadVersion", err)
	}
	// Update with the right version.
	if err := topo.UpdateEndPoints(ctx, ts, cell, "test_keyspace", "-10", topo.TYPE_MASTER, endPoints, version); err != nil {
		t.Fatalf("UpdateEndPoints(master): %v", err)
	}
	// Update existing EndPoints unconditionally.
	if err := topo.UpdateEndPoints(ctx, ts, cell, "test_keyspace", "-10", topo.TYPE_MASTER, endPoints, -1); err != nil {
		t.Fatalf("UpdateEndPoints(master): %v", err)
	}

	if err := ts.DeleteEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_REPLICA, -1); err != topo.ErrNoNode {
		t.Errorf("DeleteEndPoints(unknown): %v", err)
	}
	if err := ts.DeleteEndPoints(ctx, cell, "test_keyspace", "-10", topo.TYPE_MASTER, -1); err != nil {
		t.Errorf("DeleteEndPoints(master): %v", err)
	}

	// Test cell/keyspace/shard entries (SrvShard).
	srvShard := &pb.SrvShard{
		Name:       "-10",
		KeyRange:   newKeyRange3("-10"),
		MasterCell: "test",
	}
	if err := ts.UpdateSrvShard(ctx, cell, "test_keyspace", "-10", srvShard); err != nil {
		t.Fatalf("UpdateSrvShard(1): %v", err)
	}
	if _, err := ts.GetSrvShard(ctx, cell, "test_keyspace", "666"); err != topo.ErrNoNode {
		t.Errorf("GetSrvShard(invalid): %v", err)
	}
	if s, err := ts.GetSrvShard(ctx, cell, "test_keyspace", "-10"); err != nil ||
		s.Name != "-10" ||
		!key.KeyRangeEqual(s.KeyRange, newKeyRange3("-10")) ||
		s.MasterCell != "test" {
		t.Errorf("GetSrvShard(valid): %v", err)
	}

	// Test cell/keyspace entries (SrvKeyspace).
	srvKeyspace := topo.SrvKeyspace{
		Partitions: map[topo.TabletType]*topo.KeyspacePartition{
			topo.TYPE_MASTER: &topo.KeyspacePartition{
				ShardReferences: []topo.ShardReference{
					topo.ShardReference{
						Name:     "-80",
						KeyRange: newKeyRange("-80"),
					},
				},
			},
		},
		ShardingColumnName: "video_id",
		ShardingColumnType: key.KIT_UINT64,
		ServedFrom: map[topo.TabletType]string{
			topo.TYPE_REPLICA: "other_keyspace",
		},
	}
	if err := ts.UpdateSrvKeyspace(ctx, cell, "test_keyspace", &srvKeyspace); err != nil {
		t.Errorf("UpdateSrvKeyspace(1): %v", err)
	}
	if _, err := ts.GetSrvKeyspace(ctx, cell, "test_keyspace666"); err != topo.ErrNoNode {
		t.Errorf("GetSrvKeyspace(invalid): %v", err)
	}
	if k, err := ts.GetSrvKeyspace(ctx, cell, "test_keyspace"); err != nil ||
		len(k.Partitions) != 1 ||
		len(k.Partitions[topo.TYPE_MASTER].ShardReferences) != 1 ||
		k.Partitions[topo.TYPE_MASTER].ShardReferences[0].Name != "-80" ||
		k.Partitions[topo.TYPE_MASTER].ShardReferences[0].KeyRange != newKeyRange("-80") ||
		k.ShardingColumnName != "video_id" ||
		k.ShardingColumnType != key.KIT_UINT64 ||
		k.ServedFrom[topo.TYPE_REPLICA] != "other_keyspace" {
		t.Errorf("GetSrvKeyspace(valid): %v %v", err, k)
	}
	if k, err := ts.GetSrvKeyspaceNames(ctx, cell); err != nil || len(k) != 1 || k[0] != "test_keyspace" {
		t.Errorf("GetSrvKeyspaceNames(): %v", err)
	}
	// Check that updating a SrvKeyspace out of the blue works.
	if err := ts.UpdateSrvKeyspace(ctx, cell, "unknown_keyspace_so_far", &srvKeyspace); err != nil {
		t.Fatalf("UpdateSrvKeyspace(2): %v", err)
	}
	if k, err := ts.GetSrvKeyspace(ctx, cell, "unknown_keyspace_so_far"); err != nil ||
		len(k.Partitions) != 1 ||
		len(k.Partitions[topo.TYPE_MASTER].ShardReferences) != 1 ||
		k.Partitions[topo.TYPE_MASTER].ShardReferences[0].Name != "-80" ||
		k.Partitions[topo.TYPE_MASTER].ShardReferences[0].KeyRange != newKeyRange("-80") ||
		k.ShardingColumnName != "video_id" ||
		k.ShardingColumnType != key.KIT_UINT64 ||
		k.ServedFrom[topo.TYPE_REPLICA] != "other_keyspace" {
		t.Errorf("GetSrvKeyspace(out of the blue): %v %v", err, *k)
	}

	// Delete the SrvKeyspace.
	if err := ts.DeleteSrvKeyspace(ctx, cell, "unknown_keyspace_so_far"); err != nil {
		t.Fatalf("DeleteSrvKeyspace: %v", err)
	}
	if _, err := ts.GetSrvKeyspace(ctx, cell, "unknown_keyspace_so_far"); err != topo.ErrNoNode {
		t.Errorf("GetSrvKeyspace(deleted) got %v, want ErrNoNode", err)
	}
}
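// CheckServingGraph pins down the topo versioning contract: Create fails with
// ErrNodeExists when the node already exists, an explicit version makes
// Update and Delete conditional (ErrBadVersion on mismatch), and version -1
// means "unconditional", which also gives UpdateSrvKeyspace its upsert
// ("out of the blue") behavior. A minimal, standalone in-memory sketch of
// that contract follows; versionedRecord is a hypothetical stand-in for a
// single topo node, not part of the topo API.
// Requires: import "sync"

type versionedRecord struct {
	mu      sync.Mutex
	data    *pb.EndPoints
	version int64
}

func (r *versionedRecord) Create(ep *pb.EndPoints) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.data != nil {
		return topo.ErrNodeExists
	}
	r.data = ep
	r.version++
	return nil
}

// Update is conditional when a version is given, and an upsert when the
// version is -1, matching the "out of the blue" check above.
func (r *versionedRecord) Update(ep *pb.EndPoints, version int64) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if version != -1 && version != r.version {
		return topo.ErrBadVersion
	}
	r.data = ep
	r.version++
	return nil
}

func (r *versionedRecord) Delete(version int64) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.data == nil {
		return topo.ErrNoNode
	}
	if version != -1 && version != r.version {
		return topo.ErrBadVersion
	}
	r.data = nil
	r.version++
	return nil
}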