// NewQueryResultReaderForTablet creates a new QueryResultReader for
// the provided tablet / sql query
func NewQueryResultReaderForTablet(ctx context.Context, ts topo.Server, tabletAlias *pb.TabletAlias, sql string) (*QueryResultReader, error) {
	tablet, err := ts.GetTablet(ctx, tabletAlias)
	if err != nil {
		return nil, err
	}

	endPoint, err := topo.TabletEndPoint(tablet.Tablet)
	if err != nil {
		return nil, err
	}

	// use sessionId for now
	conn, err := tabletconn.GetDialer()(ctx, endPoint, tablet.Keyspace, tablet.Shard, pb.TabletType_UNKNOWN, *remoteActionsTimeout)
	if err != nil {
		return nil, err
	}

	sr, clientErrFn, err := conn.StreamExecute(ctx, sql, make(map[string]interface{}), 0)
	if err != nil {
		return nil, err
	}

	// read the columns, or grab the error
	cols, ok := <-sr
	if !ok {
		return nil, fmt.Errorf("Cannot read Fields for query '%v': %v", sql, clientErrFn())
	}

	return &QueryResultReader{
		Output:      sr,
		Fields:      cols.Fields,
		conn:        conn,
		clientErrFn: clientErrFn,
	}, nil
}
// loadTablets reads all tablets from topology, converts to endpoints, and updates HealthCheck.
func (tw *TopologyWatcher) loadTablets() {
	var wg sync.WaitGroup
	newEndPoints := make(map[string]*tabletEndPoint)
	tabletAlias, err := tw.getTablets(tw)
	if err != nil {
		select {
		case <-tw.ctx.Done():
			return
		default:
		}
		log.Errorf("cannot get tablets for cell: %v: %v", tw.cell, err)
		return
	}
	for _, tAlias := range tabletAlias {
		wg.Add(1)
		go func(alias *topodatapb.TabletAlias) {
			defer wg.Done()
			tw.sem <- 1 // Wait for active queue to drain.
			tablet, err := tw.topoServer.GetTablet(tw.ctx, alias)
			<-tw.sem // Done; enable next request to run
			if err != nil {
				select {
				case <-tw.ctx.Done():
					return
				default:
				}
				log.Errorf("cannot get tablet for alias %v: %v", alias, err)
				return
			}
			endPoint, err := topo.TabletEndPoint(tablet.Tablet)
			if err != nil {
				log.Errorf("cannot get endpoint from tablet %v: %v", tablet, err)
				return
			}
			key := EndPointToMapKey(endPoint)
			tw.mu.Lock()
			newEndPoints[key] = &tabletEndPoint{
				alias:    topoproto.TabletAliasString(alias),
				endPoint: endPoint,
			}
			tw.mu.Unlock()
		}(tAlias)
	}

	wg.Wait()
	tw.mu.Lock()
	for key, tep := range newEndPoints {
		if _, ok := tw.endPoints[key]; !ok {
			tw.hc.AddEndPoint(tw.cell, tep.alias, tep.endPoint)
		}
	}
	for key, tep := range tw.endPoints {
		if _, ok := newEndPoints[key]; !ok {
			tw.hc.RemoveEndPoint(tep.endPoint)
		}
	}
	tw.endPoints = newEndPoints
	tw.mu.Unlock()
}
// loadTablets reads all tablets from topology, converts to endpoints, and updates HealthCheck.
func (ctw *CellTabletsWatcher) loadTablets() {
	var wg sync.WaitGroup
	newEndPoints := make(map[string]*pbt.EndPoint)
	tabletAlias, err := ctw.topoServer.GetTabletsByCell(ctw.ctx, ctw.cell)
	if err != nil {
		select {
		case <-ctw.ctx.Done():
			return
		default:
		}
		log.Errorf("cannot get tablets for cell: %v: %v", ctw.cell, err)
		return
	}
	for _, tAlias := range tabletAlias {
		wg.Add(1)
		go func(alias *pbt.TabletAlias) {
			defer wg.Done()
			ctw.sem <- 1 // Wait for active queue to drain.
			tablet, err := ctw.topoServer.GetTablet(ctw.ctx, alias)
			<-ctw.sem // Done; enable next request to run
			if err != nil {
				select {
				case <-ctw.ctx.Done():
					return
				default:
				}
				log.Errorf("cannot get tablet for alias %v: %v", alias, err)
				return
			}
			endPoint, err := topo.TabletEndPoint(tablet.Tablet)
			if err != nil {
				log.Errorf("cannot get endpoint from tablet %v: %v", tablet, err)
				return
			}
			key := EndPointToMapKey(endPoint)
			ctw.mu.Lock()
			newEndPoints[key] = endPoint
			ctw.mu.Unlock()
		}(tAlias)
	}

	wg.Wait()
	ctw.mu.Lock()
	for key, ep := range newEndPoints {
		if _, ok := ctw.endPoints[key]; !ok {
			ctw.hc.AddEndPoint(ctw.cell, ep)
		}
	}
	for key, ep := range ctw.endPoints {
		if _, ok := newEndPoints[key]; !ok {
			ctw.hc.RemoveEndPoint(ep)
		}
	}
	ctw.endPoints = newEndPoints
	ctw.mu.Unlock()
}
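// The two loadTablets variants above share one pattern: bound the topo reads with a
// channel semaphore, collect results into a new map under a mutex, then diff the new
// map against the old one to decide what to add and what to remove. The snippet below
// is a minimal, self-contained sketch of that pattern with hypothetical names
// (Watcher, refresh, fetch); it is not a Vitess API.
package main

import (
	"fmt"
	"sync"
)

// Watcher periodically rebuilds its view of some targets.
type Watcher struct {
	mu      sync.Mutex
	sem     chan int          // bounds the number of concurrent fetches
	current map[string]string // key -> value currently registered
}

func (w *Watcher) refresh(keys []string, fetch func(string) (string, error)) {
	var wg sync.WaitGroup
	latest := make(map[string]string)

	for _, k := range keys {
		wg.Add(1)
		go func(k string) {
			defer wg.Done()
			w.sem <- 1 // wait for a free slot
			v, err := fetch(k)
			<-w.sem // release the slot
			if err != nil {
				return
			}
			w.mu.Lock()
			latest[k] = v
			w.mu.Unlock()
		}(k)
	}
	wg.Wait()

	// Diff the freshly built map against the previous one.
	w.mu.Lock()
	defer w.mu.Unlock()
	for k, v := range latest {
		if _, ok := w.current[k]; !ok {
			fmt.Println("add", k, v) // AddEndPoint(...) in the real watcher
		}
	}
	for k, v := range w.current {
		if _, ok := latest[k]; !ok {
			fmt.Println("remove", k, v) // RemoveEndPoint(...) in the real watcher
		}
	}
	w.current = latest
}

func main() {
	w := &Watcher{sem: make(chan int, 2), current: map[string]string{"old": "x"}}
	fetch := func(k string) (string, error) { return k + "-endpoint", nil }
	w.refresh([]string{"a", "b"}, fetch) // adds a and b, removes old
	w.refresh([]string{"a"}, fetch)      // removes b
}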
func (th *tabletHealth) stream(ctx context.Context, ts topo.Server, tabletAlias *topodatapb.TabletAlias) (err error) {
	defer func() {
		th.mu.Lock()
		th.err = err
		th.mu.Unlock()
		close(th.done)
	}()

	ti, err := ts.GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}
	ep, err := topo.TabletEndPoint(ti.Tablet)
	if err != nil {
		return err
	}

	// Pass in a tablet type that is not UNKNOWN, so we don't ask
	// for sessionId.
	conn, err := tabletconn.GetDialer()(ctx, ep, "", "", topodatapb.TabletType_MASTER, 30*time.Second)
	if err != nil {
		return err
	}
	defer conn.Close()

	stream, err := conn.StreamHealth(ctx)
	if err != nil {
		return err
	}

	first := true
	for time.Since(th.lastAccessed()) < *tabletHealthKeepAlive {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		result, err := stream.Recv()
		if err != nil {
			return err
		}
		th.mu.Lock()
		th.result = result
		th.mu.Unlock()
		if first {
			// We got the first result, so we're ready to be accessed.
			close(th.ready)
			first = false
		}
	}
	return nil
}
func commandVtTabletExecute(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
	transactionID := subFlags.Int("transaction_id", 0, "transaction id to use, if inside a transaction.")
	bindVariables := newBindvars(subFlags)
	keyspace := subFlags.String("keyspace", "", "keyspace the tablet belongs to")
	shard := subFlags.String("shard", "", "shard the tablet belongs to")
	tabletType := subFlags.String("tablet_type", "unknown", "tablet type we expect from the tablet (use unknown to use sessionId)")
	connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client")
	json := subFlags.Bool("json", false, "Output JSON instead of human-readable table")
	if err := subFlags.Parse(args); err != nil {
		return err
	}
	if subFlags.NArg() != 2 {
		return fmt.Errorf("the <tablet_alias> and <sql> arguments are required for the VtTabletExecute command")
	}
	tt, err := topoproto.ParseTabletType(*tabletType)
	if err != nil {
		return err
	}
	tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0))
	if err != nil {
		return err
	}
	tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}
	ep, err := topo.TabletEndPoint(tabletInfo.Tablet)
	if err != nil {
		return fmt.Errorf("cannot get EndPoint from tablet record: %v", err)
	}

	conn, err := tabletconn.GetDialer()(ctx, ep, *keyspace, *shard, tt, *connectTimeout)
	if err != nil {
		return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err)
	}
	defer conn.Close()

	qr, err := conn.Execute(ctx, subFlags.Arg(1), *bindVariables, int64(*transactionID))
	if err != nil {
		return fmt.Errorf("Execute failed: %v", err)
	}
	if *json {
		return printJSON(wr.Logger(), qr)
	}
	printQueryResult(loggerWriter{wr.Logger()}, qr)
	return nil
}
func commandVtTabletStreamHealth(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
	count := subFlags.Int("count", 1, "number of responses to wait for")
	connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client")
	if err := subFlags.Parse(args); err != nil {
		return err
	}
	if subFlags.NArg() != 1 {
		return fmt.Errorf("The <tablet alias> argument is required for the VtTabletStreamHealth command.")
	}
	tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0))
	if err != nil {
		return err
	}
	tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}
	ep, err := topo.TabletEndPoint(tabletInfo.Tablet)
	if err != nil {
		return fmt.Errorf("cannot get EndPoint from tablet record: %v", err)
	}

	// pass in a non-UNKNOWN tablet type to not use sessionId
	conn, err := tabletconn.GetDialer()(ctx, ep, "", "", pb.TabletType_MASTER, *connectTimeout)
	if err != nil {
		return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err)
	}

	stream, errFunc, err := conn.StreamHealth(ctx)
	if err != nil {
		return err
	}
	for i := 0; i < *count; i++ {
		shr, ok := <-stream
		if !ok {
			return fmt.Errorf("stream ended early: %v", errFunc())
		}
		data, err := json.Marshal(shr)
		if err != nil {
			wr.Logger().Errorf("cannot json-marshal structure: %v", err)
		} else {
			wr.Logger().Printf("%v\n", string(data))
		}
	}
	return nil
}
// UpdateTabletEndpoints fixes up any entries in the serving graph that relate
// to a given tablet.
func UpdateTabletEndpoints(ctx context.Context, ts topo.Server, tablet *pb.Tablet) (err error) {
	srvTypes, err := ts.GetSrvTabletTypesPerShard(ctx, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard)
	if err != nil {
		if err != topo.ErrNoNode {
			return err
		}
		// It's fine if there are no existing types.
		srvTypes = nil
	}

	wg := sync.WaitGroup{}
	errs := concurrency.AllErrorRecorder{}

	// Update the list that the tablet is supposed to be in (if any).
	if topo.IsInServingGraph(tablet.Type) {
		endpoint, err := topo.TabletEndPoint(tablet)
		if err != nil {
			return err
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			errs.RecordError(
				updateEndpoint(ctx, ts, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard,
					tablet.Type, endpoint))
		}()
	}

	// Remove it from any other lists it isn't supposed to be in.
	for _, srvType := range srvTypes {
		if srvType != tablet.Type {
			wg.Add(1)
			go func(tabletType pb.TabletType) {
				defer wg.Done()
				errs.RecordError(
					removeEndpoint(ctx, ts, tablet.Alias.Cell, tablet.Keyspace, tablet.Shard,
						tabletType, tablet.Alias.Uid))
			}(srvType)
		}
	}

	wg.Wait()
	return errs.Error()
}
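// UpdateTabletEndpoints fans its topo writes out to goroutines and collects their
// errors through concurrency.AllErrorRecorder. Below is a minimal, self-contained
// sketch of that collect-errors-from-goroutines pattern using a hypothetical
// errorRecorder type; it is not the Vitess concurrency package.
package main

import (
	"errors"
	"fmt"
	"sync"
)

// errorRecorder collects errors from multiple goroutines.
type errorRecorder struct {
	mu   sync.Mutex
	errs []error
}

// RecordError stores a non-nil error; safe for concurrent use.
func (r *errorRecorder) RecordError(err error) {
	if err == nil {
		return
	}
	r.mu.Lock()
	r.errs = append(r.errs, err)
	r.mu.Unlock()
}

// Error returns all recorded errors joined together, or nil if there were none.
func (r *errorRecorder) Error() error {
	r.mu.Lock()
	defer r.mu.Unlock()
	return errors.Join(r.errs...)
}

func main() {
	var wg sync.WaitGroup
	rec := &errorRecorder{}
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			if i == 1 {
				rec.RecordError(fmt.Errorf("task %d failed", i))
			}
		}(i)
	}
	wg.Wait()
	fmt.Println(rec.Error())
}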
func commandVtTabletBegin(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
	keyspace := subFlags.String("keyspace", "", "keyspace the tablet belongs to")
	shard := subFlags.String("shard", "", "shard the tablet belongs to")
	tabletType := subFlags.String("tablet_type", "unknown", "tablet type we expect from the tablet (use unknown to use sessionId)")
	connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client")
	if err := subFlags.Parse(args); err != nil {
		return err
	}
	if subFlags.NArg() != 1 {
		return fmt.Errorf("the <tablet_alias> argument is required for the VtTabletBegin command")
	}
	tt, err := topoproto.ParseTabletType(*tabletType)
	if err != nil {
		return err
	}
	tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0))
	if err != nil {
		return err
	}
	tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}
	ep, err := topo.TabletEndPoint(tabletInfo.Tablet)
	if err != nil {
		return fmt.Errorf("cannot get EndPoint from tablet record: %v", err)
	}

	conn, err := tabletconn.GetDialer()(ctx, ep, *keyspace, *shard, tt, *connectTimeout)
	if err != nil {
		return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err)
	}
	defer conn.Close()

	transactionID, err := conn.Begin(ctx)
	if err != nil {
		return fmt.Errorf("Begin failed: %v", err)
	}
	result := map[string]int64{
		"transaction_id": transactionID,
	}
	return printJSON(wr, result)
}
func commandVtTabletRollback(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
	keyspace := subFlags.String("keyspace", "", "keyspace the tablet belongs to")
	shard := subFlags.String("shard", "", "shard the tablet belongs to")
	tabletType := subFlags.String("tablet_type", "unknown", "tablet type we expect from the tablet (use unknown to use sessionId)")
	connectTimeout := subFlags.Duration("connect_timeout", 30*time.Second, "Connection timeout for vttablet client")
	if err := subFlags.Parse(args); err != nil {
		return err
	}
	if subFlags.NArg() != 2 {
		return fmt.Errorf("the <tablet_alias> and <transaction_id> arguments are required for the VtTabletRollback command")
	}
	transactionID, err := strconv.ParseInt(subFlags.Arg(1), 10, 64)
	if err != nil {
		return err
	}
	tt, err := topo.ParseTabletType(*tabletType)
	if err != nil {
		return err
	}
	tabletAlias, err := topo.ParseTabletAliasString(subFlags.Arg(0))
	if err != nil {
		return err
	}
	tabletInfo, err := wr.TopoServer().GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}
	ep, err := topo.TabletEndPoint(tabletInfo.Tablet)
	if err != nil {
		return fmt.Errorf("cannot get EndPoint from tablet record: %v", err)
	}

	conn, err := tabletconn.GetDialer()(ctx, ep, *keyspace, *shard, tt, *connectTimeout)
	if err != nil {
		return fmt.Errorf("cannot connect to tablet %v: %v", tabletAlias, err)
	}
	defer conn.Close()

	return conn.Rollback(ctx, transactionID)
}
// NewQueryResultReaderForTablet creates a new QueryResultReader for
// the provided tablet / sql query
func NewQueryResultReaderForTablet(ctx context.Context, ts topo.Server, tabletAlias *topodatapb.TabletAlias, sql string) (*QueryResultReader, error) {
	shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout)
	tablet, err := ts.GetTablet(shortCtx, tabletAlias)
	cancel()
	if err != nil {
		return nil, err
	}

	endPoint, err := topo.TabletEndPoint(tablet.Tablet)
	if err != nil {
		return nil, err
	}

	// use sessionId for now
	conn, err := tabletconn.GetDialer()(ctx, endPoint, tablet.Keyspace, tablet.Shard, topodatapb.TabletType_UNKNOWN, *remoteActionsTimeout)
	if err != nil {
		return nil, err
	}

	stream, err := conn.StreamExecute(ctx, sql, make(map[string]interface{}), 0)
	if err != nil {
		return nil, err
	}

	// read the columns, or grab the error
	cols, err := stream.Recv()
	if err != nil {
		return nil, fmt.Errorf("Cannot read Fields for query '%v': %v", sql, err)
	}

	return &QueryResultReader{
		Output: stream,
		Fields: cols.Fields,
		conn:   conn,
	}, nil
}
// rebuildCellSrvShard computes and writes the serving graph data to a
// single cell
func rebuildCellSrvShard(ctx context.Context, log logutil.Logger, ts topo.Server, si *topo.ShardInfo, cell string) (err error) {
	log.Infof("rebuildCellSrvShard %v/%v in cell %v", si.Keyspace(), si.ShardName(), cell)

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Read existing EndPoints node versions, so we know if any
		// changes sneak in after we read the tablets.
		versions, err := getEndPointsVersions(ctx, ts, cell, si.Keyspace(), si.ShardName())

		// Get all tablets in this cell/shard.
		tablets, err := ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), []string{cell})
		if err != nil {
			if err != topo.ErrPartialResult {
				return err
			}
			log.Warningf("Got ErrPartialResult from topo.GetTabletMapForShardByCell(%v), some tablets may not be added properly to serving graph", cell)
		}

		// Build up the serving graph from scratch.
		serving := make(map[pb.TabletType]*pb.EndPoints)
		for _, tablet := range tablets {
			// Only add serving types.
			if !tablet.IsInServingGraph() {
				continue
			}

			// Check the Keyspace and Shard for the tablet are right.
			if tablet.Keyspace != si.Keyspace() || tablet.Shard != si.ShardName() {
				return fmt.Errorf("CRITICAL: tablet %v is in replication graph for shard %v/%v but belongs to shard %v:%v", tablet.Alias, si.Keyspace(), si.ShardName(), tablet.Keyspace, tablet.Shard)
			}

			// Add the tablet to the list.
			endpoints, ok := serving[tablet.Type]
			if !ok {
				endpoints = topo.NewEndPoints()
				serving[tablet.Type] = endpoints
			}
			entry, err := topo.TabletEndPoint(tablet.Tablet)
			if err != nil {
				log.Warningf("EndPointForTablet failed for tablet %v: %v", tablet.Alias, err)
				continue
			}
			endpoints.Entries = append(endpoints.Entries, entry)
		}

		wg := sync.WaitGroup{}
		fatalErrs := concurrency.AllErrorRecorder{}
		retryErrs := concurrency.AllErrorRecorder{}

		// Write nodes that should exist.
		for tabletType, endpoints := range serving {
			wg.Add(1)
			go func(tabletType pb.TabletType, endpoints *pb.EndPoints) {
				defer wg.Done()
				log.Infof("saving serving graph for cell %v shard %v/%v tabletType %v", cell, si.Keyspace(), si.ShardName(), tabletType)

				version, ok := versions[tabletType]
				if !ok {
					// This type didn't exist when we first checked.
					// Try to create, but only if it still doesn't exist.
					if err := ts.CreateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints); err != nil {
						log.Warningf("CreateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNodeExists:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
					return
				}

				// Update only if the version matches.
				if err := ts.UpdateEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, endpoints, version); err != nil {
					log.Warningf("UpdateEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
					switch err {
					case topo.ErrBadVersion, topo.ErrNoNode:
						retryErrs.RecordError(err)
					default:
						fatalErrs.RecordError(err)
					}
				}
			}(tabletType, endpoints)
		}

		// Delete nodes that shouldn't exist.
		for tabletType, version := range versions {
			if _, ok := serving[tabletType]; !ok {
				wg.Add(1)
				go func(tabletType pb.TabletType, version int64) {
					defer wg.Done()
					log.Infof("removing stale db type from serving graph: %v", tabletType)
					if err := ts.DeleteEndPoints(ctx, cell, si.Keyspace(), si.ShardName(), tabletType, version); err != nil && err != topo.ErrNoNode {
						log.Warningf("DeleteEndPoints(%v, %v, %v) failed during rebuild: %v", cell, si, tabletType, err)
						switch err {
						case topo.ErrNoNode:
							// Someone else deleted it, which is fine.
						case topo.ErrBadVersion:
							retryErrs.RecordError(err)
						default:
							fatalErrs.RecordError(err)
						}
					}
				}(tabletType, version)
			}
		}

		// Update srvShard object
		wg.Add(1)
		go func() {
			defer wg.Done()
			log.Infof("updating shard serving graph in cell %v for %v/%v", cell, si.Keyspace(), si.ShardName())
			if err := UpdateSrvShard(ctx, ts, cell, si); err != nil {
				fatalErrs.RecordError(err)
				log.Warningf("writing serving data in cell %v for %v/%v failed: %v", cell, si.Keyspace(), si.ShardName(), err)
			}
		}()

		wg.Wait()

		// If there are any fatal errors, give up.
		if fatalErrs.HasErrors() {
			return fatalErrs.Error()
		}

		// If there are any retry errors, try again.
		if retryErrs.HasErrors() {
			continue
		}

		// Otherwise, success!
		return nil
	}
}
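// rebuildCellSrvShard above is an optimistic-concurrency loop: read the current node
// versions, write each node only if its version is unchanged, and retry the whole pass
// when a version conflict shows up. The snippet below is a minimal, self-contained
// sketch of that read/version-checked-write/retry loop with hypothetical names
// (store, rebuild, errBadVersion); it is not the Vitess topo API.
package main

import (
	"errors"
	"fmt"
)

var errBadVersion = errors.New("bad version")

// store is a toy versioned key/value store.
type store struct {
	value   string
	version int64
}

func (s *store) read() (string, int64) { return s.value, s.version }

// write succeeds only if the caller still holds the latest version.
func (s *store) write(value string, version int64) error {
	if version != s.version {
		return errBadVersion
	}
	s.value = value
	s.version++
	return nil
}

// rebuild retries until a write lands on an unchanged version.
func rebuild(s *store, compute func(current string) string) error {
	for attempt := 0; attempt < 5; attempt++ {
		current, version := s.read()
		if err := s.write(compute(current), version); err != nil {
			if errors.Is(err, errBadVersion) {
				continue // someone else wrote in between; re-read and retry
			}
			return err // anything else is fatal
		}
		return nil
	}
	return fmt.Errorf("too many version conflicts")
}

func main() {
	s := &store{value: "old", version: 3}
	err := rebuild(s, func(string) string { return "rebuilt" })
	fmt.Println(err, s.value, s.version)
}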
// TabletExternallyReparented updates all topo records so the current
// tablet is the new master for this shard.
// Should be called under RPCWrapLock.
func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error {
	startTime := time.Now()

	// If there is a finalize step running, wait for it to finish or time out
	// before checking the global shard record again.
	if agent.finalizeReparentCtx != nil {
		select {
		case <-agent.finalizeReparentCtx.Done():
			agent.finalizeReparentCtx = nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	tablet := agent.Tablet()

	// Check the global shard record.
	si, err := agent.TopoServer.GetShard(ctx, tablet.Keyspace, tablet.Shard)
	if err != nil {
		log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		return err
	}
	if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) {
		// We may get called on the current master even when nothing has changed.
		// If the global shard record is already updated, it means we successfully
		// finished a previous reparent to this tablet.
		return nil
	}

	// Remember when we were first told we're the master.
	// If another tablet claims to be master and offers a more recent time,
	// that tablet will be trusted over us.
	agent.mutex.Lock()
	agent._tabletExternallyReparentedTime = startTime
	agent._replicationDelay = 0
	agent.mutex.Unlock()

	// Create a reusable Reparent event with available info.
	ev := &events.Reparent{
		ShardInfo: *si,
		NewMaster: *tablet,
		OldMaster: topodatapb.Tablet{
			Alias: si.MasterAlias,
			Type:  topodatapb.TabletType_MASTER,
		},
		ExternalID: externalID,
	}
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()
	event.DispatchUpdate(ev, "starting external from tablet (fast)")

	var wg sync.WaitGroup
	var errs concurrency.AllErrorRecorder

	// Execute state change to master by force-updating only the local copy of the
	// tablet record. The actual record in topo will be updated later.
	log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER")
	oldTablet := proto.Clone(tablet).(*topodatapb.Tablet)
	tablet.Type = topodatapb.TabletType_MASTER
	tablet.HealthMap = nil
	agent.setTablet(tablet)

	wg.Add(1)
	go func() {
		defer wg.Done()
		// This is where updateState will block for gracePeriod, while it gives
		// vtgate a chance to stop sending replica queries.
		if err := agent.updateState(ctx, oldTablet, "fastTabletExternallyReparented"); err != nil {
			errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err))
		}
	}()

	wg.Add(1)
	go func() {
		defer wg.Done()
		// Directly write the new master endpoint in the serving graph.
		// We will do a true rebuild in the background soon, but in the meantime,
		// this will be enough for clients to re-resolve the new master.
		event.DispatchUpdate(ev, "writing new master endpoint")
		log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph")
		ep, err := topo.TabletEndPoint(tablet)
		if err != nil {
			errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err))
			return
		}
		err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell,
			si.Keyspace(), si.ShardName(), topodatapb.TabletType_MASTER,
			&topodatapb.EndPoints{Entries: []*topodatapb.EndPoint{ep}}, -1)
		if err != nil {
			errs.RecordError(fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err))
			return
		}
		externalReparentStats.Record("NewMasterVisible", startTime)
	}()

	// Wait for serving state grace period and serving graph update.
	wg.Wait()
	if errs.HasErrors() {
		return errs.Error()
	}

	// Start the finalize stage with a background context, but connect the trace.
	bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout)
	bgCtx = trace.CopySpan(bgCtx, ctx)
	agent.finalizeReparentCtx = bgCtx
	go func() {
		err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev)
		cancel()

		if err != nil {
			log.Warningf("finalizeTabletExternallyReparented error: %v", err)
			event.DispatchUpdate(ev, "failed: "+err.Error())
			return
		}
		externalReparentStats.Record("FullRebuild", startTime)
	}()

	return nil
}
// TabletExternallyReparented updates all topo records so the current
// tablet is the new master for this shard.
// Should be called under RPCWrapLock.
func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error {
	startTime := time.Now()

	// If there is a finalize step running, wait for it to finish or time out
	// before checking the global shard record again.
	if agent.finalizeReparentCtx != nil {
		select {
		case <-agent.finalizeReparentCtx.Done():
			agent.finalizeReparentCtx = nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	tablet := agent.Tablet()

	// Check the global shard record.
	si, err := topo.GetShard(ctx, agent.TopoServer, tablet.Keyspace, tablet.Shard)
	if err != nil {
		log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		return err
	}
	if topo.TabletAliasEqual(si.MasterAlias, tablet.Alias) {
		// We may get called on the current master even when nothing has changed.
		// If the global shard record is already updated, it means we successfully
		// finished a previous reparent to this tablet.
		return nil
	}

	// Create a reusable Reparent event with available info.
	ev := &events.Reparent{
		ShardInfo: *si,
		NewMaster: *tablet.Tablet,
		OldMaster: pb.Tablet{
			Alias: si.MasterAlias,
			Type:  pb.TabletType_MASTER,
		},
		ExternalID: externalID,
	}
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()
	event.DispatchUpdate(ev, "starting external from tablet (fast)")

	// Execute state change to master by force-updating only the local copy of the
	// tablet record. The actual record in topo will be updated later.
	log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER")
	oldTablet := *tablet.Tablet
	newTablet := oldTablet
	newTablet.Type = pb.TabletType_MASTER
	newTablet.HealthMap = nil
	agent.setTablet(topo.NewTabletInfo(&newTablet, -1))
	if err := agent.updateState(ctx, &oldTablet, "fastTabletExternallyReparented"); err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err)
	}
	agent.mutex.Lock()
	agent._tabletExternallyReparentedTime = time.Now()
	agent.mutex.Unlock()

	// Directly write the new master endpoint in the serving graph.
	// We will do a true rebuild in the background soon, but in the meantime,
	// this will be enough for clients to re-resolve the new master.
	event.DispatchUpdate(ev, "writing new master endpoint")
	log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph")
	ep, err := topo.TabletEndPoint(tablet.Tablet)
	if err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err)
	}
	err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell,
		si.Keyspace(), si.ShardName(), pb.TabletType_MASTER,
		&pb.EndPoints{Entries: []*pb.EndPoint{ep}}, -1)
	if err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err)
	}
	externalReparentStats.Record("NewMasterVisible", startTime)

	// Start the finalize stage with a background context, but connect the trace.
	bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout)
	bgCtx = trace.CopySpan(bgCtx, ctx)
	agent.finalizeReparentCtx = bgCtx
	go func() {
		err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev)
		cancel()

		if err != nil {
			log.Warningf("finalizeTabletExternallyReparented error: %v", err)
			event.DispatchUpdate(ev, "failed: "+err.Error())
			return
		}
		externalReparentStats.Record("FullRebuild", startTime)
	}()

	return nil
}
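// Both TabletExternallyReparented variants end by kicking off a "finalize" stage in the
// background: a context with its own timeout is derived from the agent's long-lived batch
// context, its handle is stashed so the next call can wait for the previous finalize, and
// the work runs in a goroutine that cancels the context when it returns. The snippet below
// is a minimal, self-contained sketch of that pattern with hypothetical names (agent,
// reparent, finalize); it is not the Vitess tabletmanager API.
package main

import (
	"context"
	"fmt"
	"time"
)

type agent struct {
	batchCtx    context.Context // long-lived context owned by the agent
	finalizeCtx context.Context // set while a finalize stage is running
}

// reparent assumes callers are serialized (the real code runs under RPCWrapLock).
func (a *agent) reparent(ctx context.Context, finalize func(context.Context) error) error {
	// If a previous finalize is still running, wait for it (or for our caller) first.
	if a.finalizeCtx != nil {
		select {
		case <-a.finalizeCtx.Done():
			a.finalizeCtx = nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	// ... the fast-path work would happen synchronously here ...

	// Run the slow finalize stage in the background with its own deadline.
	bgCtx, cancel := context.WithTimeout(a.batchCtx, 2*time.Second)
	a.finalizeCtx = bgCtx
	go func() {
		defer cancel() // releases resources and unblocks the next caller's wait
		if err := finalize(bgCtx); err != nil {
			fmt.Println("finalize failed:", err)
		}
	}()
	return nil
}

func main() {
	a := &agent{batchCtx: context.Background()}
	_ = a.reparent(context.Background(), func(ctx context.Context) error {
		time.Sleep(100 * time.Millisecond) // stand-in for the background rebuild
		return nil
	})
	time.Sleep(200 * time.Millisecond) // give the background stage time to finish in this demo
}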