func createSourceTablet(t *testing.T, ts topo.Server, keyspace, shard string) { vshard, kr, err := topo.ValidateShardName(shard) if err != nil { t.Fatalf("ValidateShardName(%v) failed: %v", shard, err) } ctx := context.Background() tablet := &pb.Tablet{ Alias: &pb.TabletAlias{ Cell: "cell1", Uid: 100, }, Type: pb.TabletType_REPLICA, KeyRange: kr, Keyspace: keyspace, Shard: vshard, PortMap: map[string]int32{ "vt": 80, }, } if err := ts.CreateTablet(ctx, tablet); err != nil { t.Fatalf("CreateTablet failed: %v", err) } if err := topotools.UpdateTabletEndpoints(ctx, ts, tablet); err != nil { t.Fatalf("topotools.UpdateTabletEndpoints failed: %v", err) } }
// updateServingGraph will update the serving graph if we need to. func (agent *ActionAgent) updateServingGraph(tablet *topo.TabletInfo, targetTabletType pbt.TabletType) error { if topo.IsInServingGraph(targetTabletType) { if err := topotools.UpdateTabletEndpoints(agent.batchCtx, agent.TopoServer, tablet.Tablet); err != nil { return fmt.Errorf("UpdateTabletEndpoints failed: %v", err) } } return nil }
func (agent *ActionAgent) verifyServingAddrs(ctx context.Context) error { ti := agent.Tablet() if !ti.IsRunningQueryService() { return nil } // Check to see our address is registered in the right place. return topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, ti.Tablet) }
// createSourceTablet is a helper method to create the source tablet // in the given keyspace/shard. func createSourceTablet(t *testing.T, name string, ts topo.Server, keyspace, shard string) { vshard, kr, err := topo.ValidateShardName(shard) if err != nil { t.Fatalf("ValidateShardName(%v) failed: %v", shard, err) } ctx := context.Background() tablet := &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ Cell: "cell1", Uid: 100, }, Type: topodatapb.TabletType_REPLICA, KeyRange: kr, Keyspace: keyspace, Shard: vshard, PortMap: map[string]int32{ "vt": 80, }, } if err := ts.CreateTablet(ctx, tablet); err != nil { t.Fatalf("CreateTablet failed: %v", err) } if err := topotools.UpdateTabletEndpoints(ctx, ts, tablet); err != nil { t.Fatalf("topotools.UpdateTabletEndpoints failed: %v", err) } // register a tablet conn dialer that will return the instance // we want tabletconn.RegisterDialer(name, func(ctx context.Context, endPoint *topodatapb.EndPoint, k, s string, tabletType topodatapb.TabletType, timeout time.Duration) (tabletconn.TabletConn, error) { return &fakeTabletConn{ endPoint: endPoint, keyspace: keyspace, shard: vshard, tabletType: topodatapb.TabletType_REPLICA, }, nil }) flag.Lookup("tablet_protocol").Value.Set(name) }
// InitTablet initializes the tablet record if necessary. func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error { // only enabled if one of init_tablet_type (when healthcheck // is disabled) or init_keyspace (when healthcheck is enabled) // is passed in, then check other parameters if *initTabletType == "" && *initKeyspace == "" { return nil } // figure out our default target type var tabletType topodatapb.TabletType if *initTabletType != "" { if *targetTabletType != "" { log.Fatalf("cannot specify both target_tablet_type and init_tablet_type parameters (as they might conflict)") } // use the type specified on the command line var err error tabletType, err = topoproto.ParseTabletType(*initTabletType) if err != nil { log.Fatalf("Invalid init tablet type %v: %v", *initTabletType, err) } if tabletType == topodatapb.TabletType_MASTER { // We disallow MASTER, so we don't have to change // shard.MasterAlias, and deal with the corner cases. log.Fatalf("init_tablet_type cannot be %v", tabletType) } } else if *targetTabletType != "" { if strings.ToUpper(*targetTabletType) == topodatapb.TabletType_name[int32(topodatapb.TabletType_MASTER)] { log.Fatalf("target_tablet_type cannot be '%v'. Use '%v' instead.", tabletType, topodatapb.TabletType_REPLICA) } // use spare, the healthcheck will turn us into what // we need to be eventually tabletType = topodatapb.TabletType_SPARE } else { log.Fatalf("if init tablet is enabled, one of init_tablet_type or target_tablet_type needs to be specified") } // create a context for this whole operation ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout) defer cancel() // since we're assigned to a shard, make sure it exists, see if // we are its master, and update its cells list if necessary if *initKeyspace == "" || *initShard == "" { log.Fatalf("if init tablet is enabled and the target type is not idle, init_keyspace and init_shard also need to be specified") } shard, _, err := topo.ValidateShardName(*initShard) if err != nil { log.Fatalf("cannot validate shard name: %v", err) } log.Infof("Reading shard record %v/%v", *initKeyspace, shard) // read the shard, create it if necessary si, err := topotools.GetOrCreateShard(ctx, agent.TopoServer, *initKeyspace, shard) if err != nil { return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err) } if si.MasterAlias != nil && topoproto.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) { // we are the current master for this shard (probably // means the master tablet process was just restarted), // so InitTablet as master. tabletType = topodatapb.TabletType_MASTER } // See if we need to add the tablet's cell to the shard's cell // list. If we do, it has to be under the shard lock. if !si.HasCell(agent.TabletAlias.Cell) { actionNode := actionnode.UpdateShard() lockPath, err := actionNode.LockShard(ctx, agent.TopoServer, *initKeyspace, shard) if err != nil { return fmt.Errorf("LockShard(%v/%v) failed: %v", *initKeyspace, shard, err) } // re-read the shard with the lock si, err = agent.TopoServer.GetShard(ctx, *initKeyspace, shard) if err != nil { return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err) } // see if we really need to update it now if !si.HasCell(agent.TabletAlias.Cell) { si.Cells = append(si.Cells, agent.TabletAlias.Cell) // write it back if err := agent.TopoServer.UpdateShard(ctx, si); err != nil { return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err) } } // and unlock if err := actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, nil); err != nil { return err } } log.Infof("Initializing the tablet for type %v", tabletType) // figure out the hostname hostname := *tabletHostname if hostname != "" { log.Infof("Using hostname: %v from -tablet_hostname flag.", hostname) } else { hostname, err := netutil.FullyQualifiedHostname() if err != nil { return err } log.Infof("Using detected machine hostname: %v To change this, fix your machine network configuration or override it with -tablet_hostname.", hostname) } // create and populate tablet record tablet := &topodatapb.Tablet{ Alias: agent.TabletAlias, Hostname: hostname, PortMap: make(map[string]int32), Keyspace: *initKeyspace, Shard: *initShard, Type: tabletType, DbNameOverride: *initDbNameOverride, Tags: initTags, } if port != 0 { tablet.PortMap["vt"] = port } if gRPCPort != 0 { tablet.PortMap["grpc"] = gRPCPort } if err := topo.TabletComplete(tablet); err != nil { return fmt.Errorf("InitTablet TabletComplete failed: %v", err) } // now try to create the record err = agent.TopoServer.CreateTablet(ctx, tablet) switch err { case nil: // it worked, we're good, can update the replication graph if err := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); err != nil { return fmt.Errorf("UpdateTabletReplicationData failed: %v", err) } case topo.ErrNodeExists: // The node already exists, will just try to update // it. So we read it first. oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias) if err != nil { return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err) } // Sanity check the keyspace and shard if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard { return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard) } // And overwrite the rest *(oldTablet.Tablet) = *tablet if err := agent.TopoServer.UpdateTablet(ctx, oldTablet); err != nil { return fmt.Errorf("UpdateTablet failed: %v", err) } // Note we don't need to UpdateTabletReplicationData // as the tablet already existed with the right data // in the replication graph default: return fmt.Errorf("CreateTablet failed: %v", err) } // and now update the serving graph. Note we do that in any case, // to clean any inaccurate record from any part of the serving graph. if err := topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, tablet); err != nil { return fmt.Errorf("UpdateTabletEndpoints failed: %v", err) } return nil }
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation // tasks that ensure topology is self-consistent, and then marks the reparent as // finished by updating the global shard record. func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) { var wg sync.WaitGroup var errs concurrency.AllErrorRecorder oldMasterAlias := si.MasterAlias // Update the tablet records and serving graph for the old and new master concurrently. event.DispatchUpdate(ev, "updating old and new master tablet records") log.Infof("finalizeTabletExternallyReparented: updating tablet records") wg.Add(1) go func() { defer wg.Done() // Update our own record to master. updatedTablet, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias, func(tablet *topodatapb.Tablet) error { tablet.Type = topodatapb.TabletType_MASTER tablet.HealthMap = nil return nil }) if err != nil { errs.RecordError(err) return } // Update the serving graph for the tablet. if updatedTablet != nil { errs.RecordError( topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, updatedTablet)) } }() if !topoproto.TabletAliasIsZero(oldMasterAlias) { wg.Add(1) go func() { // Forcibly demote the old master in topology, since we can't rely on the // old master to be up to change its own record. oldMasterTablet, err := agent.TopoServer.UpdateTabletFields(ctx, oldMasterAlias, func(tablet *topodatapb.Tablet) error { tablet.Type = topodatapb.TabletType_SPARE return nil }) if err != nil { errs.RecordError(err) wg.Done() return } // We now know more about the old master, so add it to event data. ev.OldMaster = *oldMasterTablet // Update the serving graph. errs.RecordError( topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, oldMasterTablet)) wg.Done() // Tell the old master to re-read its tablet record and change its state. // We don't need to wait for it. tmc := tmclient.NewTabletManagerClient() tmc.RefreshState(ctx, topo.NewTabletInfo(oldMasterTablet, -1)) }() } tablet := agent.Tablet() // Wait for the tablet records to be updated. At that point, any rebuild will // see the new master, so we're ready to mark the reparent as done in the // global shard record. wg.Wait() if errs.HasErrors() { return errs.Error() } // Update the master field in the global shard record. We don't use a lock // here anymore. The lock was only to ensure that the global shard record // didn't get modified between the time when we read it and the time when we // write it back. Now we use an update loop pattern to do that instead. event.DispatchUpdate(ev, "updating global shard record") log.Infof("finalizeTabletExternallyReparented: updating global shard record") si, err = agent.TopoServer.UpdateShardFields(ctx, tablet.Keyspace, tablet.Shard, func(shard *topodatapb.Shard) error { shard.MasterAlias = tablet.Alias return nil }) if err != nil { return err } // We already took care of updating the serving graph for the old and new masters. // All that's left now is in case of a cross-cell reparent, we need to update the // master cell setting in the SrvShard records of all cells. if oldMasterAlias == nil || oldMasterAlias.Cell != tablet.Alias.Cell { event.DispatchUpdate(ev, "rebuilding shard serving graph") log.Infof("finalizeTabletExternallyReparented: updating SrvShard in all cells for cross-cell reparent") if err := topotools.UpdateAllSrvShards(ctx, agent.TopoServer, si); err != nil { return err } } event.DispatchUpdate(ev, "finished") return nil }