Example #1
func tabletInfoFromJSON(data string, version int64) (*topo.TabletInfo, error) {
	tablet, err := tabletFromJSON(data)
	if err != nil {
		return nil, err
	}
	return topo.NewTabletInfo(tablet, version), nil
}
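The tabletFromJSON helper is not shown in this snippet. A minimal sketch of what it plausibly does, assuming the proto-based topodatapb.Tablet record used in several of the examples below (some older examples use the pre-proto topo.Tablet instead) and plain encoding/json decoding, as Example #6 does inline; this body is illustrative, not the project's actual implementation:

// tabletFromJSON decodes a JSON-encoded tablet record (hypothetical
// sketch, mirroring the json.Unmarshal call in Example #6).
func tabletFromJSON(data string) (*topodatapb.Tablet, error) {
	tablet := &topodatapb.Tablet{}
	if err := json.Unmarshal([]byte(data), tablet); err != nil {
		return nil, fmt.Errorf("bad tablet data (%v): %q", err, data)
	}
	return tablet, nil
}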
Example #2
// TestGRPCTMServer creates a fake server implementation, a fake client
// implementation, and runs the test suite against the setup.
func TestGRPCTMServer(t *testing.T) {
	// Listen on a random port
	listener, err := net.Listen("tcp", ":0")
	if err != nil {
		t.Fatalf("Cannot listen: %v", err)
	}
	host := listener.Addr().(*net.TCPAddr).IP.String()
	port := int32(listener.Addr().(*net.TCPAddr).Port)

	// Create a gRPC server and listen on the port.
	s := grpc.NewServer()
	fakeAgent := agentrpctest.NewFakeRPCAgent(t)
	tabletmanagerservicepb.RegisterTabletManagerServer(s, &server{agent: fakeAgent})
	go s.Serve(listener)

	// Create a gRPC client to talk to the fake tablet.
	client := &grpctmclient.Client{}
	ti := topo.NewTabletInfo(&topodatapb.Tablet{
		Alias: &topodatapb.TabletAlias{
			Cell: "test",
			Uid:  123,
		},
		Hostname: host,
		PortMap: map[string]int32{
			"grpc": port,
		},
	}, 0)

	// and run the test suite
	agentrpctest.Run(t, client, ti, fakeAgent)
}
Example #3
func addStatusParts(qsc tabletserver.Controller) {
	servenv.AddStatusPart("Tablet", tabletTemplate, func() interface{} {
		return map[string]interface{}{
			"Tablet":               topo.NewTabletInfo(agent.Tablet(), -1),
			"BlacklistedTables":    agent.BlacklistedTables(),
			"DisallowQueryService": agent.DisallowQueryService(),
			"DisableUpdateStream":  !agent.EnableUpdateStream(),
		}
	})
	servenv.AddStatusFuncs(template.FuncMap{
		"github_com_youtube_vitess_health_html_name": healthHTMLName,
	})
	servenv.AddStatusPart("Health", healthTemplate, func() interface{} {
		return &healthStatus{
			Records: agent.History.Records(),
			Config:  tabletmanager.ConfigHTML(),
		}
	})
	qsc.AddStatusPart()
	servenv.AddStatusPart("Binlog Player", binlogTemplate, func() interface{} {
		return agent.BinlogPlayerMap.Status()
	})
	if onStatusRegistered != nil {
		onStatusRegistered()
	}
}
Example #4
// StartActionLoop will start the action loop for a fake tablet,
// using ft.FakeMysqlDaemon as the backing mysqld.
func (ft *FakeTablet) StartActionLoop(t *testing.T, wr *wrangler.Wrangler) {
	if ft.Agent != nil {
		t.Fatalf("Agent for %v is already running", ft.Tablet.Alias)
	}

	// Listen on a random port for gRPC
	var err error
	ft.Listener, err = net.Listen("tcp", ":0")
	if err != nil {
		t.Fatalf("Cannot listen: %v", err)
	}
	gRPCPort := int32(ft.Listener.Addr().(*net.TCPAddr).Port)

	// if needed, listen on a random port for HTTP
	vtPort := ft.Tablet.PortMap["vt"]
	if ft.StartHTTPServer {
		ft.HTTPListener, err = net.Listen("tcp", ":0")
		if err != nil {
			t.Fatalf("Cannot listen on http port: %v", err)
		}
		handler := http.NewServeMux()
		ft.HTTPServer = http.Server{
			Handler: handler,
		}
		go ft.HTTPServer.Serve(ft.HTTPListener)
		vtPort = int32(ft.HTTPListener.Addr().(*net.TCPAddr).Port)
	}

	// create a test agent on that port, and re-read the record
	// (it has new ports and IP)
	ft.Agent = tabletmanager.NewTestActionAgent(context.Background(), wr.TopoServer(), ft.Tablet.Alias, vtPort, gRPCPort, ft.FakeMysqlDaemon)
	ft.Tablet = ft.Agent.Tablet()

	// create the gRPC server
	ft.RPCServer = grpc.NewServer()
	grpctmserver.RegisterForTest(ft.RPCServer, ft.Agent)
	go ft.RPCServer.Serve(ft.Listener)

	// and wait for it to serve, so we don't start using it before it's
	// ready.
	timeout := 5 * time.Second
	step := 10 * time.Millisecond
	c := tmclient.NewTabletManagerClient()
	for timeout >= 0 {
		ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
		err := c.Ping(ctx, topo.NewTabletInfo(ft.Agent.Tablet(), -1))
		cancel()
		if err == nil {
			break
		}
		time.Sleep(step)
		timeout -= step
	}
	if timeout < 0 {
		panic("StartActionLoop failed.")
	}
}
Example #5
// agentRPCTestIsTimeoutErrorDialTimeout verifies that client.IsTimeoutError()
// returns true for RPCs failed due to a connect timeout during .Dial().
func agentRPCTestIsTimeoutErrorDialTimeout(ctx context.Context, t *testing.T, client tmclient.TabletManagerClient, ti *topo.TabletInfo) {
	// Connect to a non-existing tablet.
	// For example, this provokes gRPC to return error grpc.ErrClientConnTimeout.
	invalidTi := topo.NewTabletInfo(ti.Tablet, ti.Version())
	invalidTi.Tablet = proto.Clone(invalidTi.Tablet).(*topodatapb.Tablet)
	invalidTi.Tablet.Hostname = "Non-Existent.Server"

	shortCtx, cancel := context.WithTimeout(ctx, time.Millisecond)
	defer cancel()
	err := client.Ping(shortCtx, invalidTi)
	if err == nil {
		t.Fatal("agentRPCTestIsTimeoutErrorDialTimeout: connect to non-existant tablet did not fail")
	}
	if !client.IsTimeoutError(err) {
		t.Errorf("agentRPCTestIsTimeoutErrorDialTimeout: want: IsTimeoutError() = true. error: %v", err)
	}
}
Example #6
// GetTablet implements topo.Server.
func (s *Server) GetTablet(ctx context.Context, tabletAlias topo.TabletAlias) (*topo.TabletInfo, error) {
	cell, err := s.getCell(tabletAlias.Cell)
	if err != nil {
		return nil, err
	}

	resp, err := cell.Get(tabletFilePath(tabletAlias.String()), false /* sort */, false /* recursive */)
	if err != nil {
		return nil, convertError(err)
	}
	if resp.Node == nil {
		return nil, ErrBadResponse
	}

	value := &topo.Tablet{}
	if err := json.Unmarshal([]byte(resp.Node.Value), value); err != nil {
		return nil, fmt.Errorf("bad tablet data (%v): %q", err, resp.Node.Value)
	}

	return topo.NewTabletInfo(value, int64(resp.Node.ModifiedIndex)), nil
}
Example #7
// TestGoRPCTMServer creates a fake server implementation, a fake client
// implementation, and runs the test suite against the setup.
func TestGoRPCTMServer(t *testing.T) {
	// Listen on a random port
	listener, err := net.Listen("tcp", ":0")
	if err != nil {
		t.Fatalf("Cannot listen: %v", err)
	}
	defer listener.Close()
	port := int32(listener.Addr().(*net.TCPAddr).Port)

	// Create a Go Rpc server and listen on the port
	server := rpcplus.NewServer()
	fakeAgent := agentrpctest.NewFakeRPCAgent(t)
	server.Register(&TabletManager{fakeAgent})

	// create the HTTP server, serve the server from it
	handler := http.NewServeMux()
	bsonrpc.ServeCustomRPC(handler, server)
	httpServer := http.Server{
		Handler: handler,
	}
	go httpServer.Serve(listener)

	// Create a Go Rpc client to talk to the fake tablet
	client := &gorpctmclient.GoRPCTabletManagerClient{}
	ti := topo.NewTabletInfo(&pb.Tablet{
		Alias: &pb.TabletAlias{
			Cell: "test",
			Uid:  123,
		},
		Hostname: "localhost",
		PortMap: map[string]int32{
			"vt": port,
		},
	}, 0)

	// and run the test suite
	agentrpctest.Run(t, client, ti, fakeAgent)
}
Example #8
// InitTablet initializes the tablet record if necessary.
func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error {
	// Tablet initialization is only enabled if init_tablet_type (when
	// healthcheck is disabled) or init_keyspace (when healthcheck is
	// enabled) is passed in; if so, check the other parameters.
	if *initTabletType == "" && *initKeyspace == "" {
		return nil
	}

	// figure out our default target type
	var tabletType topodatapb.TabletType
	if *initTabletType != "" {
		if *targetTabletType != "" {
			log.Fatalf("cannot specify both target_tablet_type and init_tablet_type parameters (as they might conflict)")
		}

		// use the type specified on the command line
		var err error
		tabletType, err = topoproto.ParseTabletType(*initTabletType)
		if err != nil {
			log.Fatalf("Invalid init tablet type %v: %v", *initTabletType, err)
		}

		if tabletType == topodatapb.TabletType_MASTER {
			// We disallow MASTER, so we don't have to change
			// shard.MasterAlias, and deal with the corner cases.
			log.Fatalf("init_tablet_type cannot be %v", tabletType)
		}

	} else if *targetTabletType != "" {
		if strings.ToUpper(*targetTabletType) == topodatapb.TabletType_name[int32(topodatapb.TabletType_MASTER)] {
			log.Fatalf("target_tablet_type cannot be '%v'. Use '%v' instead.", tabletType, topodatapb.TabletType_REPLICA)
		}

		// use spare, the healthcheck will turn us into what
		// we need to be eventually
		tabletType = topodatapb.TabletType_SPARE

	} else {
		log.Fatalf("if init tablet is enabled, one of init_tablet_type or target_tablet_type needs to be specified")
	}

	// create a context for this whole operation
	ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout)
	defer cancel()

	// since we're assigned to a shard, make sure it exists, see if
	// we are its master, and update its cells list if necessary
	if *initKeyspace == "" || *initShard == "" {
		log.Fatalf("if init tablet is enabled and the target type is not idle, init_keyspace and init_shard also need to be specified")
	}
	shard, _, err := topo.ValidateShardName(*initShard)
	if err != nil {
		log.Fatalf("cannot validate shard name: %v", err)
	}

	log.Infof("Reading shard record %v/%v", *initKeyspace, shard)

	// read the shard, create it if necessary
	si, err := topotools.GetOrCreateShard(ctx, agent.TopoServer, *initKeyspace, shard)
	if err != nil {
		return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err)
	}
	if si.MasterAlias != nil && topoproto.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) {
		// We're marked as master in the shard record, which could mean the master
		// tablet process was just restarted. However, we need to check if a new
		// master is in the process of taking over. In that case, it will let us
		// know by forcibly updating the old master's tablet record.
		oldTablet, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias)
		switch err {
		case topo.ErrNoNode:
			// There's no existing tablet record, so we can assume
			// no one has left us a message to step down.
			tabletType = topodatapb.TabletType_MASTER
		case nil:
			if oldTablet.Type == topodatapb.TabletType_MASTER {
				// We're marked as master in the shard record,
				// and our existing tablet record agrees.
				tabletType = topodatapb.TabletType_MASTER
			}
		default:
			return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err)
		}
	}

	// See if we need to add the tablet's cell to the shard's cell
	// list.  If we do, it has to be under the shard lock.
	if !si.HasCell(agent.TabletAlias.Cell) {
		actionNode := actionnode.UpdateShard()
		lockPath, err := actionNode.LockShard(ctx, agent.TopoServer, *initKeyspace, shard)
		if err != nil {
			return fmt.Errorf("LockShard(%v/%v) failed: %v", *initKeyspace, shard, err)
		}

		// re-read the shard with the lock
		si, err = agent.TopoServer.GetShard(ctx, *initKeyspace, shard)
		if err != nil {
			return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err)
		}

		// see if we really need to update it now
		if !si.HasCell(agent.TabletAlias.Cell) {
			si.Cells = append(si.Cells, agent.TabletAlias.Cell)

			// write it back
			if err := agent.TopoServer.UpdateShard(ctx, si); err != nil {
				return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err)
			}
		}

		// and unlock
		if err := actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, nil); err != nil {
			return err
		}
	}
	log.Infof("Initializing the tablet for type %v", tabletType)

	// figure out the hostname
	hostname := *tabletHostname
	if hostname != "" {
		log.Infof("Using hostname: %v from -tablet_hostname flag.", hostname)
	} else {
		// Assign to the outer hostname (":=" would shadow it and leave the
		// tablet record's Hostname empty).
		hostname, err = netutil.FullyQualifiedHostname()
		if err != nil {
			return err
		}
		log.Infof("Using detected machine hostname: %v To change this, fix your machine network configuration or override it with -tablet_hostname.", hostname)
	}

	// create and populate tablet record
	tablet := &topodatapb.Tablet{
		Alias:          agent.TabletAlias,
		Hostname:       hostname,
		PortMap:        make(map[string]int32),
		Keyspace:       *initKeyspace,
		Shard:          *initShard,
		Type:           tabletType,
		DbNameOverride: *initDbNameOverride,
		Tags:           initTags,
	}
	if port != 0 {
		tablet.PortMap["vt"] = port
	}
	if gRPCPort != 0 {
		tablet.PortMap["grpc"] = gRPCPort
	}
	if err := topo.TabletComplete(tablet); err != nil {
		return fmt.Errorf("InitTablet TabletComplete failed: %v", err)
	}

	// now try to create the record
	err = agent.TopoServer.CreateTablet(ctx, tablet)
	switch err {
	case nil:
		// it worked, we're good, can update the replication graph
		if err := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); err != nil {
			return fmt.Errorf("UpdateTabletReplicationData failed: %v", err)
		}

	case topo.ErrNodeExists:
		// The node already exists, will just try to update it. So we read it first.
		oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias)
		if err != nil {
			return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err)
		}

		// Sanity check the keyspace and shard
		if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard {
			return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard)
		}

		// Then overwrite everything, ignoring version mismatch.
		if err := agent.TopoServer.UpdateTablet(ctx, topo.NewTabletInfo(tablet, -1)); err != nil {
			return fmt.Errorf("UpdateTablet failed: %v", err)
		}

		// Note we don't need to UpdateTabletReplicationData
		// as the tablet already existed with the right data
		// in the replication graph
	default:
		return fmt.Errorf("CreateTablet failed: %v", err)
	}

	// and now update the serving graph. Note we do that in any case,
	// to clean any inaccurate record from any part of the serving graph.
	if err := topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, tablet); err != nil {
		return fmt.Errorf("UpdateTabletEndpoints failed: %v", err)
	}

	return nil
}
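Example #8 passes -1 as the version to topo.NewTabletInfo, so the subsequent UpdateTablet deliberately overwrites the record regardless of intervening changes, whereas Example #6 captures the real version (etcd's ModifiedIndex) when reading. When that version matters, the usual pattern is a versioned read-modify-write: the TabletInfo returned by GetTablet remembers the version it was read at, so handing it back to UpdateTablet should surface a concurrent write as an error instead of silently losing it. A minimal sketch reusing the ActionAgent fields and TopoServer methods shown above; updateTabletHostname itself is a hypothetical helper, not code from the project:

// updateTabletHostname performs a versioned read-modify-write of this
// agent's tablet record (illustrative sketch only).
func updateTabletHostname(ctx context.Context, agent *ActionAgent, hostname string) error {
	// Read the record; the returned TabletInfo carries the version it was read at.
	ti, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias)
	if err != nil {
		return err
	}
	// Modify the embedded tablet record.
	ti.Hostname = hostname
	// Write it back at that version; contrast with topo.NewTabletInfo(tablet, -1)
	// above, which ignores version mismatches.
	return agent.TopoServer.UpdateTablet(ctx, ti)
}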
Example #9
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation
// tasks that ensure topology is self-consistent, and then marks the reparent as
// finished by updating the global shard record.
func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) {
	var wg sync.WaitGroup
	var errs concurrency.AllErrorRecorder
	oldMasterAlias := si.MasterAlias

	// Update the tablet records and serving graph for the old and new master concurrently.
	event.DispatchUpdate(ev, "updating old and new master tablet records")
	log.Infof("finalizeTabletExternallyReparented: updating tablet records")
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Update our own record to master.
		updatedTablet, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias,
			func(tablet *topodatapb.Tablet) error {
				tablet.Type = topodatapb.TabletType_MASTER
				tablet.HealthMap = nil
				return nil
			})
		if err != nil {
			errs.RecordError(err)
			return
		}

		// Update the serving graph for the tablet.
		if updatedTablet != nil {
			errs.RecordError(
				topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, updatedTablet))
		}
	}()

	if !topoproto.TabletAliasIsZero(oldMasterAlias) {
		wg.Add(1)
		go func() {
			// Forcibly demote the old master in topology, since we can't rely on the
			// old master to be up to change its own record.
			oldMasterTablet, err := agent.TopoServer.UpdateTabletFields(ctx, oldMasterAlias,
				func(tablet *topodatapb.Tablet) error {
					tablet.Type = topodatapb.TabletType_SPARE
					return nil
				})
			if err != nil {
				errs.RecordError(err)
				wg.Done()
				return
			}

			// We now know more about the old master, so add it to event data.
			ev.OldMaster = *oldMasterTablet

			// Update the serving graph.
			errs.RecordError(
				topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, oldMasterTablet))
			wg.Done()

			// Tell the old master to re-read its tablet record and change its state.
			// We don't need to wait for it.
			tmc := tmclient.NewTabletManagerClient()
			tmc.RefreshState(ctx, topo.NewTabletInfo(oldMasterTablet, -1))
		}()
	}

	tablet := agent.Tablet()

	// Wait for the tablet records to be updated. At that point, any rebuild will
	// see the new master, so we're ready to mark the reparent as done in the
	// global shard record.
	wg.Wait()
	if errs.HasErrors() {
		return errs.Error()
	}

	// Update the master field in the global shard record. We don't use a lock
	// here anymore. The lock was only to ensure that the global shard record
	// didn't get modified between the time when we read it and the time when we
	// write it back. Now we use an update loop pattern to do that instead.
	event.DispatchUpdate(ev, "updating global shard record")
	log.Infof("finalizeTabletExternallyReparented: updating global shard record")
	si, err = agent.TopoServer.UpdateShardFields(ctx, tablet.Keyspace, tablet.Shard, func(shard *topodatapb.Shard) error {
		shard.MasterAlias = tablet.Alias
		return nil
	})
	if err != nil {
		return err
	}

	// We already took care of updating the serving graph for the old and new masters.
	// All that's left now is in case of a cross-cell reparent, we need to update the
	// master cell setting in the SrvShard records of all cells.
	if oldMasterAlias == nil || oldMasterAlias.Cell != tablet.Alias.Cell {
		event.DispatchUpdate(ev, "rebuilding shard serving graph")
		log.Infof("finalizeTabletExternallyReparented: updating SrvShard in all cells for cross-cell reparent")
		if err := topotools.UpdateAllSrvShards(ctx, agent.TopoServer, si); err != nil {
			return err
		}
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}
Example #10
// TabletExternallyReparented updates all topo records so the current
// tablet is the new master for this shard.
// Should be called under RPCWrapLock.
func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error {
	startTime := time.Now()

	// If there is a finalize step running, wait for it to finish or time out
	// before checking the global shard record again.
	if agent.finalizeReparentCtx != nil {
		select {
		case <-agent.finalizeReparentCtx.Done():
			agent.finalizeReparentCtx = nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	tablet := agent.Tablet()

	// Check the global shard record.
	si, err := topo.GetShard(ctx, agent.TopoServer, tablet.Keyspace, tablet.Shard)
	if err != nil {
		log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		return err
	}
	if si.MasterAlias == tablet.Alias {
		// We may get called on the current master even when nothing has changed.
		// If the global shard record is already updated, it means we successfully
		// finished a previous reparent to this tablet.
		return nil
	}

	// Create a reusable Reparent event with available info.
	ev := &events.Reparent{
		ShardInfo:  *si,
		NewMaster:  *tablet.Tablet,
		OldMaster:  topo.Tablet{Alias: si.MasterAlias, Type: topo.TYPE_MASTER},
		ExternalID: externalID,
	}
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()
	event.DispatchUpdate(ev, "starting external from tablet (fast)")

	// Execute state change to master by force-updating only the local copy of the
	// tablet record. The actual record in topo will be updated later.
	log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER")
	oldTablet := *tablet.Tablet
	newTablet := oldTablet
	newTablet.Type = topo.TYPE_MASTER
	newTablet.Health = nil
	agent.setTablet(topo.NewTabletInfo(&newTablet, -1))
	if err := agent.updateState(ctx, &oldTablet, "fastTabletExternallyReparented"); err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err)
	}

	// Directly write the new master endpoint in the serving graph.
	// We will do a true rebuild in the background soon, but in the meantime,
	// this will be enough for clients to re-resolve the new master.
	event.DispatchUpdate(ev, "writing new master endpoint")
	log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph")
	ep, err := tablet.EndPoint()
	if err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err)
	}
	err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell,
		si.Keyspace(), si.ShardName(), topo.TYPE_MASTER,
		&topo.EndPoints{Entries: []topo.EndPoint{*ep}}, -1)
	if err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err)
	}
	externalReparentStats.Record("NewMasterVisible", startTime)

	// Start the finalize stage with a background context, but connect the trace.
	bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout)
	bgCtx = trace.CopySpan(bgCtx, ctx)
	agent.finalizeReparentCtx = bgCtx
	go func() {
		err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev)
		cancel()

		if err != nil {
			log.Warningf("finalizeTabletExternallyReparented error: %v", err)
			event.DispatchUpdate(ev, "failed: "+err.Error())
			return
		}
		externalReparentStats.Record("FullRebuild", startTime)
	}()

	return nil
}
Example #11
// InitTablet initializes the tablet record if necessary.
func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error {
	// We need either all three of init_keyspace, init_shard and
	// init_tablet_type, or none of them.
	if *initKeyspace == "" && *initShard == "" && *initTabletType == "" {
		// not initializing the record
		return nil
	}
	if *initKeyspace == "" || *initShard == "" || *initTabletType == "" {
		return fmt.Errorf("either need all of init_keyspace, init_shard and init_tablet_type, or none")
	}

	// parse init_tablet_type
	tabletType, err := topoproto.ParseTabletType(*initTabletType)
	if err != nil {
		return fmt.Errorf("invalid init_tablet_type %v: %v", *initTabletType, err)
	}
	if tabletType == topodatapb.TabletType_MASTER {
		// We disallow MASTER, so we don't have to change
		// shard.MasterAlias, and deal with the corner cases.
		return fmt.Errorf("init_tablet_type cannot be master, use replica instead")
	}

	// parse and validate shard name
	shard, _, err := topo.ValidateShardName(*initShard)
	if err != nil {
		return fmt.Errorf("cannot validate shard name %v: %v", *initShard, err)
	}

	// create a context for this whole operation
	ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout)
	defer cancel()

	// read the shard, create it if necessary
	log.Infof("Reading shard record %v/%v", *initKeyspace, shard)
	si, err := agent.TopoServer.GetOrCreateShard(ctx, *initKeyspace, shard)
	if err != nil {
		return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err)
	}
	if si.MasterAlias != nil && topoproto.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) {
		// We're marked as master in the shard record, which could mean the master
		// tablet process was just restarted. However, we need to check if a new
		// master is in the process of taking over. In that case, it will let us
		// know by forcibly updating the old master's tablet record.
		oldTablet, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias)
		switch err {
		case topo.ErrNoNode:
			// There's no existing tablet record, so we can assume
			// no one has left us a message to step down.
			tabletType = topodatapb.TabletType_MASTER
		case nil:
			if oldTablet.Type == topodatapb.TabletType_MASTER {
				// We're marked as master in the shard record,
				// and our existing tablet record agrees.
				tabletType = topodatapb.TabletType_MASTER
			}
		default:
			return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err)
		}
	}

	// See if we need to add the tablet's cell to the shard's cell list.
	if !si.HasCell(agent.TabletAlias.Cell) {
		si, err = agent.TopoServer.UpdateShardFields(ctx, *initKeyspace, shard, func(si *topo.ShardInfo) error {
			if si.HasCell(agent.TabletAlias.Cell) {
				// Someone else already did it.
				return topo.ErrNoUpdateNeeded
			}
			si.Cells = append(si.Cells, agent.TabletAlias.Cell)
			return nil
		})
		if err != nil {
			return fmt.Errorf("couldn't add tablet's cell to shard record: %v", err)
		}
	}
	log.Infof("Initializing the tablet for type %v", tabletType)

	// figure out the hostname
	hostname := *tabletHostname
	if hostname != "" {
		log.Infof("Using hostname: %v from -tablet_hostname flag.", hostname)
	} else {
		// Assign to the outer hostname (":=" would shadow it and leave the
		// tablet record's Hostname empty).
		hostname, err = netutil.FullyQualifiedHostname()
		if err != nil {
			return err
		}
		log.Infof("Using detected machine hostname: %v To change this, fix your machine network configuration or override it with -tablet_hostname.", hostname)
	}

	// create and populate tablet record
	tablet := &topodatapb.Tablet{
		Alias:          agent.TabletAlias,
		Hostname:       hostname,
		PortMap:        make(map[string]int32),
		Keyspace:       *initKeyspace,
		Shard:          *initShard,
		Type:           tabletType,
		DbNameOverride: *initDbNameOverride,
		Tags:           initTags,
	}
	if port != 0 {
		tablet.PortMap["vt"] = port
	}
	if gRPCPort != 0 {
		tablet.PortMap["grpc"] = gRPCPort
	}
	if err := topo.TabletComplete(tablet); err != nil {
		return fmt.Errorf("InitTablet TabletComplete failed: %v", err)
	}

	// Now try to create the record (it will also fix up the
	// ShardReplication record if necessary).
	err = agent.TopoServer.CreateTablet(ctx, tablet)
	switch err {
	case nil:
		// It worked, we're good.
	case topo.ErrNodeExists:
		// The node already exists, will just try to update
		// it. So we read it first.
		oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias)
		if err != nil {
			return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err)
		}

		// Sanity check the keyspace and shard
		if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard {
			return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard)
		}

		// Then overwrite everything, ignoring version mismatch.
		if err := agent.TopoServer.UpdateTablet(ctx, topo.NewTabletInfo(tablet, -1)); err != nil {
			return fmt.Errorf("UpdateTablet failed: %v", err)
		}
	default:
		return fmt.Errorf("CreateTablet failed: %v", err)
	}

	return nil
}