Пример #1
0
func createSourceTablet(t *testing.T, ts topo.Server, keyspace, shard string) {
	vshard, kr, err := topo.ValidateShardName(shard)
	if err != nil {
		t.Fatalf("ValidateShardName(%v) failed: %v", shard, err)
	}

	ctx := context.Background()
	tablet := &pb.Tablet{
		Alias: &pb.TabletAlias{
			Cell: "cell1",
			Uid:  100,
		},
		Type:     pb.TabletType_REPLICA,
		KeyRange: kr,
		Keyspace: keyspace,
		Shard:    vshard,
		PortMap: map[string]int32{
			"vt": 80,
		},
	}
	if err := ts.CreateTablet(ctx, tablet); err != nil {
		t.Fatalf("CreateTablet failed: %v", err)
	}
	if err := topotools.UpdateTabletEndpoints(ctx, ts, tablet); err != nil {
		t.Fatalf("topotools.UpdateTabletEndpoints failed: %v", err)
	}
}
Пример #2
0
// updateServingGraph will update the serving graph if we need to.
func (agent *ActionAgent) updateServingGraph(tablet *topo.TabletInfo, targetTabletType pbt.TabletType) error {
	if topo.IsInServingGraph(targetTabletType) {
		if err := topotools.UpdateTabletEndpoints(agent.batchCtx, agent.TopoServer, tablet.Tablet); err != nil {
			return fmt.Errorf("UpdateTabletEndpoints failed: %v", err)
		}
	}
	return nil
}
Пример #3
0
func (agent *ActionAgent) verifyServingAddrs(ctx context.Context) error {
	ti := agent.Tablet()
	if !ti.IsRunningQueryService() {
		return nil
	}

	// Check to see our address is registered in the right place.
	return topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, ti.Tablet)
}
Пример #4
0
// createSourceTablet is a helper method to create the source tablet
// in the given keyspace/shard.
func createSourceTablet(t *testing.T, name string, ts topo.Server, keyspace, shard string) {
	vshard, kr, err := topo.ValidateShardName(shard)
	if err != nil {
		t.Fatalf("ValidateShardName(%v) failed: %v", shard, err)
	}

	ctx := context.Background()
	tablet := &topodatapb.Tablet{
		Alias: &topodatapb.TabletAlias{
			Cell: "cell1",
			Uid:  100,
		},
		Type:     topodatapb.TabletType_REPLICA,
		KeyRange: kr,
		Keyspace: keyspace,
		Shard:    vshard,
		PortMap: map[string]int32{
			"vt": 80,
		},
	}
	if err := ts.CreateTablet(ctx, tablet); err != nil {
		t.Fatalf("CreateTablet failed: %v", err)
	}
	if err := topotools.UpdateTabletEndpoints(ctx, ts, tablet); err != nil {
		t.Fatalf("topotools.UpdateTabletEndpoints failed: %v", err)
	}

	// register a tablet conn dialer that will return the instance
	// we want
	tabletconn.RegisterDialer(name, func(ctx context.Context, endPoint *topodatapb.EndPoint, k, s string, tabletType topodatapb.TabletType, timeout time.Duration) (tabletconn.TabletConn, error) {
		return &fakeTabletConn{
			endPoint:   endPoint,
			keyspace:   keyspace,
			shard:      vshard,
			tabletType: topodatapb.TabletType_REPLICA,
		}, nil
	})
	flag.Lookup("tablet_protocol").Value.Set(name)
}
Пример #5
0
// InitTablet initializes the tablet record if necessary.
func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error {
	// only enabled if one of init_tablet_type (when healthcheck
	// is disabled) or init_keyspace (when healthcheck is enabled)
	// is passed in, then check other parameters
	if *initTabletType == "" && *initKeyspace == "" {
		return nil
	}

	// figure out our default target type
	var tabletType topodatapb.TabletType
	if *initTabletType != "" {
		if *targetTabletType != "" {
			log.Fatalf("cannot specify both target_tablet_type and init_tablet_type parameters (as they might conflict)")
		}

		// use the type specified on the command line
		var err error
		tabletType, err = topoproto.ParseTabletType(*initTabletType)
		if err != nil {
			log.Fatalf("Invalid init tablet type %v: %v", *initTabletType, err)
		}

		if tabletType == topodatapb.TabletType_MASTER {
			// We disallow MASTER, so we don't have to change
			// shard.MasterAlias, and deal with the corner cases.
			log.Fatalf("init_tablet_type cannot be %v", tabletType)
		}

	} else if *targetTabletType != "" {
		if strings.ToUpper(*targetTabletType) == topodatapb.TabletType_name[int32(topodatapb.TabletType_MASTER)] {
			log.Fatalf("target_tablet_type cannot be '%v'. Use '%v' instead.", tabletType, topodatapb.TabletType_REPLICA)
		}

		// use spare, the healthcheck will turn us into what
		// we need to be eventually
		tabletType = topodatapb.TabletType_SPARE

	} else {
		log.Fatalf("if init tablet is enabled, one of init_tablet_type or target_tablet_type needs to be specified")
	}

	// create a context for this whole operation
	ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout)
	defer cancel()

	// since we're assigned to a shard, make sure it exists, see if
	// we are its master, and update its cells list if necessary
	if *initKeyspace == "" || *initShard == "" {
		log.Fatalf("if init tablet is enabled and the target type is not idle, init_keyspace and init_shard also need to be specified")
	}
	shard, _, err := topo.ValidateShardName(*initShard)
	if err != nil {
		log.Fatalf("cannot validate shard name: %v", err)
	}

	log.Infof("Reading shard record %v/%v", *initKeyspace, shard)

	// read the shard, create it if necessary
	si, err := topotools.GetOrCreateShard(ctx, agent.TopoServer, *initKeyspace, shard)
	if err != nil {
		return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err)
	}
	if si.MasterAlias != nil && topoproto.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) {
		// we are the current master for this shard (probably
		// means the master tablet process was just restarted),
		// so InitTablet as master.
		tabletType = topodatapb.TabletType_MASTER
	}

	// See if we need to add the tablet's cell to the shard's cell
	// list.  If we do, it has to be under the shard lock.
	if !si.HasCell(agent.TabletAlias.Cell) {
		actionNode := actionnode.UpdateShard()
		lockPath, err := actionNode.LockShard(ctx, agent.TopoServer, *initKeyspace, shard)
		if err != nil {
			return fmt.Errorf("LockShard(%v/%v) failed: %v", *initKeyspace, shard, err)
		}

		// re-read the shard with the lock
		si, err = agent.TopoServer.GetShard(ctx, *initKeyspace, shard)
		if err != nil {
			return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err)
		}

		// see if we really need to update it now
		if !si.HasCell(agent.TabletAlias.Cell) {
			si.Cells = append(si.Cells, agent.TabletAlias.Cell)

			// write it back
			if err := agent.TopoServer.UpdateShard(ctx, si); err != nil {
				return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err)
			}
		}

		// and unlock
		if err := actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, nil); err != nil {
			return err
		}
	}
	log.Infof("Initializing the tablet for type %v", tabletType)

	// figure out the hostname
	hostname := *tabletHostname
	if hostname != "" {
		log.Infof("Using hostname: %v from -tablet_hostname flag.", hostname)
	} else {
		hostname, err := netutil.FullyQualifiedHostname()
		if err != nil {
			return err
		}
		log.Infof("Using detected machine hostname: %v To change this, fix your machine network configuration or override it with -tablet_hostname.", hostname)
	}

	// create and populate tablet record
	tablet := &topodatapb.Tablet{
		Alias:          agent.TabletAlias,
		Hostname:       hostname,
		PortMap:        make(map[string]int32),
		Keyspace:       *initKeyspace,
		Shard:          *initShard,
		Type:           tabletType,
		DbNameOverride: *initDbNameOverride,
		Tags:           initTags,
	}
	if port != 0 {
		tablet.PortMap["vt"] = port
	}
	if gRPCPort != 0 {
		tablet.PortMap["grpc"] = gRPCPort
	}
	if err := topo.TabletComplete(tablet); err != nil {
		return fmt.Errorf("InitTablet TabletComplete failed: %v", err)
	}

	// now try to create the record
	err = agent.TopoServer.CreateTablet(ctx, tablet)
	switch err {
	case nil:
		// it worked, we're good, can update the replication graph
		if err := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); err != nil {
			return fmt.Errorf("UpdateTabletReplicationData failed: %v", err)
		}

	case topo.ErrNodeExists:
		// The node already exists, will just try to update
		// it. So we read it first.
		oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias)
		if err != nil {
			return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err)
		}

		// Sanity check the keyspace and shard
		if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard {
			return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard)
		}

		// And overwrite the rest
		*(oldTablet.Tablet) = *tablet
		if err := agent.TopoServer.UpdateTablet(ctx, oldTablet); err != nil {
			return fmt.Errorf("UpdateTablet failed: %v", err)
		}

		// Note we don't need to UpdateTabletReplicationData
		// as the tablet already existed with the right data
		// in the replication graph
	default:
		return fmt.Errorf("CreateTablet failed: %v", err)
	}

	// and now update the serving graph. Note we do that in any case,
	// to clean any inaccurate record from any part of the serving graph.
	if err := topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, tablet); err != nil {
		return fmt.Errorf("UpdateTabletEndpoints failed: %v", err)
	}

	return nil
}
Пример #6
0
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation
// tasks that ensure topology is self-consistent, and then marks the reparent as
// finished by updating the global shard record.
func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) {
	var wg sync.WaitGroup
	var errs concurrency.AllErrorRecorder
	oldMasterAlias := si.MasterAlias

	// Update the tablet records and serving graph for the old and new master concurrently.
	event.DispatchUpdate(ev, "updating old and new master tablet records")
	log.Infof("finalizeTabletExternallyReparented: updating tablet records")
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Update our own record to master.
		updatedTablet, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias,
			func(tablet *topodatapb.Tablet) error {
				tablet.Type = topodatapb.TabletType_MASTER
				tablet.HealthMap = nil
				return nil
			})
		if err != nil {
			errs.RecordError(err)
			return
		}

		// Update the serving graph for the tablet.
		if updatedTablet != nil {
			errs.RecordError(
				topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, updatedTablet))
		}
	}()

	if !topoproto.TabletAliasIsZero(oldMasterAlias) {
		wg.Add(1)
		go func() {
			// Forcibly demote the old master in topology, since we can't rely on the
			// old master to be up to change its own record.
			oldMasterTablet, err := agent.TopoServer.UpdateTabletFields(ctx, oldMasterAlias,
				func(tablet *topodatapb.Tablet) error {
					tablet.Type = topodatapb.TabletType_SPARE
					return nil
				})
			if err != nil {
				errs.RecordError(err)
				wg.Done()
				return
			}

			// We now know more about the old master, so add it to event data.
			ev.OldMaster = *oldMasterTablet

			// Update the serving graph.
			errs.RecordError(
				topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, oldMasterTablet))
			wg.Done()

			// Tell the old master to re-read its tablet record and change its state.
			// We don't need to wait for it.
			tmc := tmclient.NewTabletManagerClient()
			tmc.RefreshState(ctx, topo.NewTabletInfo(oldMasterTablet, -1))
		}()
	}

	tablet := agent.Tablet()

	// Wait for the tablet records to be updated. At that point, any rebuild will
	// see the new master, so we're ready to mark the reparent as done in the
	// global shard record.
	wg.Wait()
	if errs.HasErrors() {
		return errs.Error()
	}

	// Update the master field in the global shard record. We don't use a lock
	// here anymore. The lock was only to ensure that the global shard record
	// didn't get modified between the time when we read it and the time when we
	// write it back. Now we use an update loop pattern to do that instead.
	event.DispatchUpdate(ev, "updating global shard record")
	log.Infof("finalizeTabletExternallyReparented: updating global shard record")
	si, err = agent.TopoServer.UpdateShardFields(ctx, tablet.Keyspace, tablet.Shard, func(shard *topodatapb.Shard) error {
		shard.MasterAlias = tablet.Alias
		return nil
	})
	if err != nil {
		return err
	}

	// We already took care of updating the serving graph for the old and new masters.
	// All that's left now is in case of a cross-cell reparent, we need to update the
	// master cell setting in the SrvShard records of all cells.
	if oldMasterAlias == nil || oldMasterAlias.Cell != tablet.Alias.Cell {
		event.DispatchUpdate(ev, "rebuilding shard serving graph")
		log.Infof("finalizeTabletExternallyReparented: updating SrvShard in all cells for cross-cell reparent")
		if err := topotools.UpdateAllSrvShards(ctx, agent.TopoServer, si); err != nil {
			return err
		}
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}