// UpdateBlpCheckpoint returns a statement to update a value in the // _vt.blp_checkpoint table. func UpdateBlpCheckpoint(uid uint32, pos myproto.ReplicationPosition, timeUpdated int64, txTimestamp int64) string { if txTimestamp != 0 { return fmt.Sprintf( "UPDATE _vt.blp_checkpoint "+ "SET pos='%v', time_updated=%v, transaction_timestamp=%v "+ "WHERE source_shard_uid=%v", myproto.EncodeReplicationPosition(pos), timeUpdated, txTimestamp, uid) } return fmt.Sprintf( "UPDATE _vt.blp_checkpoint "+ "SET pos='%v', time_updated=%v "+ "WHERE source_shard_uid=%v", myproto.EncodeReplicationPosition(pos), timeUpdated, uid) }
func (s *server) PromoteSlave(ctx context.Context, request *pb.PromoteSlaveRequest) (*pb.PromoteSlaveResponse, error) { ctx = callinfo.GRPCCallInfo(ctx) response := &pb.PromoteSlaveResponse{} return response, s.agent.RPCWrapLockAction(ctx, actionnode.TabletActionPromoteSlave, request, response, true, func() error { position, err := s.agent.PromoteSlave(ctx) if err == nil { response.Position = myproto.EncodeReplicationPosition(position) } return err }) }
func (s *server) RunBlpUntil(ctx context.Context, request *pb.RunBlpUntilRequest) (*pb.RunBlpUntilResponse, error) { ctx = callinfo.GRPCCallInfo(ctx) response := &pb.RunBlpUntilResponse{} return response, s.agent.RPCWrapLock(ctx, actionnode.TabletActionRunBLPUntil, request, response, true, func() error { position, err := s.agent.RunBlpUntil(ctx, blproto.ProtoToBlpPositionList(request.BlpPositions), time.Duration(request.WaitTimeout)) if err == nil { response.Position = myproto.EncodeReplicationPosition(*position) } return err }) }
// InitSlave is part of the tmclient.TabletManagerClient interface func (client *Client) InitSlave(ctx context.Context, tablet *topo.TabletInfo, parent *pbt.TabletAlias, replicationPosition myproto.ReplicationPosition, timeCreatedNS int64) error { cc, c, err := client.dial(ctx, tablet) if err != nil { return err } defer cc.Close() _, err = c.InitSlave(ctx, &pb.InitSlaveRequest{ Parent: parent, ReplicationPosition: myproto.EncodeReplicationPosition(replicationPosition), TimeCreatedNs: timeCreatedNS, }) return err }
// PopulateReparentJournal is part of the tmclient.TabletManagerClient interface func (client *Client) PopulateReparentJournal(ctx context.Context, tablet *topo.TabletInfo, timeCreatedNS int64, actionName string, masterAlias *pbt.TabletAlias, pos myproto.ReplicationPosition) error { cc, c, err := client.dial(ctx, tablet) if err != nil { return err } defer cc.Close() _, err = c.PopulateReparentJournal(ctx, &pb.PopulateReparentJournalRequest{ TimeCreatedNs: timeCreatedNS, ActionName: actionName, MasterAlias: masterAlias, ReplicationPosition: myproto.EncodeReplicationPosition(pos), }) return err }
func (s *server) StopSlaveMinimum(ctx context.Context, request *pb.StopSlaveMinimumRequest) (*pb.StopSlaveMinimumResponse, error) { ctx = callinfo.GRPCCallInfo(ctx) response := &pb.StopSlaveMinimumResponse{} return response, s.agent.RPCWrapLock(ctx, actionnode.TabletActionStopSlaveMinimum, request, response, true, func() error { position, err := myproto.DecodeReplicationPosition(request.Position) if err != nil { return err } position, err = s.agent.StopSlaveMinimum(ctx, position, time.Duration(request.WaitTimeout)) if err == nil { response.Position = myproto.EncodeReplicationPosition(position) } return err }) }
// PromoteSlaveWhenCaughtUp is part of the tmclient.TabletManagerClient interface func (client *Client) PromoteSlaveWhenCaughtUp(ctx context.Context, tablet *topo.TabletInfo, pos myproto.ReplicationPosition) (myproto.ReplicationPosition, error) { cc, c, err := client.dial(ctx, tablet) if err != nil { return myproto.ReplicationPosition{}, err } defer cc.Close() response, err := c.PromoteSlaveWhenCaughtUp(ctx, &pb.PromoteSlaveWhenCaughtUpRequest{ Position: myproto.EncodeReplicationPosition(pos), }) if err != nil { return myproto.ReplicationPosition{}, err } position, err := myproto.DecodeReplicationPosition(response.Position) if err != nil { return myproto.ReplicationPosition{}, err } return position, err }
// StopSlaveMinimum is part of the tmclient.TabletManagerClient interface func (client *Client) StopSlaveMinimum(ctx context.Context, tablet *topo.TabletInfo, minPos myproto.ReplicationPosition, waitTime time.Duration) (myproto.ReplicationPosition, error) { cc, c, err := client.dial(ctx, tablet) if err != nil { return myproto.ReplicationPosition{}, err } defer cc.Close() response, err := c.StopSlaveMinimum(ctx, &pb.StopSlaveMinimumRequest{ Position: myproto.EncodeReplicationPosition(minPos), WaitTimeout: int64(waitTime), }) if err != nil { return myproto.ReplicationPosition{}, err } position, err := myproto.DecodeReplicationPosition(response.Position) if err != nil { return myproto.ReplicationPosition{}, err } return position, err }
// TestMigrateServedTypes creates keyspace "ks" with a source shard "0"
// and two destination shards "-80" and "80-", each with a master, a
// replica and an rdonly fake tablet. It then runs the vtctl commands
// SourceShardAdd and MigrateServedTypes through a VtctlPipe, migrating
// rdonly, replica and master in turn, and calls checkShardServedTypes
// on all three shards after every step.
func TestMigrateServedTypes(t *testing.T) {
	db := fakesqldb.Register()
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
	vp := NewVtctlPipe(t, ts)
	defer vp.Close()

	// create keyspace
	if err := ts.CreateKeyspace(context.Background(), "ks", &pb.Keyspace{
		ShardingColumnName: "keyspace_id",
		ShardingColumnType: pb.KeyspaceIdType_UINT64,
	}); err != nil {
		t.Fatalf("CreateKeyspace failed: %v", err)
	}

	// create the source shard
	sourceMaster := NewFakeTablet(t, wr, "cell1", 10, pb.TabletType_MASTER, db,
		TabletKeyspaceShard(t, "ks", "0"))
	sourceReplica := NewFakeTablet(t, wr, "cell1", 11, pb.TabletType_REPLICA, db,
		TabletKeyspaceShard(t, "ks", "0"))
	sourceRdonly := NewFakeTablet(t, wr, "cell1", 12, pb.TabletType_RDONLY, db,
		TabletKeyspaceShard(t, "ks", "0"))

	// create the first destination shard
	dest1Master := NewFakeTablet(t, wr, "cell1", 20, pb.TabletType_MASTER, db,
		TabletKeyspaceShard(t, "ks", "-80"))
	dest1Replica := NewFakeTablet(t, wr, "cell1", 21, pb.TabletType_REPLICA, db,
		TabletKeyspaceShard(t, "ks", "-80"))
	dest1Rdonly := NewFakeTablet(t, wr, "cell1", 22, pb.TabletType_RDONLY, db,
		TabletKeyspaceShard(t, "ks", "-80"))

	// create the second destination shard
	dest2Master := NewFakeTablet(t, wr, "cell1", 30, pb.TabletType_MASTER, db,
		TabletKeyspaceShard(t, "ks", "80-"))
	dest2Replica := NewFakeTablet(t, wr, "cell1", 31, pb.TabletType_REPLICA, db,
		TabletKeyspaceShard(t, "ks", "80-"))
	dest2Rdonly := NewFakeTablet(t, wr, "cell1", 32, pb.TabletType_RDONLY, db,
		TabletKeyspaceShard(t, "ks", "80-"))

	// double check the shards have the right served types
	// (the last argument is presumably the expected number of served
	// types for the shard -- confirm against checkShardServedTypes)
	checkShardServedTypes(t, ts, "0", 3)
	checkShardServedTypes(t, ts, "-80", 0)
	checkShardServedTypes(t, ts, "80-", 0)

	// sourceRdonly will see the refresh
	sourceRdonly.StartActionLoop(t, wr)
	defer sourceRdonly.StopActionLoop(t)

	// sourceReplica will see the refresh
	sourceReplica.StartActionLoop(t, wr)
	defer sourceReplica.StopActionLoop(t)

	// sourceMaster will see the refresh, and has to respond to it
	// also will be asked about its replication position.
	// The fake position is set before the action loop is started.
	sourceMaster.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
		GTIDSet: myproto.MariadbGTID{
			Domain:   5,
			Server:   456,
			Sequence: 892,
		},
	}
	sourceMaster.StartActionLoop(t, wr)
	defer sourceMaster.StopActionLoop(t)

	// dest1Rdonly will see the refresh
	dest1Rdonly.StartActionLoop(t, wr)
	defer dest1Rdonly.StopActionLoop(t)

	// dest1Replica will see the refresh
	dest1Replica.StartActionLoop(t, wr)
	defer dest1Replica.StopActionLoop(t)

	// dest1Master will see the refresh, and has to respond to it.
	// It will also need to respond to WaitBlpPosition, saying it's already caught up.
	// The canned checkpoint row holds the source master's encoded
	// position and an empty flags value.
	dest1Master.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*mproto.QueryResult{
		"SELECT pos, flags FROM _vt.blp_checkpoint WHERE source_shard_uid=0": &mproto.QueryResult{
			Rows: [][]sqltypes.Value{
				[]sqltypes.Value{
					sqltypes.MakeString([]byte(myproto.EncodeReplicationPosition(sourceMaster.FakeMysqlDaemon.CurrentMasterPosition))),
					sqltypes.MakeString([]byte("")),
				},
			},
		},
	}
	dest1Master.StartActionLoop(t, wr)
	defer dest1Master.StopActionLoop(t)

	// dest2Rdonly will see the refresh
	dest2Rdonly.StartActionLoop(t, wr)
	defer dest2Rdonly.StopActionLoop(t)

	// dest2Replica will see the refresh
	dest2Replica.StartActionLoop(t, wr)
	defer dest2Replica.StopActionLoop(t)

	// dest2Master will see the refresh, and has to respond to it.
	// It will also need to respond to WaitBlpPosition, saying it's already caught up.
	// Same canned checkpoint row as for dest1Master.
	dest2Master.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*mproto.QueryResult{
		"SELECT pos, flags FROM _vt.blp_checkpoint WHERE source_shard_uid=0": &mproto.QueryResult{
			Rows: [][]sqltypes.Value{
				[]sqltypes.Value{
					sqltypes.MakeString([]byte(myproto.EncodeReplicationPosition(sourceMaster.FakeMysqlDaemon.CurrentMasterPosition))),
					sqltypes.MakeString([]byte("")),
				},
			},
		},
	}
	dest2Master.StartActionLoop(t, wr)
	defer dest2Master.StopActionLoop(t)

	// simulate the clone, by fixing the dest shard record
	if err := vp.Run([]string{"SourceShardAdd", "--key_range=-", "ks/-80", "0", "ks/0"}); err != nil {
		t.Fatalf("SourceShardAdd failed: %v", err)
	}
	if err := vp.Run([]string{"SourceShardAdd", "--key_range=-", "ks/80-", "0", "ks/0"}); err != nil {
		t.Fatalf("SourceShardAdd failed: %v", err)
	}

	// migrate rdonly over
	if err := vp.Run([]string{"MigrateServedTypes", "ks/0", "rdonly"}); err != nil {
		t.Fatalf("MigrateServedType(rdonly) failed: %v", err)
	}

	checkShardServedTypes(t, ts, "0", 2)
	checkShardServedTypes(t, ts, "-80", 1)
	checkShardServedTypes(t, ts, "80-", 1)

	// migrate replica over
	if err := vp.Run([]string{"MigrateServedTypes", "ks/0", "replica"}); err != nil {
		t.Fatalf("MigrateServedType(replica) failed: %v", err)
	}

	checkShardServedTypes(t, ts, "0", 1)
	checkShardServedTypes(t, ts, "-80", 2)
	checkShardServedTypes(t, ts, "80-", 2)

	// migrate master over
	if err := vp.Run([]string{"MigrateServedTypes", "ks/0", "master"}); err != nil {
		t.Fatalf("MigrateServedType(master) failed: %v", err)
	}

	checkShardServedTypes(t, ts, "0", 0)
	checkShardServedTypes(t, ts, "-80", 3)
	checkShardServedTypes(t, ts, "80-", 3)
}
// PopulateBlpCheckpoint returns a statement to populate the first value into // the _vt.blp_checkpoint table. func PopulateBlpCheckpoint(index uint32, pos myproto.ReplicationPosition, timeUpdated int64, flags string) string { return fmt.Sprintf("INSERT INTO _vt.blp_checkpoint "+ "(source_shard_uid, pos, time_updated, transaction_timestamp, flags) "+ "VALUES (%v, '%v', %v, 0, '%v')", index, myproto.EncodeReplicationPosition(pos), timeUpdated, flags) }
// ApplyBinlogEvents makes an RPC request to BinlogServer // and processes the events. It will return nil if the provided context // was canceled, or if we reached the stopping point. // It will return io.EOF if the server stops sending us updates. // It may return any other error it encounters. func (blp *BinlogPlayer) ApplyBinlogEvents(ctx context.Context) error { if len(blp.tables) > 0 { log.Infof("BinlogPlayer client %v for tables %v starting @ '%v', server: %v", blp.blpPos.Uid, blp.tables, blp.blpPos.Position, blp.endPoint, ) } else { log.Infof("BinlogPlayer client %v for keyrange '%v-%v' starting @ '%v', server: %v", blp.blpPos.Uid, hex.EncodeToString(blp.keyRange.Start), hex.EncodeToString(blp.keyRange.End), blp.blpPos.Position, blp.endPoint, ) } if !blp.stopPosition.IsZero() { // We need to stop at some point. Sanity check the point. switch { case blp.blpPos.Position.Equal(blp.stopPosition): log.Infof("Not starting BinlogPlayer, we're already at the desired position %v", blp.stopPosition) return nil case blp.blpPos.Position.AtLeast(blp.stopPosition): return fmt.Errorf("starting point %v greater than stopping point %v", blp.blpPos.Position, blp.stopPosition) default: log.Infof("Will stop player when reaching %v", blp.stopPosition) } } clientFactory, ok := clientFactories[*binlogPlayerProtocol] if !ok { return fmt.Errorf("no binlog player client factory named %v", *binlogPlayerProtocol) } blplClient := clientFactory() err := blplClient.Dial(blp.endPoint, *binlogPlayerConnTimeout) if err != nil { log.Errorf("Error dialing binlog server: %v", err) return fmt.Errorf("error dialing binlog server: %v", err) } defer blplClient.Close() // Get the current charset of our connection, so we can ask the stream server // to check that they match. The streamer will also only send per-statement // charset data if that statement's charset is different from what we specify. 
if dbClient, ok := blp.dbClient.(*DBClient); ok { blp.defaultCharset, err = dbClient.dbConn.GetCharset() if err != nil { return fmt.Errorf("can't get charset to request binlog stream: %v", err) } log.Infof("original charset: %v", blp.defaultCharset) blp.currentCharset = blp.defaultCharset // Restore original charset when we're done. defer func() { log.Infof("restoring original charset %v", blp.defaultCharset) if csErr := dbClient.dbConn.SetCharset(blp.defaultCharset); csErr != nil { log.Errorf("can't restore original charset %v: %v", blp.defaultCharset, csErr) } }() } var responseChan chan *proto.BinlogTransaction var errFunc ErrFunc if len(blp.tables) > 0 { responseChan, errFunc, err = blplClient.StreamTables(ctx, myproto.EncodeReplicationPosition(blp.blpPos.Position), blp.tables, &blp.defaultCharset) } else { responseChan, errFunc, err = blplClient.StreamKeyRange(ctx, myproto.EncodeReplicationPosition(blp.blpPos.Position), key.ProtoToKeyspaceIdType(blp.keyspaceIdType), blp.keyRange, &blp.defaultCharset) } if err != nil { log.Errorf("Error sending streaming query to binlog server: %v", err) return fmt.Errorf("error sending streaming query to binlog server: %v", err) } for response := range responseChan { for { ok, err = blp.processTransaction(response) if err != nil { return fmt.Errorf("Error in processing binlog event %v", err) } if ok { if !blp.stopPosition.IsZero() { if blp.blpPos.Position.AtLeast(blp.stopPosition) { log.Infof("Reached stopping position, done playing logs") return nil } } break } log.Infof("Retrying txn") time.Sleep(1 * time.Second) } } switch err := errFunc(); err { case nil: return io.EOF case context.Canceled: return nil default: // if the context is canceled, we return nil (some RPC // implementations will remap the context error to their own // errors) select { case <-ctx.Done(): if ctx.Err() == context.Canceled { return nil } default: } return fmt.Errorf("Error received from ServeBinlog %v", err) } }
// BlpPositionToProto converts a BlpPosition to a proto3 func BlpPositionToProto(b *BlpPosition) *pbt.BlpPosition { return &pbt.BlpPosition{ Uid: b.Uid, Position: myproto.EncodeReplicationPosition(b.Position), } }
func TestMigrateServedFrom(t *testing.T) { ctx := context.Background() ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"}) wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second) vp := NewVtctlPipe(t, ts) defer vp.Close() // create the source keyspace tablets sourceMaster := NewFakeTablet(t, wr, "cell1", 10, pb.TabletType_MASTER, TabletKeyspaceShard(t, "source", "0")) sourceReplica := NewFakeTablet(t, wr, "cell1", 11, pb.TabletType_REPLICA, TabletKeyspaceShard(t, "source", "0")) sourceRdonly := NewFakeTablet(t, wr, "cell1", 12, pb.TabletType_RDONLY, TabletKeyspaceShard(t, "source", "0")) // create the destination keyspace, served form source // double check it has all entries in map if err := vp.Run([]string{"CreateKeyspace", "-served_from", "master:source,replica:source,rdonly:source", "dest"}); err != nil { t.Fatalf("CreateKeyspace(dest) failed: %v", err) } ki, err := ts.GetKeyspace(ctx, "dest") if err != nil { t.Fatalf("GetKeyspace failed: %v", err) } if len(ki.ServedFroms) != 3 { t.Fatalf("bad initial dest ServedFroms: %+v", ki.ServedFroms) } // create the destination keyspace tablets destMaster := NewFakeTablet(t, wr, "cell1", 20, pb.TabletType_MASTER, TabletKeyspaceShard(t, "dest", "0")) destReplica := NewFakeTablet(t, wr, "cell1", 21, pb.TabletType_REPLICA, TabletKeyspaceShard(t, "dest", "0")) destRdonly := NewFakeTablet(t, wr, "cell1", 22, pb.TabletType_RDONLY, TabletKeyspaceShard(t, "dest", "0")) // sourceRdonly will see the refresh sourceRdonly.StartActionLoop(t, wr) defer sourceRdonly.StopActionLoop(t) // sourceReplica will see the refresh sourceReplica.StartActionLoop(t, wr) defer sourceReplica.StopActionLoop(t) // sourceMaster will see the refresh, and has to respond to it // also will be asked about its replication position. 
sourceMaster.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{ GTIDSet: myproto.MariadbGTID{ Domain: 5, Server: 456, Sequence: 892, }, } sourceMaster.StartActionLoop(t, wr) defer sourceMaster.StopActionLoop(t) // destRdonly will see the refresh destRdonly.StartActionLoop(t, wr) defer destRdonly.StopActionLoop(t) // destReplica will see the refresh destReplica.StartActionLoop(t, wr) defer destReplica.StopActionLoop(t) // destMaster will see the refresh, and has to respond to it. // It will also need to respond to WaitBlpPosition, saying it's already caught up. destMaster.FakeMysqlDaemon.FetchSuperQueryMap = map[string]*mproto.QueryResult{ "SELECT pos, flags FROM _vt.blp_checkpoint WHERE source_shard_uid=0": &mproto.QueryResult{ Rows: [][]sqltypes.Value{ []sqltypes.Value{ sqltypes.MakeString([]byte(myproto.EncodeReplicationPosition(sourceMaster.FakeMysqlDaemon.CurrentMasterPosition))), sqltypes.MakeString([]byte("")), }, }, }, } destMaster.StartActionLoop(t, wr) defer destMaster.StopActionLoop(t) // simulate the clone, by fixing the dest shard record if err := vp.Run([]string{"SourceShardAdd", "--tables", "gone1,gone2", "dest/0", "0", "source/0"}); err != nil { t.Fatalf("SourceShardAdd failed: %v", err) } // migrate rdonly over if err := vp.Run([]string{"MigrateServedFrom", "dest/0", "rdonly"}); err != nil { t.Fatalf("MigrateServedFrom(rdonly) failed: %v", err) } // check it's gone from keyspace ki, err = ts.GetKeyspace(ctx, "dest") if err != nil { t.Fatalf("GetKeyspace failed: %v", err) } if len(ki.ServedFroms) != 2 || ki.GetServedFrom(pb.TabletType_RDONLY) != nil { t.Fatalf("bad initial dest ServedFroms: %v", ki.ServedFroms) } // check the source shard has the right blacklisted tables si, err := ts.GetShard(ctx, "source", "0") if err != nil { t.Fatalf("GetShard failed: %v", err) } if len(si.TabletControls) != 1 || !reflect.DeepEqual(si.TabletControls, []*pb.Shard_TabletControl{ &pb.Shard_TabletControl{ TabletType: pb.TabletType_RDONLY, 
BlacklistedTables: []string{"gone1", "gone2"}, }, }) { t.Fatalf("rdonly type doesn't have right blacklisted tables") } // migrate replica over if err := vp.Run([]string{"MigrateServedFrom", "dest/0", "replica"}); err != nil { t.Fatalf("MigrateServedFrom(replica) failed: %v", err) } // check it's gone from keyspace ki, err = ts.GetKeyspace(ctx, "dest") if err != nil { t.Fatalf("GetKeyspace failed: %v", err) } if len(ki.ServedFroms) != 1 || ki.GetServedFrom(pb.TabletType_REPLICA) != nil { t.Fatalf("bad initial dest ServedFrom: %+v", ki.ServedFroms) } // check the source shard has the right blacklisted tables si, err = ts.GetShard(ctx, "source", "0") if err != nil { t.Fatalf("GetShard failed: %v", err) } if len(si.TabletControls) != 2 || !reflect.DeepEqual(si.TabletControls, []*pb.Shard_TabletControl{ &pb.Shard_TabletControl{ TabletType: pb.TabletType_RDONLY, BlacklistedTables: []string{"gone1", "gone2"}, }, &pb.Shard_TabletControl{ TabletType: pb.TabletType_REPLICA, BlacklistedTables: []string{"gone1", "gone2"}, }, }) { t.Fatalf("replica type doesn't have right blacklisted tables") } // migrate master over if err := vp.Run([]string{"MigrateServedFrom", "dest/0", "master"}); err != nil { t.Fatalf("MigrateServedFrom(master) failed: %v", err) } // make sure ServedFromMap is empty ki, err = ts.GetKeyspace(ctx, "dest") if err != nil { t.Fatalf("GetKeyspace failed: %v", err) } if len(ki.ServedFroms) > 0 { t.Fatalf("dest keyspace still is ServedFrom: %+v", ki.ServedFroms) } // check the source shard has the right blacklisted tables si, err = ts.GetShard(ctx, "source", "0") if err != nil { t.Fatalf("GetShard failed: %v", err) } if len(si.TabletControls) != 3 || !reflect.DeepEqual(si.TabletControls, []*pb.Shard_TabletControl{ &pb.Shard_TabletControl{ TabletType: pb.TabletType_RDONLY, BlacklistedTables: []string{"gone1", "gone2"}, }, &pb.Shard_TabletControl{ TabletType: pb.TabletType_REPLICA, BlacklistedTables: []string{"gone1", "gone2"}, }, &pb.Shard_TabletControl{ 
TabletType: pb.TabletType_MASTER, BlacklistedTables: []string{"gone1", "gone2"}, }, }) { t.Fatalf("master type doesn't have right blacklisted tables") } }
// PopulateReparentJournal returns the SQL command to use to populate // the _vt.reparent_journal table, as well as the time_created_ns // value used. func PopulateReparentJournal(timeCreatedNS int64, actionName, masterAlias string, pos proto.ReplicationPosition) string { return fmt.Sprintf("INSERT INTO _vt.reparent_journal "+ "(time_created_ns, action_name, master_alias, replication_position) "+ "VALUES (%v, '%v', '%v', '%v')", timeCreatedNS, actionName, masterAlias, proto.EncodeReplicationPosition(pos)) }