// Commit commits the current transaction. There are no retries on this operation. func (stc *ScatterConn) Commit(ctx context.Context, session *SafeSession) (err error) { if session == nil { return vterrors.FromError( vtrpc.ErrorCode_BAD_INPUT, fmt.Errorf("cannot commit: empty session"), ) } if !session.InTransaction() { return vterrors.FromError( vtrpc.ErrorCode_NOT_IN_TX, fmt.Errorf("cannot commit: not in transaction"), ) } committing := true for _, shardSession := range session.ShardSessions { tabletType := topo.TabletTypeToProto(shardSession.TabletType) if !committing { stc.gateway.Rollback(ctx, shardSession.Keyspace, shardSession.Shard, tabletType, shardSession.TransactionId) continue } if err = stc.gateway.Commit(ctx, shardSession.Keyspace, shardSession.Shard, tabletType, shardSession.TransactionId); err != nil { committing = false } } session.Reset() return err }
// StreamExecuteKeyRanges2 is the RPC version of // vtgateservice.VTGateService method func (vtg *VTGate) StreamExecuteKeyRanges2(ctx context.Context, request *proto.KeyRangeQuery, sendReply func(interface{}) error) (err error) { defer vtg.server.HandlePanic(&err) ctx = callerid.NewContext(ctx, callerid.GoRPCEffectiveCallerID(request.CallerID), callerid.NewImmediateCallerID("gorpc client")) vtgErr := vtg.server.StreamExecuteKeyRanges(ctx, request.Sql, request.BindVariables, request.Keyspace, key.KeyRangesToProto(request.KeyRanges), topo.TabletTypeToProto(request.TabletType), func(value *proto.QueryResult) error { return sendReply(value) }) if vtgErr == nil { return nil } if *vtgate.RPCErrorOnlyInReply { // If there was an app error, send a QueryResult back with it. qr := new(proto.QueryResult) vtgate.AddVtGateErrorToQueryResult(vtgErr, qr) // Sending back errors this way is not backwards compatible. If a (new) server sends an additional // QueryResult with an error, and the (old) client doesn't know how to read it, it will cause // problems where the client will get out of sync with the number of QueryResults sent. // That's why this the error is only sent this way when the --rpc_errors_only_in_reply flag is set // (signalling that all clients are able to handle new-style errors). return sendReply(qr) } return vtgErr }
func (agent *ActionAgent) allowQueries(tablet *topo.Tablet, blacklistedTables []string) error { // if the query service is already running, we're not starting it again if agent.QueryServiceControl.IsServing() { return nil } // only for real instances if agent.DBConfigs != nil { // Update our DB config to match the info we have in the tablet if agent.DBConfigs.App.DbName == "" { agent.DBConfigs.App.DbName = tablet.DbName() } agent.DBConfigs.App.Keyspace = tablet.Keyspace agent.DBConfigs.App.Shard = tablet.Shard if tablet.Type != topo.TYPE_MASTER { agent.DBConfigs.App.EnableInvalidator = true } else { agent.DBConfigs.App.EnableInvalidator = false } } err := agent.loadKeyspaceAndBlacklistRules(tablet, blacklistedTables) if err != nil { return err } return agent.QueryServiceControl.AllowQueries(&pb.Target{ Keyspace: tablet.Keyspace, Shard: tablet.Shard, TabletType: topo.TabletTypeToProto(tablet.Type), }, agent.DBConfigs, agent.SchemaOverrides, agent.MysqlDaemon) }
func (conn *vtgateConn) StreamExecuteKeyspaceIds(ctx context.Context, query string, keyspace string, keyspaceIds []key.KeyspaceId, bindVars map[string]interface{}, tabletType topo.TabletType) (<-chan *mproto.QueryResult, vtgateconn.ErrFunc, error) { req := &pb.StreamExecuteKeyspaceIdsRequest{ Query: tproto.BoundQueryToProto3(query, bindVars), Keyspace: keyspace, KeyspaceIds: key.KeyspaceIdsToProto(keyspaceIds), TabletType: topo.TabletTypeToProto(tabletType), } stream, err := conn.c.StreamExecuteKeyspaceIds(ctx, req) if err != nil { return nil, nil, err } sr := make(chan *mproto.QueryResult, 10) var finalError error go func() { for { ser, err := stream.Recv() if err != nil { if err != io.EOF { finalError = err } close(sr) return } if ser.Error != nil { finalError = vterrors.FromVtRPCError(ser.Error) close(sr) return } sr <- mproto.Proto3ToQueryResult(ser.Result) } }() return sr, func() error { return finalError }, nil }
// ChangeType is part of the tmclient.TabletManagerClient interface func (client *Client) ChangeType(ctx context.Context, tablet *topo.TabletInfo, dbType topo.TabletType) error { cc, c, err := client.dial(ctx, tablet) if err != nil { return err } defer cc.Close() _, err = c.ChangeType(ctx, &pb.ChangeTypeRequest{ TabletType: topo.TabletTypeToProto(dbType), }) return err }
// Rollback rolls back the current transaction. There are no retries on this operation. func (stc *ScatterConn) Rollback(ctx context.Context, session *SafeSession) (err error) { if session == nil { return nil } for _, shardSession := range session.ShardSessions { tabletType := topo.TabletTypeToProto(shardSession.TabletType) stc.gateway.Rollback(ctx, shardSession.Keyspace, shardSession.Shard, tabletType, shardSession.TransactionId) } session.Reset() return nil }
// RunHealthCheck is part of the tmclient.TabletManagerClient interface func (client *Client) RunHealthCheck(ctx context.Context, tablet *topo.TabletInfo, targetTabletType topo.TabletType) error { cc, c, err := client.dial(ctx, tablet) if err != nil { return err } defer cc.Close() _, err = c.RunHealthCheck(ctx, &pb.RunHealthCheckRequest{ TabletType: topo.TabletTypeToProto(targetTabletType), }) return err }
// GetEndPoints returns addresses for a tablet type in a shard // in a keyspace. func (tr *TopoReader) GetEndPoints(ctx context.Context, req *topo.GetEndPointsArgs, reply *pb.EndPoints) (err error) { tr.queryCount.Add(req.Cell, 1) addrs, _, err := tr.ts.GetEndPoints(ctx, req.Cell, req.Keyspace, req.Shard, topo.TabletTypeToProto(req.TabletType)) if err != nil { log.Warningf("GetEndPoints(%v,%v,%v,%v) failed: %v", req.Cell, req.Keyspace, req.Shard, req.TabletType, err) tr.errorCount.Add(req.Cell, 1) return err } *reply = *addrs return nil }
// StreamExecute is the RPC version of vtgateservice.VTGateService method func (vtg *VTGate) StreamExecute(ctx context.Context, request *proto.Query, sendReply func(interface{}) error) (err error) { defer vtg.server.HandlePanic(&err) ctx = callerid.NewContext(ctx, callerid.GoRPCEffectiveCallerID(request.CallerID), callerid.NewImmediateCallerID("gorpc client")) return vtg.server.StreamExecute(ctx, request.Sql, request.BindVariables, topo.TabletTypeToProto(request.TabletType), func(value *proto.QueryResult) error { return sendReply(value) }) }
// ExecuteBatchKeyspaceIds is the RPC version of // vtgateservice.VTGateService method func (vtg *VTGate) ExecuteBatchKeyspaceIds(ctx context.Context, request *proto.KeyspaceIdBatchQuery, reply *proto.QueryResultList) (err error) { defer vtg.server.HandlePanic(&err) ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*rpcTimeout)) defer cancel() ctx = callerid.NewContext(ctx, callerid.GoRPCEffectiveCallerID(request.CallerID), callerid.NewImmediateCallerID("gorpc client")) vtgErr := vtg.server.ExecuteBatchKeyspaceIds(ctx, request.Queries, topo.TabletTypeToProto(request.TabletType), request.AsTransaction, request.Session, reply) vtgate.AddVtGateError(vtgErr, &reply.Err) return nil }
// Execute is the RPC version of vtgateservice.VTGateService method func (vtg *VTGate) Execute(ctx context.Context, request *proto.Query, reply *proto.QueryResult) (err error) { defer vtg.server.HandlePanic(&err) ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*rpcTimeout)) defer cancel() ctx = callerid.NewContext(ctx, callerid.GoRPCEffectiveCallerID(request.CallerID), callerid.NewImmediateCallerID("gorpc client")) vtgErr := vtg.server.Execute(ctx, request.Sql, request.BindVariables, topo.TabletTypeToProto(request.TabletType), request.Session, request.NotInTransaction, reply) vtgate.AddVtGateErrorToQueryResult(vtgErr, reply) if *vtgate.RPCErrorOnlyInReply { return nil } return vtgErr }
func (conn *vtgateConn) Execute(ctx context.Context, query string, bindVars map[string]interface{}, tabletType topo.TabletType, notInTransaction bool, session interface{}) (*mproto.QueryResult, interface{}, error) { var s *pb.Session if session != nil { s = session.(*pb.Session) } request := &pb.ExecuteRequest{ Session: s, Query: tproto.BoundQueryToProto3(query, bindVars), TabletType: topo.TabletTypeToProto(tabletType), NotInTransaction: notInTransaction, } response, err := conn.c.Execute(ctx, request) if err != nil { return nil, session, err } if response.Error != nil { return nil, response.Session, vterrors.FromVtRPCError(response.Error) } return mproto.Proto3ToQueryResult(response.Result), response.Session, nil }
func (conn *vtgateConn) ExecuteBatchKeyspaceIds(ctx context.Context, queries []proto.BoundKeyspaceIdQuery, tabletType topo.TabletType, asTransaction bool, session interface{}) ([]mproto.QueryResult, interface{}, error) { var s *pb.Session if session != nil { s = session.(*pb.Session) } request := &pb.ExecuteBatchKeyspaceIdsRequest{ Session: s, Queries: proto.BoundKeyspaceIdQueriesToProto(queries), TabletType: topo.TabletTypeToProto(tabletType), AsTransaction: asTransaction, } response, err := conn.c.ExecuteBatchKeyspaceIds(ctx, request) if err != nil { return nil, session, err } if response.Error != nil { return nil, response.Session, vterrors.FromVtRPCError(response.Error) } return mproto.Proto3ToQueryResults(response.Results), response.Session, nil }
// StreamExecute2 is the RPC version of vtgateservice.VTGateService method func (vtg *VTGate) StreamExecute2(ctx context.Context, request *proto.Query, sendReply func(interface{}) error) (err error) { defer vtg.server.HandlePanic(&err) ctx = callerid.NewContext(ctx, callerid.GoRPCEffectiveCallerID(request.CallerID), callerid.NewImmediateCallerID("gorpc client")) vtgErr := vtg.server.StreamExecute(ctx, request.Sql, request.BindVariables, topo.TabletTypeToProto(request.TabletType), func(value *proto.QueryResult) error { return sendReply(value) }) if vtgErr == nil { return nil } // If there was an app error, send a QueryResult back with it. qr := new(proto.QueryResult) vtgate.AddVtGateError(vtgErr, &qr.Err) return sendReply(qr) }
// SessionToProto transforms a Session into proto3 func SessionToProto(s *Session) *pb.Session { if s == nil { return nil } result := &pb.Session{ InTransaction: s.InTransaction, } result.ShardSessions = make([]*pb.Session_ShardSession, len(s.ShardSessions)) for i, ss := range s.ShardSessions { result.ShardSessions[i] = &pb.Session_ShardSession{ Target: &pbq.Target{ Keyspace: ss.Keyspace, Shard: ss.Shard, TabletType: topo.TabletTypeToProto(ss.TabletType), }, TransactionId: ss.TransactionId, } } return result }
// ExecuteEntityIds is the RPC version of vtgateservice.VTGateService method func (vtg *VTGate) ExecuteEntityIds(ctx context.Context, request *proto.EntityIdsQuery, reply *proto.QueryResult) (err error) { defer vtg.server.HandlePanic(&err) ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*rpcTimeout)) defer cancel() ctx = callerid.NewContext(ctx, callerid.GoRPCEffectiveCallerID(request.CallerID), callerid.NewImmediateCallerID("gorpc client")) vtgErr := vtg.server.ExecuteEntityIds(ctx, request.Sql, request.BindVariables, request.Keyspace, request.EntityColumnName, proto.EntityIdsToProto(request.EntityKeyspaceIDs), topo.TabletTypeToProto(request.TabletType), request.Session, request.NotInTransaction, reply) vtgate.AddVtGateError(vtgErr, &reply.Err) return nil }
// InitializeConnections pre-initializes connections for all shards. // It also populates topology cache by accessing it. // It is not necessary to call this function before serving queries, // but it would reduce connection overhead when serving. func (stc *ScatterConn) InitializeConnections(ctx context.Context) error { ksNames, err := stc.toposerv.GetSrvKeyspaceNames(ctx, stc.cell) if err != nil { return err } var wg sync.WaitGroup var errRecorder concurrency.AllErrorRecorder for _, ksName := range ksNames { wg.Add(1) go func(keyspace string) { defer wg.Done() // get SrvKeyspace for cell/keyspace ks, err := stc.toposerv.GetSrvKeyspace(ctx, stc.cell, keyspace) if err != nil { errRecorder.RecordError(err) return } // work on all shards of all serving tablet types for tabletType, ksPartition := range ks.Partitions { tt := topo.TabletTypeToProto(tabletType) for _, shard := range ksPartition.ShardReferences { wg.Add(1) go func(shardName string, tabletType pb.TabletType) { defer wg.Done() err = stc.gateway.Dial(ctx, keyspace, shardName, tabletType) if err != nil { errRecorder.RecordError(err) return } }(shard.Name, tt) } } }(ksName) } wg.Wait() if errRecorder.HasErrors() { return errRecorder.Error() } return nil }
func (conn *vtgateConn) ExecuteKeyspaceIds(ctx context.Context, query string, keyspace string, keyspaceIds []key.KeyspaceId, bindVars map[string]interface{}, tabletType topo.TabletType, notInTransaction bool, session interface{}) (*mproto.QueryResult, interface{}, error) { var s *pb.Session if session != nil { s = session.(*pb.Session) } request := &pb.ExecuteKeyspaceIdsRequest{ CallerId: callerid.EffectiveCallerIDFromContext(ctx), Session: s, Query: tproto.BoundQueryToProto3(query, bindVars), Keyspace: keyspace, KeyspaceIds: key.KeyspaceIdsToProto(keyspaceIds), TabletType: topo.TabletTypeToProto(tabletType), NotInTransaction: notInTransaction, } response, err := conn.c.ExecuteKeyspaceIds(ctx, request) if err != nil { return nil, session, err } if response.Error != nil { return nil, response.Session, vterrors.FromVtRPCError(response.Error) } return mproto.Proto3ToQueryResult(response.Result), response.Session, nil }
// NewShardConn creates a new ShardConn. It creates a Balancer using // serv, cell, keyspace, tabletType and retryDelay. retryCount is the max // number of retries before a ShardConn returns an error on an operation. func NewShardConn(ctx context.Context, serv SrvTopoServer, cell, keyspace, shard string, tabletType topo.TabletType, retryDelay time.Duration, retryCount int, connTimeoutTotal, connTimeoutPerConn, connLife time.Duration, tabletConnectTimings *stats.MultiTimings) *ShardConn { getAddresses := func() (*pb.EndPoints, error) { endpoints, _, err := serv.GetEndPoints(ctx, cell, keyspace, shard, topo.TabletTypeToProto(tabletType)) if err != nil { return nil, fmt.Errorf("endpoints fetch error: %v", err) } return endpoints, nil } blc := NewBalancer(getAddresses, retryDelay) var ticker *timer.RandTicker if tabletType != topo.TYPE_MASTER { ticker = timer.NewRandTicker(connLife, connLife/2) } sdc := &ShardConn{ keyspace: keyspace, shard: shard, tabletType: tabletType, retryDelay: retryDelay, retryCount: retryCount, connTimeoutTotal: connTimeoutTotal, connTimeoutPerConn: connTimeoutPerConn, connLife: connLife, balancer: blc, ticker: ticker, consolidator: sync2.NewConsolidator(), connectTimings: tabletConnectTimings, } if ticker != nil { go func() { for range ticker.C { sdc.closeCurrent() } }() } return sdc }
// DbServingGraph returns the ServingGraph for the given cell.
// It fans out one goroutine per keyspace, and within each keyspace one
// goroutine per shard, collecting all failures into the returned graph's
// Errors field rather than aborting.
func DbServingGraph(ctx context.Context, ts topo.Server, cell string) (servingGraph *ServingGraph) {
	servingGraph = &ServingGraph{
		Cell:      cell,
		Keyspaces: make(map[string]*KeyspaceNodes),
	}
	rec := concurrency.AllErrorRecorder{}

	keyspaces, err := ts.GetSrvKeyspaceNames(ctx, cell)
	if err != nil {
		// Without the keyspace list there is nothing else to build.
		servingGraph.Errors = append(servingGraph.Errors, fmt.Sprintf("GetSrvKeyspaceNames failed: %v", err))
		return
	}
	wg := sync.WaitGroup{}
	// Only these tablet types are considered when walking partitions.
	servingTypes := []topo.TabletType{topo.TYPE_MASTER, topo.TYPE_REPLICA, topo.TYPE_RDONLY}
	for _, keyspace := range keyspaces {
		kn := newKeyspaceNodes()
		servingGraph.Keyspaces[keyspace] = kn
		wg.Add(1)
		// One goroutine per keyspace; keyspace and kn are passed as
		// arguments so each goroutine works on its own values.
		go func(keyspace string, kn *KeyspaceNodes) {
			defer wg.Done()

			ks, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetSrvKeyspace(%v, %v) failed: %v", cell, keyspace, err))
				return
			}

			// Record ServedFrom redirects, keyed by tablet type.
			if len(ks.ServedFrom) > 0 {
				kn.ServedFrom = make(map[topo.TabletType]string)
				for _, sf := range ks.ServedFrom {
					kn.ServedFrom[topo.ProtoToTabletType(sf.TabletType)] = sf.Keyspace
				}
			}

			// displayedShards ensures each shard appears once even if it
			// shows up in several tablet-type partitions.
			displayedShards := make(map[string]bool)
			for _, partitionTabletType := range servingTypes {
				kp := topoproto.SrvKeyspaceGetPartition(ks, topo.TabletTypeToProto(partitionTabletType))
				if kp == nil {
					continue
				}
				for _, srvShard := range kp.ShardReferences {
					shard := srvShard.Name
					if displayedShards[shard] {
						continue
					}
					displayedShards[shard] = true

					sn := &ShardNodes{
						Name: shard,
					}
					kn.ShardNodes = append(kn.ShardNodes, sn)
					wg.Add(1)
					// One goroutine per shard to fetch its tablet types
					// and endpoints.
					go func(shard string, sn *ShardNodes) {
						defer wg.Done()
						tabletTypes, err := ts.GetSrvTabletTypesPerShard(ctx, cell, keyspace, shard)
						if err != nil {
							rec.RecordError(fmt.Errorf("GetSrvTabletTypesPerShard(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
							return
						}
						for _, tabletType := range tabletTypes {
							endPoints, _, err := ts.GetEndPoints(ctx, cell, keyspace, shard, tabletType)
							if err != nil {
								rec.RecordError(fmt.Errorf("GetEndPoints(%v, %v, %v, %v) failed: %v", cell, keyspace, shard, tabletType, err))
								continue
							}
							for _, endPoint := range endPoints.Entries {
								// Group the endpoint under its tablet type,
								// creating the group on first use.
								var tabletNode *TabletNodesByType
								for _, t := range sn.TabletNodes {
									if t.TabletType == tabletType {
										tabletNode = t
										break
									}
								}
								if tabletNode == nil {
									tabletNode = &TabletNodesByType{
										TabletType: tabletType,
									}
									sn.TabletNodes = append(sn.TabletNodes, tabletNode)
								}
								tabletNode.Nodes = append(tabletNode.Nodes, newTabletNodeFromEndPoint(endPoint, cell))
							}
						}
					}(shard, sn)
				}
			}
		}(keyspace, kn)
	}
	wg.Wait()
	servingGraph.Errors = rec.ErrorStrings()
	return
}
// TestTabletControl verifies the shard's TabletControl record can disable
// query service in a tablet.
// The scenario walked through here:
//  1. healthy replica -> query service running
//  2. shard gets a TabletControl with DisableQueryService -> query service stops
//  3. further health checks (healthy or not) must NOT restart it
func TestTabletControl(t *testing.T) {
	ctx := context.Background()
	agent := createTestAgent(ctx, t)
	targetTabletType := topo.TYPE_REPLICA

	// first health check, should change us to replica
	before := time.Now()
	agent.runHealthCheck(targetTabletType)
	ti, err := agent.TopoServer.GetTablet(ctx, tabletAlias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if ti.Type != targetTabletType {
		t.Errorf("First health check failed to go to replica: %v", ti.Type)
	}
	if !agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should be running")
	}
	if agent._healthyTime.Sub(before) < 0 {
		t.Errorf("runHealthCheck did not update agent._healthyTime")
	}

	// now update the shard: disable query service for our tablet type
	si, err := agent.TopoServer.GetShard(ctx, keyspace, shard)
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	si.TabletControls = []*pb.Shard_TabletControl{
		&pb.Shard_TabletControl{
			TabletType:          topo.TabletTypeToProto(targetTabletType),
			DisableQueryService: true,
		},
	}
	if err := topo.UpdateShard(ctx, agent.TopoServer, si); err != nil {
		t.Fatalf("UpdateShard failed: %v", err)
	}

	// now refresh the tablet state, as the resharding process would do
	agent.RPCWrapLockAction(ctx, actionnode.TabletActionRefreshState, "", "", true, func() error {
		agent.RefreshState(ctx)
		return nil
	})

	// check we shutdown query service
	if agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should not be running")
	}

	// check running a health check will not start it again
	before = time.Now()
	agent.runHealthCheck(targetTabletType)
	ti, err = agent.TopoServer.GetTablet(ctx, tabletAlias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if ti.Type != targetTabletType {
		t.Errorf("Health check failed to go to replica: %v", ti.Type)
	}
	if agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should not be running")
	}
	if agent._healthyTime.Sub(before) < 0 {
		t.Errorf("runHealthCheck did not update agent._healthyTime")
	}

	// go unhealthy, check we go to spare and QS is not running
	agent.HealthReporter.(*fakeHealthCheck).reportError = fmt.Errorf("tablet is unhealthy")
	before = time.Now()
	agent.runHealthCheck(targetTabletType)
	ti, err = agent.TopoServer.GetTablet(ctx, tabletAlias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if ti.Type != topo.TYPE_SPARE {
		t.Errorf("Unhealthy health check should go to spare: %v", ti.Type)
	}
	if agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should not be running")
	}
	if agent._healthyTime.Sub(before) < 0 {
		t.Errorf("runHealthCheck did not update agent._healthyTime")
	}

	// go back healthy, check QS is still not running
	// (the TabletControl record must keep it disabled)
	agent.HealthReporter.(*fakeHealthCheck).reportError = nil
	before = time.Now()
	agent.runHealthCheck(targetTabletType)
	ti, err = agent.TopoServer.GetTablet(ctx, tabletAlias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if ti.Type != targetTabletType {
		t.Errorf("Healthy health check should go to replica: %v", ti.Type)
	}
	if agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should not be running")
	}
	if agent._healthyTime.Sub(before) < 0 {
		t.Errorf("runHealthCheck did not update agent._healthyTime")
	}
}
// InitTablet creates or updates a tablet. If no parent is specified
// in the tablet, and the tablet has a slave type, we will find the
// appropriate parent. If createShardAndKeyspace is true and the
// parent keyspace or shard don't exist, they will be created. If
// update is true, and a tablet with the same ID exists, update it.
// If Force is true, and a tablet with the same ID already exists, it
// will be scrapped and deleted, and then recreated.
func (wr *Wrangler) InitTablet(ctx context.Context, tablet *topo.Tablet, force, createShardAndKeyspace, update bool) error {
	if err := topo.TabletComplete(tablet); err != nil {
		return err
	}

	if topo.IsInReplicationGraph(tablet.Type) {
		// get the shard, possibly creating it
		var err error
		var si *topo.ShardInfo

		if createShardAndKeyspace {
			// create the parent keyspace and shard if needed
			si, err = topotools.GetOrCreateShard(ctx, wr.ts, tablet.Keyspace, tablet.Shard)
		} else {
			si, err = wr.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard)
			if err == topo.ErrNoNode {
				return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard")
			}
		}

		// get the shard, checks a couple things
		if err != nil {
			return fmt.Errorf("cannot get (or create) shard %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		}
		// The tablet's key range must match the shard's key range.
		if key.ProtoToKeyRange(si.KeyRange) != tablet.KeyRange {
			return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange)
		}
		// Refuse to silently replace a different existing master, unless
		// force is set.
		if tablet.Type == topo.TYPE_MASTER && !topo.TabletAliasIsZero(si.MasterAlias) && topo.ProtoToTabletAlias(si.MasterAlias) != tablet.Alias && !force {
			return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, tablet.Keyspace, tablet.Shard)
		}

		// update the shard record if needed
		if err := wr.updateShardCellsAndMaster(ctx, si, topo.TabletAliasToProto(tablet.Alias), topo.TabletTypeToProto(tablet.Type), force); err != nil {
			return err
		}
	}

	err := topo.CreateTablet(ctx, wr.ts, tablet)
	if err != nil && err == topo.ErrNodeExists {
		// Try to update nicely, but if it fails fall back to force behavior.
		if update || force {
			oldTablet, err := wr.ts.GetTablet(ctx, tablet.Alias)
			if err != nil {
				wr.Logger().Warningf("failed reading tablet %v: %v", tablet.Alias, err)
			} else {
				// Only update in place when the existing record belongs to
				// the same keyspace/shard.
				if oldTablet.Keyspace == tablet.Keyspace && oldTablet.Shard == tablet.Shard {
					*(oldTablet.Tablet) = *tablet
					if err := topo.UpdateTablet(ctx, wr.ts, oldTablet); err != nil {
						wr.Logger().Warningf("failed updating tablet %v: %v", tablet.Alias, err)
						// now fall through the Scrap case
					} else {
						if !topo.IsInReplicationGraph(tablet.Type) {
							return nil
						}
						if err := topo.UpdateTabletReplicationData(ctx, wr.ts, tablet); err != nil {
							wr.Logger().Warningf("failed updating tablet replication data for %v: %v", tablet.Alias, err)
							// now fall through the Scrap case
						} else {
							return nil
						}
					}
				}
			}
		}
		if force {
			// Scrap and delete the existing tablet, then recreate it.
			if err = wr.Scrap(ctx, tablet.Alias, force, false); err != nil {
				wr.Logger().Errorf("failed scrapping tablet %v: %v", tablet.Alias, err)
				return err
			}
			if err := wr.ts.DeleteTablet(ctx, tablet.Alias); err != nil {
				// we ignore this
				wr.Logger().Errorf("failed deleting tablet %v: %v", tablet.Alias, err)
			}
			return topo.CreateTablet(ctx, wr.ts, tablet)
		}
	}
	return err
}
// changeCallback is run after every action that might
// have changed something in the tablet record.
// It decides whether the query service should run, restarts it when its
// parameters changed, updates the update stream service, exported stats,
// and the binlog player map.
func (agent *ActionAgent) changeCallback(ctx context.Context, oldTablet, newTablet *topo.Tablet) error {
	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("ActionAgent.changeCallback")
	defer span.Finish()

	allowQuery := newTablet.IsRunningQueryService()

	// Read the shard to get SourceShards / TabletControlMap if
	// we're going to use it.
	var shardInfo *topo.ShardInfo
	var tabletControl *pbt.Shard_TabletControl
	var blacklistedTables []string
	var err error
	if allowQuery {
		shardInfo, err = topo.GetShard(ctx, agent.TopoServer, newTablet.Keyspace, newTablet.Shard)
		if err != nil {
			log.Errorf("Cannot read shard for this tablet %v, might have inaccurate SourceShards and TabletControls: %v", newTablet.Alias, err)
		} else {
			if newTablet.Type == topo.TYPE_MASTER {
				// A master with SourceShards set does not serve queries.
				allowQuery = len(shardInfo.SourceShards) == 0
			}
			// Apply any TabletControl matching our type and cell:
			// it can disable the query service and/or blacklist tables.
			if tc := shardInfo.GetTabletControl(topo.TabletTypeToProto(newTablet.Type)); tc != nil {
				if topo.InCellList(newTablet.Alias.Cell, tc.Cells) {
					if tc.DisableQueryService {
						allowQuery = false
					}
					blacklistedTables = tc.BlacklistedTables
					tabletControl = tc
				}
			}
		}
	}

	// Read the keyspace on masters to get ShardingColumnType,
	// for binlog replication, only if source shards are set.
	var keyspaceInfo *topo.KeyspaceInfo
	if newTablet.Type == topo.TYPE_MASTER && shardInfo != nil && len(shardInfo.SourceShards) > 0 {
		keyspaceInfo, err = agent.TopoServer.GetKeyspace(ctx, newTablet.Keyspace)
		if err != nil {
			log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err)
			keyspaceInfo = nil
		}
	}

	if allowQuery {
		// There are a few transitions when we need to restart the query service:
		switch {
		// If either InitMaster or InitSlave was called, because those calls
		// (or a prior call to ResetReplication) may have silently broken the
		// rowcache invalidator by executing RESET MASTER.
		// Note that we don't care about fixing it after ResetReplication itself
		// since that call breaks everything on purpose, and we don't expect
		// anything to start working until either InitMaster or InitSlave.
		case agent.initReplication:
			agent.initReplication = false
			agent.disallowQueries()

		// Transitioning from replica to master, so clients that were already
		// connected don't keep on using the master as replica or rdonly.
		case newTablet.Type == topo.TYPE_MASTER && oldTablet.Type != topo.TYPE_MASTER:
			agent.disallowQueries()

		// Having different parameters for the query service.
		// It needs to stop and restart with the new parameters.
		// That includes:
		// - changing KeyRange
		// - changing the BlacklistedTables list
		case (newTablet.KeyRange != oldTablet.KeyRange), !reflect.DeepEqual(blacklistedTables, agent.BlacklistedTables()):
			agent.disallowQueries()
		}
		// (Re)start the query service; failure to start is logged, not fatal.
		if err := agent.allowQueries(newTablet, blacklistedTables); err != nil {
			log.Errorf("Cannot start query service: %v", err)
		}
	} else {
		agent.disallowQueries()
	}

	// save the tabletControl we've been using, so the background
	// healthcheck makes the same decisions as we've been making.
	agent.setTabletControl(tabletControl)

	// update stream needs to be started or stopped too
	if agent.DBConfigs != nil {
		if topo.IsRunningUpdateStream(newTablet.Type) {
			binlog.EnableUpdateStreamService(agent.DBConfigs.App.DbName, agent.MysqlDaemon)
		} else {
			binlog.DisableUpdateStreamService()
		}
	}

	// Update the exported stats variables with the new tablet state.
	statsType.Set(string(newTablet.Type))
	statsKeyspace.Set(newTablet.Keyspace)
	statsShard.Set(newTablet.Shard)
	statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex()))
	statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex()))

	// See if we need to start or stop any binlog player
	if agent.BinlogPlayerMap != nil {
		if newTablet.Type == topo.TYPE_MASTER {
			agent.BinlogPlayerMap.RefreshMap(agent.batchCtx, newTablet, keyspaceInfo, shardInfo)
		} else {
			agent.BinlogPlayerMap.StopAllPlayersAndReset()
		}
	}
	return nil
}