Example #1
// Commit commits the current transaction. There are no retries on this operation:
// if any shard commit fails, the remaining shard sessions are rolled back, the
// session is reset, and the first commit error is returned.
func (stc *ScatterConn) Commit(ctx context.Context, session *SafeSession) (err error) {
	if session == nil {
		return vterrors.FromError(
			vtrpc.ErrorCode_BAD_INPUT,
			fmt.Errorf("cannot commit: empty session"),
		)
	}
	if !session.InTransaction() {
		return vterrors.FromError(
			vtrpc.ErrorCode_NOT_IN_TX,
			fmt.Errorf("cannot commit: not in transaction"),
		)
	}
	committing := true
	for _, shardSession := range session.ShardSessions {
		tabletType := topo.TabletTypeToProto(shardSession.TabletType)
		if !committing {
			stc.gateway.Rollback(ctx, shardSession.Keyspace, shardSession.Shard, tabletType, shardSession.TransactionId)
			continue
		}
		if err = stc.gateway.Commit(ctx, shardSession.Keyspace, shardSession.Shard, tabletType, shardSession.TransactionId); err != nil {
			committing = false
		}
	}
	session.Reset()
	return err
}
Example #2
// StreamExecuteKeyRanges2 is the RPC version of
// vtgateservice.VTGateService method
func (vtg *VTGate) StreamExecuteKeyRanges2(ctx context.Context, request *proto.KeyRangeQuery, sendReply func(interface{}) error) (err error) {
	defer vtg.server.HandlePanic(&err)
	ctx = callerid.NewContext(ctx,
		callerid.GoRPCEffectiveCallerID(request.CallerID),
		callerid.NewImmediateCallerID("gorpc client"))
	vtgErr := vtg.server.StreamExecuteKeyRanges(ctx,
		request.Sql,
		request.BindVariables,
		request.Keyspace,
		key.KeyRangesToProto(request.KeyRanges),
		topo.TabletTypeToProto(request.TabletType),
		func(value *proto.QueryResult) error {
			return sendReply(value)
		})
	if vtgErr == nil {
		return nil
	}
	if *vtgate.RPCErrorOnlyInReply {
		// If there was an app error, send a QueryResult back with it.
		qr := new(proto.QueryResult)
		vtgate.AddVtGateErrorToQueryResult(vtgErr, qr)
		// Sending back errors this way is not backwards compatible. If a (new) server sends an additional
		// QueryResult with an error, and the (old) client doesn't know how to read it, it will cause
		// problems where the client will get out of sync with the number of QueryResults sent.
		// That's why the error is only sent this way when the --rpc_errors_only_in_reply flag is set
		// (signalling that all clients are able to handle new-style errors).
		return sendReply(qr)
	}
	return vtgErr
}
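The compatibility note above is easiest to see from the client side: with --rpc_errors_only_in_reply enabled, a stream may end with one extra QueryResult that carries only an application error. The following is a minimal, hypothetical client-side sketch; consumeStream, recv and handleRows are illustrative names, not part of the codebase.

// consumeStream shows how a new-style client could read a result stream when
// an application error may arrive as a trailing QueryResult (qr.Err set).
func consumeStream(recv func() (*proto.QueryResult, error), handleRows func(*proto.QueryResult)) error {
	for {
		qr, err := recv()
		if err == io.EOF {
			// Stream closed cleanly: no application error was reported.
			return nil
		}
		if err != nil {
			// Transport-level failure.
			return err
		}
		if qr.Err != nil {
			// Trailing error result: surface it instead of processing rows.
			return fmt.Errorf("vtgate error: %v", qr.Err)
		}
		handleRows(qr)
	}
}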
Example #3
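// allowQueries starts the query service if it is not already serving: it
// fills in the App DB config from the tablet record (enabling the invalidator
// for non-master types), loads the keyspace and blacklist rules, and then
// asks QueryServiceControl to allow queries for the tablet's target.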
func (agent *ActionAgent) allowQueries(tablet *topo.Tablet, blacklistedTables []string) error {
	// if the query service is already running, we're not starting it again
	if agent.QueryServiceControl.IsServing() {
		return nil
	}

	// only for real instances
	if agent.DBConfigs != nil {
		// Update our DB config to match the info we have in the tablet
		if agent.DBConfigs.App.DbName == "" {
			agent.DBConfigs.App.DbName = tablet.DbName()
		}
		agent.DBConfigs.App.Keyspace = tablet.Keyspace
		agent.DBConfigs.App.Shard = tablet.Shard
		if tablet.Type != topo.TYPE_MASTER {
			agent.DBConfigs.App.EnableInvalidator = true
		} else {
			agent.DBConfigs.App.EnableInvalidator = false
		}
	}

	err := agent.loadKeyspaceAndBlacklistRules(tablet, blacklistedTables)
	if err != nil {
		return err
	}

	return agent.QueryServiceControl.AllowQueries(&pb.Target{
		Keyspace:   tablet.Keyspace,
		Shard:      tablet.Shard,
		TabletType: topo.TabletTypeToProto(tablet.Type),
	}, agent.DBConfigs, agent.SchemaOverrides, agent.MysqlDaemon)
}
Example #4
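// StreamExecuteKeyspaceIds executes a streaming query routed by keyspace ids.
// Results are delivered on the returned channel; once that channel is closed,
// the returned ErrFunc reports whether the stream terminated with an error.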
func (conn *vtgateConn) StreamExecuteKeyspaceIds(ctx context.Context, query string, keyspace string, keyspaceIds []key.KeyspaceId, bindVars map[string]interface{}, tabletType topo.TabletType) (<-chan *mproto.QueryResult, vtgateconn.ErrFunc, error) {
	req := &pb.StreamExecuteKeyspaceIdsRequest{
		Query:       tproto.BoundQueryToProto3(query, bindVars),
		Keyspace:    keyspace,
		KeyspaceIds: key.KeyspaceIdsToProto(keyspaceIds),
		TabletType:  topo.TabletTypeToProto(tabletType),
	}
	stream, err := conn.c.StreamExecuteKeyspaceIds(ctx, req)
	if err != nil {
		return nil, nil, err
	}
	sr := make(chan *mproto.QueryResult, 10)
	var finalError error
	go func() {
		for {
			ser, err := stream.Recv()
			if err != nil {
				if err != io.EOF {
					finalError = err
				}
				close(sr)
				return
			}
			if ser.Error != nil {
				finalError = vterrors.FromVtRPCError(ser.Error)
				close(sr)
				return
			}
			sr <- mproto.Proto3ToQueryResult(ser.Result)
		}
	}()
	return sr, func() error {
		return finalError
	}, nil
}
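A sketch of how a caller might consume the channel/ErrFunc pair returned above: drain the channel until it is closed, then ask the error function whether the stream ended abnormally. The wrapper name streamKeyspaceIds and the process callback are hypothetical.

func streamKeyspaceIds(ctx context.Context, conn *vtgateConn, query, keyspace string, keyspaceIds []key.KeyspaceId, bindVars map[string]interface{}, tabletType topo.TabletType, process func(*mproto.QueryResult)) error {
	results, errFunc, err := conn.StreamExecuteKeyspaceIds(ctx, query, keyspace, keyspaceIds, bindVars, tabletType)
	if err != nil {
		return err
	}
	for qr := range results {
		process(qr)
	}
	// errFunc is only meaningful once the result channel has been closed.
	return errFunc()
}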
Example #5
// ChangeType is part of the tmclient.TabletManagerClient interface
func (client *Client) ChangeType(ctx context.Context, tablet *topo.TabletInfo, dbType topo.TabletType) error {
	cc, c, err := client.dial(ctx, tablet)
	if err != nil {
		return err
	}
	defer cc.Close()
	_, err = c.ChangeType(ctx, &pb.ChangeTypeRequest{
		TabletType: topo.TabletTypeToProto(dbType),
	})
	return err
}
Example #6
// Rollback rolls back the current transaction. There are no retries on this operation.
func (stc *ScatterConn) Rollback(ctx context.Context, session *SafeSession) (err error) {
	if session == nil {
		return nil
	}
	for _, shardSession := range session.ShardSessions {
		tabletType := topo.TabletTypeToProto(shardSession.TabletType)
		stc.gateway.Rollback(ctx, shardSession.Keyspace, shardSession.Shard, tabletType, shardSession.TransactionId)
	}
	session.Reset()
	return nil
}
Example #7
// RunHealthCheck is part of the tmclient.TabletManagerClient interface
func (client *Client) RunHealthCheck(ctx context.Context, tablet *topo.TabletInfo, targetTabletType topo.TabletType) error {
	cc, c, err := client.dial(ctx, tablet)
	if err != nil {
		return err
	}
	defer cc.Close()
	_, err = c.RunHealthCheck(ctx, &pb.RunHealthCheckRequest{
		TabletType: topo.TabletTypeToProto(targetTabletType),
	})
	return err
}
Example #8
// GetEndPoints returns addresses for a tablet type in a shard
// in a keyspace.
func (tr *TopoReader) GetEndPoints(ctx context.Context, req *topo.GetEndPointsArgs, reply *pb.EndPoints) (err error) {
	tr.queryCount.Add(req.Cell, 1)
	addrs, _, err := tr.ts.GetEndPoints(ctx, req.Cell, req.Keyspace, req.Shard, topo.TabletTypeToProto(req.TabletType))
	if err != nil {
		log.Warningf("GetEndPoints(%v,%v,%v,%v) failed: %v", req.Cell, req.Keyspace, req.Shard, req.TabletType, err)
		tr.errorCount.Add(req.Cell, 1)
		return err
	}
	*reply = *addrs
	return nil
}
Example #9
// StreamExecute is the RPC version of vtgateservice.VTGateService method
func (vtg *VTGate) StreamExecute(ctx context.Context, request *proto.Query, sendReply func(interface{}) error) (err error) {
	defer vtg.server.HandlePanic(&err)
	ctx = callerid.NewContext(ctx,
		callerid.GoRPCEffectiveCallerID(request.CallerID),
		callerid.NewImmediateCallerID("gorpc client"))
	return vtg.server.StreamExecute(ctx,
		request.Sql,
		request.BindVariables,
		topo.TabletTypeToProto(request.TabletType),
		func(value *proto.QueryResult) error {
			return sendReply(value)
		})
}
Example #10
// ExecuteBatchKeyspaceIds is the RPC version of
// vtgateservice.VTGateService method
func (vtg *VTGate) ExecuteBatchKeyspaceIds(ctx context.Context, request *proto.KeyspaceIdBatchQuery, reply *proto.QueryResultList) (err error) {
	defer vtg.server.HandlePanic(&err)
	ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*rpcTimeout))
	defer cancel()
	ctx = callerid.NewContext(ctx,
		callerid.GoRPCEffectiveCallerID(request.CallerID),
		callerid.NewImmediateCallerID("gorpc client"))
	vtgErr := vtg.server.ExecuteBatchKeyspaceIds(ctx,
		request.Queries,
		topo.TabletTypeToProto(request.TabletType),
		request.AsTransaction,
		request.Session,
		reply)
	vtgate.AddVtGateError(vtgErr, &reply.Err)
	return nil
}
Example #11
// Execute is the RPC version of vtgateservice.VTGateService method
func (vtg *VTGate) Execute(ctx context.Context, request *proto.Query, reply *proto.QueryResult) (err error) {
	defer vtg.server.HandlePanic(&err)
	ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*rpcTimeout))
	defer cancel()
	ctx = callerid.NewContext(ctx,
		callerid.GoRPCEffectiveCallerID(request.CallerID),
		callerid.NewImmediateCallerID("gorpc client"))
	vtgErr := vtg.server.Execute(ctx,
		request.Sql,
		request.BindVariables,
		topo.TabletTypeToProto(request.TabletType),
		request.Session,
		request.NotInTransaction,
		reply)
	vtgate.AddVtGateErrorToQueryResult(vtgErr, reply)
	if *vtgate.RPCErrorOnlyInReply {
		return nil
	}
	return vtgErr
}
Example #12
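// Execute sends a single non-streaming query to vtgate and returns the result
// along with the (possibly updated) session.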
func (conn *vtgateConn) Execute(ctx context.Context, query string, bindVars map[string]interface{}, tabletType topo.TabletType, notInTransaction bool, session interface{}) (*mproto.QueryResult, interface{}, error) {
	var s *pb.Session
	if session != nil {
		s = session.(*pb.Session)
	}
	request := &pb.ExecuteRequest{
		Session:          s,
		Query:            tproto.BoundQueryToProto3(query, bindVars),
		TabletType:       topo.TabletTypeToProto(tabletType),
		NotInTransaction: notInTransaction,
	}
	response, err := conn.c.Execute(ctx, request)
	if err != nil {
		return nil, session, err
	}
	if response.Error != nil {
		return nil, response.Session, vterrors.FromVtRPCError(response.Error)
	}
	return mproto.Proto3ToQueryResult(response.Result), response.Session, nil
}
Example #13
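// ExecuteBatchKeyspaceIds sends a batch of queries routed by keyspace ids,
// optionally as a single transaction, and returns the results along with the
// (possibly updated) session.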
func (conn *vtgateConn) ExecuteBatchKeyspaceIds(ctx context.Context, queries []proto.BoundKeyspaceIdQuery, tabletType topo.TabletType, asTransaction bool, session interface{}) ([]mproto.QueryResult, interface{}, error) {
	var s *pb.Session
	if session != nil {
		s = session.(*pb.Session)
	}
	request := &pb.ExecuteBatchKeyspaceIdsRequest{
		Session:       s,
		Queries:       proto.BoundKeyspaceIdQueriesToProto(queries),
		TabletType:    topo.TabletTypeToProto(tabletType),
		AsTransaction: asTransaction,
	}
	response, err := conn.c.ExecuteBatchKeyspaceIds(ctx, request)
	if err != nil {
		return nil, session, err
	}
	if response.Error != nil {
		return nil, response.Session, vterrors.FromVtRPCError(response.Error)
	}
	return mproto.Proto3ToQueryResults(response.Results), response.Session, nil
}
Example #14
// StreamExecute2 is the RPC version of vtgateservice.VTGateService method
func (vtg *VTGate) StreamExecute2(ctx context.Context, request *proto.Query, sendReply func(interface{}) error) (err error) {
	defer vtg.server.HandlePanic(&err)
	ctx = callerid.NewContext(ctx,
		callerid.GoRPCEffectiveCallerID(request.CallerID),
		callerid.NewImmediateCallerID("gorpc client"))
	vtgErr := vtg.server.StreamExecute(ctx,
		request.Sql,
		request.BindVariables,
		topo.TabletTypeToProto(request.TabletType),
		func(value *proto.QueryResult) error {
			return sendReply(value)
		})
	if vtgErr == nil {
		return nil
	}
	// If there was an app error, send a QueryResult back with it.
	qr := new(proto.QueryResult)
	vtgate.AddVtGateError(vtgErr, &qr.Err)
	return sendReply(qr)
}
Example #15
// SessionToProto transforms a Session into proto3
func SessionToProto(s *Session) *pb.Session {
	if s == nil {
		return nil
	}
	result := &pb.Session{
		InTransaction: s.InTransaction,
	}
	result.ShardSessions = make([]*pb.Session_ShardSession, len(s.ShardSessions))
	for i, ss := range s.ShardSessions {
		result.ShardSessions[i] = &pb.Session_ShardSession{
			Target: &pbq.Target{
				Keyspace:   ss.Keyspace,
				Shard:      ss.Shard,
				TabletType: topo.TabletTypeToProto(ss.TabletType),
			},
			TransactionId: ss.TransactionId,
		}
	}
	return result
}
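For illustration only, a hypothetical inverse of SessionToProto that reverses the field mapping above; the protoToSession name and the exact ShardSession layout are assumptions, not taken from the codebase.

// protoToSession converts a proto3 Session back into the non-proto form,
// assuming ShardSessions is a slice of *ShardSession with the fields used above.
func protoToSession(s *pb.Session) *Session {
	if s == nil {
		return nil
	}
	result := &Session{
		InTransaction: s.InTransaction,
	}
	result.ShardSessions = make([]*ShardSession, len(s.ShardSessions))
	for i, ss := range s.ShardSessions {
		result.ShardSessions[i] = &ShardSession{
			Keyspace:      ss.Target.Keyspace,
			Shard:         ss.Target.Shard,
			TabletType:    topo.ProtoToTabletType(ss.Target.TabletType),
			TransactionId: ss.TransactionId,
		}
	}
	return result
}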
Example #16
// ExecuteEntityIds is the RPC version of vtgateservice.VTGateService method
func (vtg *VTGate) ExecuteEntityIds(ctx context.Context, request *proto.EntityIdsQuery, reply *proto.QueryResult) (err error) {
	defer vtg.server.HandlePanic(&err)
	ctx, cancel := context.WithDeadline(ctx, time.Now().Add(*rpcTimeout))
	defer cancel()
	ctx = callerid.NewContext(ctx,
		callerid.GoRPCEffectiveCallerID(request.CallerID),
		callerid.NewImmediateCallerID("gorpc client"))
	vtgErr := vtg.server.ExecuteEntityIds(ctx,
		request.Sql,
		request.BindVariables,
		request.Keyspace,
		request.EntityColumnName,
		proto.EntityIdsToProto(request.EntityKeyspaceIDs),
		topo.TabletTypeToProto(request.TabletType),
		request.Session,
		request.NotInTransaction,
		reply)
	vtgate.AddVtGateError(vtgErr, &reply.Err)
	return nil
}
Example #17
// InitializeConnections pre-initializes connections for all shards.
// It also populates topology cache by accessing it.
// It is not necessary to call this function before serving queries,
// but it would reduce connection overhead when serving.
func (stc *ScatterConn) InitializeConnections(ctx context.Context) error {
	ksNames, err := stc.toposerv.GetSrvKeyspaceNames(ctx, stc.cell)
	if err != nil {
		return err
	}
	var wg sync.WaitGroup
	var errRecorder concurrency.AllErrorRecorder
	for _, ksName := range ksNames {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			// get SrvKeyspace for cell/keyspace
			ks, err := stc.toposerv.GetSrvKeyspace(ctx, stc.cell, keyspace)
			if err != nil {
				errRecorder.RecordError(err)
				return
			}
			// work on all shards of all serving tablet types
			for tabletType, ksPartition := range ks.Partitions {
				tt := topo.TabletTypeToProto(tabletType)
				for _, shard := range ksPartition.ShardReferences {
					wg.Add(1)
					go func(shardName string, tabletType pb.TabletType) {
						defer wg.Done()
						// Use a local err: assigning the enclosing err from
						// concurrent goroutines would be a data race.
						if err := stc.gateway.Dial(ctx, keyspace, shardName, tabletType); err != nil {
							errRecorder.RecordError(err)
						}
					}(shard.Name, tt)
				}
			}
		}(ksName)
	}
	wg.Wait()
	if errRecorder.HasErrors() {
		return errRecorder.Error()
	}
	return nil
}
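The fan-out structure above (one goroutine per unit of work, errors collected in an AllErrorRecorder, a WaitGroup to join) is reusable on its own. A minimal sketch of just that pattern, with hypothetical names (forEachParallel, work):

// forEachParallel runs work once per item, in parallel, and returns the
// combined error after every goroutine has finished.
func forEachParallel(items []string, work func(item string) error) error {
	var wg sync.WaitGroup
	var rec concurrency.AllErrorRecorder
	for _, item := range items {
		wg.Add(1)
		go func(item string) {
			defer wg.Done()
			if err := work(item); err != nil {
				// AllErrorRecorder can be used from concurrent goroutines,
				// as in the example above.
				rec.RecordError(err)
			}
		}(item)
	}
	wg.Wait()
	if rec.HasErrors() {
		return rec.Error()
	}
	return nil
}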
Example #18
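// ExecuteKeyspaceIds sends a query routed by the given keyspace ids and
// returns the result along with the (possibly updated) session.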
func (conn *vtgateConn) ExecuteKeyspaceIds(ctx context.Context, query string, keyspace string, keyspaceIds []key.KeyspaceId, bindVars map[string]interface{}, tabletType topo.TabletType, notInTransaction bool, session interface{}) (*mproto.QueryResult, interface{}, error) {
	var s *pb.Session
	if session != nil {
		s = session.(*pb.Session)
	}
	request := &pb.ExecuteKeyspaceIdsRequest{
		CallerId:         callerid.EffectiveCallerIDFromContext(ctx),
		Session:          s,
		Query:            tproto.BoundQueryToProto3(query, bindVars),
		Keyspace:         keyspace,
		KeyspaceIds:      key.KeyspaceIdsToProto(keyspaceIds),
		TabletType:       topo.TabletTypeToProto(tabletType),
		NotInTransaction: notInTransaction,
	}
	response, err := conn.c.ExecuteKeyspaceIds(ctx, request)
	if err != nil {
		return nil, session, err
	}
	if response.Error != nil {
		return nil, response.Session, vterrors.FromVtRPCError(response.Error)
	}
	return mproto.Proto3ToQueryResult(response.Result), response.Session, nil
}
Example #19
// NewShardConn creates a new ShardConn. It creates a Balancer using
// serv, cell, keyspace, shard, tabletType and retryDelay. retryCount is the max
// number of retries before a ShardConn returns an error on an operation.
func NewShardConn(ctx context.Context, serv SrvTopoServer, cell, keyspace, shard string, tabletType topo.TabletType, retryDelay time.Duration, retryCount int, connTimeoutTotal, connTimeoutPerConn, connLife time.Duration, tabletConnectTimings *stats.MultiTimings) *ShardConn {
	getAddresses := func() (*pb.EndPoints, error) {
		endpoints, _, err := serv.GetEndPoints(ctx, cell, keyspace, shard, topo.TabletTypeToProto(tabletType))
		if err != nil {
			return nil, fmt.Errorf("endpoints fetch error: %v", err)
		}
		return endpoints, nil
	}
	blc := NewBalancer(getAddresses, retryDelay)
	var ticker *timer.RandTicker
	if tabletType != topo.TYPE_MASTER {
		ticker = timer.NewRandTicker(connLife, connLife/2)
	}
	sdc := &ShardConn{
		keyspace:           keyspace,
		shard:              shard,
		tabletType:         tabletType,
		retryDelay:         retryDelay,
		retryCount:         retryCount,
		connTimeoutTotal:   connTimeoutTotal,
		connTimeoutPerConn: connTimeoutPerConn,
		connLife:           connLife,
		balancer:           blc,
		ticker:             ticker,
		consolidator:       sync2.NewConsolidator(),
		connectTimings:     tabletConnectTimings,
	}
	if ticker != nil {
		go func() {
			for range ticker.C {
				sdc.closeCurrent()
			}
		}()
	}
	return sdc
}
Example #20
// DbServingGraph returns the ServingGraph for the given cell.
func DbServingGraph(ctx context.Context, ts topo.Server, cell string) (servingGraph *ServingGraph) {
	servingGraph = &ServingGraph{
		Cell:      cell,
		Keyspaces: make(map[string]*KeyspaceNodes),
	}
	rec := concurrency.AllErrorRecorder{}

	keyspaces, err := ts.GetSrvKeyspaceNames(ctx, cell)
	if err != nil {
		servingGraph.Errors = append(servingGraph.Errors, fmt.Sprintf("GetSrvKeyspaceNames failed: %v", err))
		return
	}
	wg := sync.WaitGroup{}
	servingTypes := []topo.TabletType{topo.TYPE_MASTER, topo.TYPE_REPLICA, topo.TYPE_RDONLY}
	for _, keyspace := range keyspaces {
		kn := newKeyspaceNodes()
		servingGraph.Keyspaces[keyspace] = kn
		wg.Add(1)
		go func(keyspace string, kn *KeyspaceNodes) {
			defer wg.Done()

			ks, err := ts.GetSrvKeyspace(ctx, cell, keyspace)
			if err != nil {
				rec.RecordError(fmt.Errorf("GetSrvKeyspace(%v, %v) failed: %v", cell, keyspace, err))
				return
			}
			if len(ks.ServedFrom) > 0 {
				kn.ServedFrom = make(map[topo.TabletType]string)
				for _, sf := range ks.ServedFrom {
					kn.ServedFrom[topo.ProtoToTabletType(sf.TabletType)] = sf.Keyspace
				}
			}

			displayedShards := make(map[string]bool)
			for _, partitionTabletType := range servingTypes {
				kp := topoproto.SrvKeyspaceGetPartition(ks, topo.TabletTypeToProto(partitionTabletType))
				if kp == nil {
					continue
				}
				for _, srvShard := range kp.ShardReferences {
					shard := srvShard.Name
					if displayedShards[shard] {
						continue
					}
					displayedShards[shard] = true

					sn := &ShardNodes{
						Name: shard,
					}
					kn.ShardNodes = append(kn.ShardNodes, sn)
					wg.Add(1)
					go func(shard string, sn *ShardNodes) {
						defer wg.Done()
						tabletTypes, err := ts.GetSrvTabletTypesPerShard(ctx, cell, keyspace, shard)
						if err != nil {
							rec.RecordError(fmt.Errorf("GetSrvTabletTypesPerShard(%v, %v, %v) failed: %v", cell, keyspace, shard, err))
							return
						}
						for _, tabletType := range tabletTypes {
							endPoints, _, err := ts.GetEndPoints(ctx, cell, keyspace, shard, tabletType)
							if err != nil {
								rec.RecordError(fmt.Errorf("GetEndPoints(%v, %v, %v, %v) failed: %v", cell, keyspace, shard, tabletType, err))
								continue
							}
							for _, endPoint := range endPoints.Entries {
								var tabletNode *TabletNodesByType
								for _, t := range sn.TabletNodes {
									if t.TabletType == tabletType {
										tabletNode = t
										break
									}
								}
								if tabletNode == nil {
									tabletNode = &TabletNodesByType{
										TabletType: tabletType,
									}
									sn.TabletNodes = append(sn.TabletNodes, tabletNode)
								}
								tabletNode.Nodes = append(tabletNode.Nodes, newTabletNodeFromEndPoint(endPoint, cell))
							}
						}
					}(shard, sn)
				}
			}
		}(keyspace, kn)
	}
	wg.Wait()
	servingGraph.Errors = rec.ErrorStrings()
	return
}
Example #21
// TestTabletControl verifies the shard's TabletControl record can disable
// query service in a tablet.
func TestTabletControl(t *testing.T) {
	ctx := context.Background()
	agent := createTestAgent(ctx, t)
	targetTabletType := topo.TYPE_REPLICA

	// first health check, should change us to replica
	before := time.Now()
	agent.runHealthCheck(targetTabletType)
	ti, err := agent.TopoServer.GetTablet(ctx, tabletAlias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if ti.Type != targetTabletType {
		t.Errorf("First health check failed to go to replica: %v", ti.Type)
	}
	if !agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should be running")
	}
	if agent._healthyTime.Sub(before) < 0 {
		t.Errorf("runHealthCheck did not update agent._healthyTime")
	}

	// now update the shard
	si, err := agent.TopoServer.GetShard(ctx, keyspace, shard)
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	si.TabletControls = []*pb.Shard_TabletControl{
		&pb.Shard_TabletControl{
			TabletType:          topo.TabletTypeToProto(targetTabletType),
			DisableQueryService: true,
		},
	}
	if err := topo.UpdateShard(ctx, agent.TopoServer, si); err != nil {
		t.Fatalf("UpdateShard failed: %v", err)
	}

	// now refresh the tablet state, as the resharding process would do
	agent.RPCWrapLockAction(ctx, actionnode.TabletActionRefreshState, "", "", true, func() error {
		agent.RefreshState(ctx)
		return nil
	})

	// check we shutdown query service
	if agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should not be running")
	}

	// check running a health check will not start it again
	before = time.Now()
	agent.runHealthCheck(targetTabletType)
	ti, err = agent.TopoServer.GetTablet(ctx, tabletAlias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if ti.Type != targetTabletType {
		t.Errorf("Health check failed to go to replica: %v", ti.Type)
	}
	if agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should not be running")
	}
	if agent._healthyTime.Sub(before) < 0 {
		t.Errorf("runHealthCheck did not update agent._healthyTime")
	}

	// go unhealthy, check we go to spare and QS is not running
	agent.HealthReporter.(*fakeHealthCheck).reportError = fmt.Errorf("tablet is unhealthy")
	before = time.Now()
	agent.runHealthCheck(targetTabletType)
	ti, err = agent.TopoServer.GetTablet(ctx, tabletAlias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if ti.Type != topo.TYPE_SPARE {
		t.Errorf("Unhealthy health check should go to spare: %v", ti.Type)
	}
	if agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should not be running")
	}
	if agent._healthyTime.Sub(before) < 0 {
		t.Errorf("runHealthCheck did not update agent._healthyTime")
	}

	// go back healthy, check QS is still not running
	agent.HealthReporter.(*fakeHealthCheck).reportError = nil
	before = time.Now()
	agent.runHealthCheck(targetTabletType)
	ti, err = agent.TopoServer.GetTablet(ctx, tabletAlias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if ti.Type != targetTabletType {
		t.Errorf("Healthy health check should go to replica: %v", ti.Type)
	}
	if agent.QueryServiceControl.IsServing() {
		t.Errorf("Query service should not be running")
	}
	if agent._healthyTime.Sub(before) < 0 {
		t.Errorf("runHealthCheck did not update agent._healthyTime")
	}
}
Example #22
// InitTablet creates or updates a tablet. If no parent is specified
// in the tablet, and the tablet has a slave type, we will find the
// appropriate parent. If createShardAndKeyspace is true and the
// parent keyspace or shard don't exist, they will be created.  If
// update is true, and a tablet with the same ID exists, update it.
// If Force is true, and a tablet with the same ID already exists, it
// will be scrapped and deleted, and then recreated.
func (wr *Wrangler) InitTablet(ctx context.Context, tablet *topo.Tablet, force, createShardAndKeyspace, update bool) error {
	if err := topo.TabletComplete(tablet); err != nil {
		return err
	}

	if topo.IsInReplicationGraph(tablet.Type) {
		// get the shard, possibly creating it
		var err error
		var si *topo.ShardInfo

		if createShardAndKeyspace {
			// create the parent keyspace and shard if needed
			si, err = topotools.GetOrCreateShard(ctx, wr.ts, tablet.Keyspace, tablet.Shard)
		} else {
			si, err = wr.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard)
			if err == topo.ErrNoNode {
				return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard")
			}
		}

		// bail out if we could not get (or create) the shard, then sanity-check it
		if err != nil {
			return fmt.Errorf("cannot get (or create) shard %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		}
		if key.ProtoToKeyRange(si.KeyRange) != tablet.KeyRange {
			return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange)
		}
		if tablet.Type == topo.TYPE_MASTER && !topo.TabletAliasIsZero(si.MasterAlias) && topo.ProtoToTabletAlias(si.MasterAlias) != tablet.Alias && !force {
			return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", si.MasterAlias, tablet.Keyspace, tablet.Shard)
		}

		// update the shard record if needed
		if err := wr.updateShardCellsAndMaster(ctx, si, topo.TabletAliasToProto(tablet.Alias), topo.TabletTypeToProto(tablet.Type), force); err != nil {
			return err
		}
	}

	err := topo.CreateTablet(ctx, wr.ts, tablet)
	if err != nil && err == topo.ErrNodeExists {
		// Try to update nicely, but if it fails fall back to force behavior.
		if update || force {
			oldTablet, err := wr.ts.GetTablet(ctx, tablet.Alias)
			if err != nil {
				wr.Logger().Warningf("failed reading tablet %v: %v", tablet.Alias, err)
			} else {
				if oldTablet.Keyspace == tablet.Keyspace && oldTablet.Shard == tablet.Shard {
					*(oldTablet.Tablet) = *tablet
					if err := topo.UpdateTablet(ctx, wr.ts, oldTablet); err != nil {
						wr.Logger().Warningf("failed updating tablet %v: %v", tablet.Alias, err)
						// now fall through the Scrap case
					} else {
						if !topo.IsInReplicationGraph(tablet.Type) {
							return nil
						}

						if err := topo.UpdateTabletReplicationData(ctx, wr.ts, tablet); err != nil {
							wr.Logger().Warningf("failed updating tablet replication data for %v: %v", tablet.Alias, err)
							// now fall through the Scrap case
						} else {
							return nil
						}
					}
				}
			}
		}
		if force {
			if err = wr.Scrap(ctx, tablet.Alias, force, false); err != nil {
				wr.Logger().Errorf("failed scrapping tablet %v: %v", tablet.Alias, err)
				return err
			}
			if err := wr.ts.DeleteTablet(ctx, tablet.Alias); err != nil {
				// we ignore this
				wr.Logger().Errorf("failed deleting tablet %v: %v", tablet.Alias, err)
			}
			return topo.CreateTablet(ctx, wr.ts, tablet)
		}
	}
	return err
}
Example #23
// changeCallback is run after every action that might
// have changed something in the tablet record.
func (agent *ActionAgent) changeCallback(ctx context.Context, oldTablet, newTablet *topo.Tablet) error {
	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("ActionAgent.changeCallback")
	defer span.Finish()

	allowQuery := newTablet.IsRunningQueryService()

	// Read the shard to get SourceShards / TabletControlMap if
	// we're going to use it.
	var shardInfo *topo.ShardInfo
	var tabletControl *pbt.Shard_TabletControl
	var blacklistedTables []string
	var err error
	if allowQuery {
		shardInfo, err = topo.GetShard(ctx, agent.TopoServer, newTablet.Keyspace, newTablet.Shard)
		if err != nil {
			log.Errorf("Cannot read shard for this tablet %v, might have inaccurate SourceShards and TabletControls: %v", newTablet.Alias, err)
		} else {
			if newTablet.Type == topo.TYPE_MASTER {
				allowQuery = len(shardInfo.SourceShards) == 0
			}
			if tc := shardInfo.GetTabletControl(topo.TabletTypeToProto(newTablet.Type)); tc != nil {
				if topo.InCellList(newTablet.Alias.Cell, tc.Cells) {
					if tc.DisableQueryService {
						allowQuery = false
					}
					blacklistedTables = tc.BlacklistedTables
					tabletControl = tc
				}
			}
		}
	}

	// Read the keyspace on masters to get ShardingColumnType,
	// for binlog replication, only if source shards are set.
	var keyspaceInfo *topo.KeyspaceInfo
	if newTablet.Type == topo.TYPE_MASTER && shardInfo != nil && len(shardInfo.SourceShards) > 0 {
		keyspaceInfo, err = agent.TopoServer.GetKeyspace(ctx, newTablet.Keyspace)
		if err != nil {
			log.Errorf("Cannot read keyspace for this tablet %v: %v", newTablet.Alias, err)
			keyspaceInfo = nil
		}
	}

	if allowQuery {
		// There are a few transitions when we need to restart the query service:
		switch {
		// If either InitMaster or InitSlave was called, because those calls
		// (or a prior call to ResetReplication) may have silently broken the
		// rowcache invalidator by executing RESET MASTER.
		// Note that we don't care about fixing it after ResetReplication itself
		// since that call breaks everything on purpose, and we don't expect
		// anything to start working until either InitMaster or InitSlave.
		case agent.initReplication:
			agent.initReplication = false
			agent.disallowQueries()

		// Transitioning from replica to master, so clients that were already
		// connected don't keep on using the master as replica or rdonly.
		case newTablet.Type == topo.TYPE_MASTER && oldTablet.Type != topo.TYPE_MASTER:
			agent.disallowQueries()

		// Having different parameters for the query service.
		// It needs to stop and restart with the new parameters.
		// That includes:
		//   - changing KeyRange
		//   - changing the BlacklistedTables list
		case (newTablet.KeyRange != oldTablet.KeyRange),
			!reflect.DeepEqual(blacklistedTables, agent.BlacklistedTables()):
			agent.disallowQueries()
		}

		if err := agent.allowQueries(newTablet, blacklistedTables); err != nil {
			log.Errorf("Cannot start query service: %v", err)
		}
	} else {
		agent.disallowQueries()
	}

	// save the tabletControl we've been using, so the background
	// healthcheck makes the same decisions as we've been making.
	agent.setTabletControl(tabletControl)

	// update stream needs to be started or stopped too
	if agent.DBConfigs != nil {
		if topo.IsRunningUpdateStream(newTablet.Type) {
			binlog.EnableUpdateStreamService(agent.DBConfigs.App.DbName, agent.MysqlDaemon)
		} else {
			binlog.DisableUpdateStreamService()
		}
	}

	statsType.Set(string(newTablet.Type))
	statsKeyspace.Set(newTablet.Keyspace)
	statsShard.Set(newTablet.Shard)
	statsKeyRangeStart.Set(string(newTablet.KeyRange.Start.Hex()))
	statsKeyRangeEnd.Set(string(newTablet.KeyRange.End.Hex()))

	// See if we need to start or stop any binlog player
	if agent.BinlogPlayerMap != nil {
		if newTablet.Type == topo.TYPE_MASTER {
			agent.BinlogPlayerMap.RefreshMap(agent.batchCtx, newTablet, keyspaceInfo, shardInfo)
		} else {
			agent.BinlogPlayerMap.StopAllPlayersAndReset()
		}
	}
	return nil
}