Example #1
0
// UpdateStream is part of the vtgate service API.
//
// The request is handed to the resolver unchanged. The call duration is
// recorded under the ("UpdateStream", keyspace, tablet type) stats key. When
// the resolver reports an error, the error is counted in normalErrors and
// logged together with the request parameters. The resolver's result is
// always passed through formatError before being returned.
func (vtg *VTGate) UpdateStream(ctx context.Context, keyspace string, shard string, keyRange *topodatapb.KeyRange, tabletType topodatapb.TabletType, timestamp int64, event *querypb.EventToken, sendReply func(*querypb.StreamEvent, int64) error) error {
	begin := time.Now()
	typeName := topoproto.TabletTypeLString(tabletType)
	statsKey := []string{"UpdateStream", keyspace, typeName}
	defer vtg.timings.Record(statsKey, begin)

	err := vtg.resolver.UpdateStream(ctx, keyspace, shard, keyRange, tabletType, timestamp, event, sendReply)
	if err != nil {
		normalErrors.Add(statsKey, 1)
		// The event token is not part of the logged request description.
		logError(err, map[string]interface{}{
			"Keyspace":   keyspace,
			"Shard":      shard,
			"KeyRange":   keyRange,
			"TabletType": typeName,
			"Timestamp":  timestamp,
		}, vtg.logUpdateStream)
	}
	return formatError(err)
}
// StatusAsHTML returns an HTML version of our status.
// It works best if there is data in the cache.
//
// With no cached value the result is the text "No Data". Otherwise the
// output lists every partition with its shard names, then the sharding
// column name and type (only when a name is set), then the ServedFrom
// entries (only when there is at least one).
//
// NOTE(review): shard, column and keyspace names are concatenated into the
// markup without escaping; confirm they can never contain HTML
// metacharacters.
func (st *SrvKeyspaceCacheStatus) StatusAsHTML() template.HTML {
	value := st.Value
	if value == nil {
		return template.HTML("No Data")
	}

	page := "<b>Partitions:</b><br>"
	for tabletType, partition := range value.Partitions {
		// One row per partition: the type label followed by its shards.
		row := "&nbsp;<b>" + string(tabletType) + "</b>"
		for _, ref := range partition.ShardReferences {
			row += "&nbsp;" + ref.Name
		}
		page += row + "<br>"
	}

	if value.ShardingColumnName != "" {
		page += "<b>ShardingColumnName:</b>&nbsp;" + value.ShardingColumnName + "<br>"
		page += "<b>ShardingColumnType:</b>&nbsp;" + string(value.ShardingColumnType) + "<br>"
	}

	if len(value.ServedFrom) > 0 {
		page += "<b>ServedFrom:</b><br>"
		for _, served := range value.ServedFrom {
			typeName := topoproto.TabletTypeLString(served.TabletType)
			page += "&nbsp;<b>" + typeName + "</b>&nbsp;" + served.Keyspace + "<br>"
		}
	}

	return template.HTML(page)
}
Example #3
0
// ExecuteKeyRanges executes a non-streaming query based on the specified keyranges.
//
// The SQL is first passed through
// sqlannotation.AddFilteredReplicationUnfriendlyIfDML and then handed to the
// resolver. On success the number of returned rows is added to the
// rowsReturned stat. On failure the error is reported through
// handleExecuteError together with a description of the request, and the
// resolver's error is returned with a nil result.
func (vtg *VTGate) ExecuteKeyRanges(ctx context.Context, sql string, bindVariables map[string]interface{}, keyspace string, keyRanges []*topodatapb.KeyRange, tabletType topodatapb.TabletType, session *vtgatepb.Session, notInTransaction bool) (*sqltypes.Result, error) {
	begin := time.Now()
	typeName := topoproto.TabletTypeLString(tabletType)
	statsKey := []string{"ExecuteKeyRanges", keyspace, typeName}
	defer vtg.timings.Record(statsKey, begin)

	sql = sqlannotation.AddFilteredReplicationUnfriendlyIfDML(sql)

	result, err := vtg.resolver.ExecuteKeyRanges(ctx, sql, bindVariables, keyspace, keyRanges, tabletType, session, notInTransaction)
	if err != nil {
		// NOTE(review): whatever handleExecuteError returns (if anything) is
		// discarded and the raw resolver error is returned; confirm this is
		// intended.
		handleExecuteError(err, statsKey, map[string]interface{}{
			"Sql":              sql,
			"BindVariables":    bindVariables,
			"Keyspace":         keyspace,
			"KeyRanges":        keyRanges,
			"TabletType":       typeName,
			"Session":          session,
			"NotInTransaction": notInTransaction,
		}, vtg.logExecuteKeyRanges)
		return nil, err
	}

	vtg.rowsReturned.Add(statsKey, int64(len(result.Rows)))
	return result, nil
}
Example #4
0
// ExecuteBatchKeyspaceIds executes a group of queries based on the specified keyspace ids.
//
// The queries are annotated in place by annotateBoundKeyspaceIDQueries before
// being sent to the resolver. On success the total row count across all
// results is added to the rowsReturned stat. On failure the error returned
// by handleExecuteError is returned with a nil result. The stats key uses an
// empty keyspace component.
func (vtg *VTGate) ExecuteBatchKeyspaceIds(ctx context.Context, queries []*vtgatepb.BoundKeyspaceIdQuery, tabletType topodatapb.TabletType, asTransaction bool, session *vtgatepb.Session, options *querypb.ExecuteOptions) ([]sqltypes.Result, error) {
	begin := time.Now()
	typeName := topoproto.TabletTypeLString(tabletType)
	statsKey := []string{"ExecuteBatchKeyspaceIds", "", typeName}
	defer vtg.timings.Record(statsKey, begin)

	annotateBoundKeyspaceIDQueries(queries)

	results, err := vtg.resolver.ExecuteBatchKeyspaceIds(ctx, queries, tabletType, asTransaction, session, options)
	if err != nil {
		return nil, handleExecuteError(err, statsKey, map[string]interface{}{
			"Queries":       queries,
			"TabletType":    typeName,
			"AsTransaction": asTransaction,
			"Session":       session,
			"Options":       options,
		}, vtg.logExecuteBatchKeyspaceIds)
	}

	// Sum the rows of every result so the stat reflects the whole batch.
	var totalRows int64
	for i := range results {
		totalRows += int64(len(results[i].Rows))
	}
	vtg.rowsReturned.Add(statsKey, totalRows)
	return results, nil
}
Example #5
0
// StreamExecuteShards executes a streaming query on the specified shards.
//
// Every reply received from the resolver has its row count added to the
// rowsReturned stat before being forwarded to sendReply. When the resolver
// reports an error, the error is counted in normalErrors and logged together
// with the request parameters. The resolver's result is always passed
// through formatError before being returned.
func (vtg *VTGate) StreamExecuteShards(ctx context.Context, sql string, bindVariables map[string]interface{}, keyspace string, shards []string, tabletType topodatapb.TabletType, options *querypb.ExecuteOptions, sendReply func(*sqltypes.Result) error) error {
	begin := time.Now()
	typeName := topoproto.TabletTypeLString(tabletType)
	statsKey := []string{"StreamExecuteShards", keyspace, typeName}
	defer vtg.timings.Record(statsKey, begin)

	// The caller already chose the shards, so shard resolution simply hands
	// them back for whichever keyspace it is asked about.
	fixedShards := func(ks string) (string, []string, error) {
		return ks, shards, nil
	}
	// Account for the rows of each reply, then pass it on to the caller.
	countAndForward := func(reply *sqltypes.Result) error {
		vtg.rowsReturned.Add(statsKey, int64(len(reply.Rows)))
		return sendReply(reply)
	}

	err := vtg.resolver.streamExecute(ctx, sql, bindVariables, keyspace, tabletType, fixedShards, options, countAndForward)
	if err != nil {
		normalErrors.Add(statsKey, 1)
		logError(err, map[string]interface{}{
			"Sql":           sql,
			"BindVariables": bindVariables,
			"Keyspace":      keyspace,
			"Shards":        shards,
			"TabletType":    typeName,
			"Options":       options,
		}, vtg.logStreamExecuteShards)
	}
	return formatError(err)
}
Example #6
0
// ExecuteEntityIds executes a non-streaming query based on the given
// KeyspaceId map.
//
// The SQL is first passed through sqlannotation.AnnotateIfDML (with a nil
// second argument) and then handed to the resolver. On success the number of
// returned rows is added to the rowsReturned stat. On failure the error
// returned by handleExecuteError is returned with a nil result.
func (vtg *VTGate) ExecuteEntityIds(ctx context.Context, sql string, bindVariables map[string]interface{}, keyspace string, entityColumnName string, entityKeyspaceIDs []*vtgatepb.ExecuteEntityIdsRequest_EntityId, tabletType topodatapb.TabletType, session *vtgatepb.Session, notInTransaction bool, options *querypb.ExecuteOptions) (*sqltypes.Result, error) {
	begin := time.Now()
	typeName := topoproto.TabletTypeLString(tabletType)
	statsKey := []string{"ExecuteEntityIds", keyspace, typeName}
	defer vtg.timings.Record(statsKey, begin)

	sql = sqlannotation.AnnotateIfDML(sql, nil)

	result, err := vtg.resolver.ExecuteEntityIds(ctx, sql, bindVariables, keyspace, entityColumnName, entityKeyspaceIDs, tabletType, session, notInTransaction, options)
	if err != nil {
		return nil, handleExecuteError(err, statsKey, map[string]interface{}{
			"Sql":               sql,
			"BindVariables":     bindVariables,
			"Keyspace":          keyspace,
			"EntityColumnName":  entityColumnName,
			"EntityKeyspaceIDs": entityKeyspaceIDs,
			"TabletType":        typeName,
			"Session":           session,
			"NotInTransaction":  notInTransaction,
			"Options":           options,
		}, vtg.logExecuteEntityIds)
	}

	vtg.rowsReturned.Add(statsKey, int64(len(result.Rows)))
	return result, nil
}
Example #7
0
// StreamExecuteKeyRanges executes a streaming query on the specified KeyRanges.
// The KeyRanges are resolved to shards using the serving graph.
// This function currently temporarily enforces the restriction of executing on
// one shard since it cannot merge-sort the results to guarantee ordering of
// response which is needed for checkpointing.
// The api supports supplying multiple keyranges to make it future proof.
//
// Every reply received from the resolver has its row count added to the
// rowsReturned stat before being forwarded to sendReply. When the resolver
// reports an error, the error is counted in normalErrors and logged together
// with the request parameters. The resolver's result is always passed
// through formatError before being returned.
func (vtg *VTGate) StreamExecuteKeyRanges(ctx context.Context, sql string, bindVariables map[string]interface{}, keyspace string, keyRanges []*topodatapb.KeyRange, tabletType topodatapb.TabletType, sendReply func(*sqltypes.Result) error) error {
	startTime := time.Now()
	ltt := topoproto.TabletTypeLString(tabletType)
	statsKey := []string{"StreamExecuteKeyRanges", keyspace, ltt}
	defer vtg.timings.Record(statsKey, startTime)

	err := vtg.resolver.StreamExecuteKeyRanges(
		ctx,
		sql,
		bindVariables,
		keyspace,
		keyRanges,
		tabletType,
		func(reply *sqltypes.Result) error {
			// Rows are accounted for per reply; no running total is kept
			// because nothing reads one.
			vtg.rowsReturned.Add(statsKey, int64(len(reply.Rows)))
			return sendReply(reply)
		})

	if err != nil {
		normalErrors.Add(statsKey, 1)
		query := map[string]interface{}{
			"Sql":           sql,
			"BindVariables": bindVariables,
			"Keyspace":      keyspace,
			"KeyRanges":     keyRanges,
			"TabletType":    ltt,
		}
		logError(err, query, vtg.logStreamExecuteKeyRanges)
	}
	return formatError(err)
}
Example #8
0
// ExecuteBatchShards executes a group of queries on the specified shards.
//
// The queries are annotated in place by annotateBoundShardQueriesAsUnfriendly
// and converted to a scatter batch request on demand by the resolver. On
// success the total row count across all results is added to the
// rowsReturned stat. On failure the error is reported through
// handleExecuteError and the resolver's error is returned with a nil result.
// The stats key uses an empty keyspace component.
func (vtg *VTGate) ExecuteBatchShards(ctx context.Context, queries []*vtgatepb.BoundShardQuery, tabletType topodatapb.TabletType, asTransaction bool, session *vtgatepb.Session) ([]sqltypes.Result, error) {
	begin := time.Now()
	typeName := topoproto.TabletTypeLString(tabletType)
	statsKey := []string{"ExecuteBatchShards", "", typeName}
	defer vtg.timings.Record(statsKey, begin)

	annotateBoundShardQueriesAsUnfriendly(queries)

	buildRequest := func() (*scatterBatchRequest, error) {
		return boundShardQueriesToScatterBatchRequest(queries)
	}
	results, err := vtg.resolver.ExecuteBatch(ctx, tabletType, asTransaction, session, buildRequest)
	if err != nil {
		// NOTE(review): whatever handleExecuteError returns (if anything) is
		// discarded and the raw resolver error is returned; confirm this is
		// intended.
		handleExecuteError(err, statsKey, map[string]interface{}{
			"Queries":       queries,
			"TabletType":    typeName,
			"AsTransaction": asTransaction,
			"Session":       session,
		}, vtg.logExecuteBatchShards)
		return nil, err
	}

	// Sum the rows of every result so the stat reflects the whole batch.
	var totalRows int64
	for i := range results {
		totalRows += int64(len(results[i].Rows))
	}
	vtg.rowsReturned.Add(statsKey, totalRows)
	return results, nil
}
Example #9
0
// getKeyspaceShards looks up the serving keyspace record for keyspace in the
// given cell and returns the shard references of the partition that serves
// tabletType.
//
// If the keyspace has a ServedFrom entry for tabletType, the lookup is
// repeated against the keyspace that entry points to, and that keyspace name
// is the one returned. The results are: the effective keyspace name, its
// serving keyspace record, the shard references of the matching partition,
// and an error. Fetch failures and a missing partition are both reported as
// INTERNAL_ERROR Vitess errors.
func getKeyspaceShards(ctx context.Context, topoServ topo.SrvTopoServer, cell, keyspace string, tabletType topodatapb.TabletType) (string, *topodatapb.SrvKeyspace, []*topodatapb.ShardReference, error) {
	// fetch reads the serving keyspace record for name, wrapping any failure.
	fetch := func(name string) (*topodatapb.SrvKeyspace, error) {
		record, fetchErr := topoServ.GetSrvKeyspace(ctx, cell, name)
		if fetchErr != nil {
			return nil, vterrors.NewVitessError(
				vtrpcpb.ErrorCode_INTERNAL_ERROR, fetchErr,
				"keyspace %v fetch error: %v", name, fetchErr,
			)
		}
		return record, nil
	}

	srvKeyspace, err := fetch(keyspace)
	if err != nil {
		return "", nil, nil, err
	}

	// check if the keyspace has been redirected for this tabletType.
	// The range expression is evaluated once, so the loop keeps walking the
	// ServedFrom list of the originally requested keyspace.
	for _, redirect := range srvKeyspace.ServedFrom {
		if redirect.TabletType != tabletType {
			continue
		}
		keyspace = redirect.Keyspace
		if srvKeyspace, err = fetch(keyspace); err != nil {
			return "", nil, nil, err
		}
	}

	partition := topoproto.SrvKeyspaceGetPartition(srvKeyspace, tabletType)
	if partition != nil {
		return keyspace, srvKeyspace, partition.ShardReferences, nil
	}
	// err is always nil at this point: every failed fetch returned above.
	return "", nil, nil, vterrors.NewVitessError(
		vtrpcpb.ErrorCode_INTERNAL_ERROR, err,
		"No partition found for tabletType %v in keyspace %v", topoproto.TabletTypeLString(tabletType), keyspace,
	)
}
Example #10
0
// NewShardError returns a ShardError which preserves the original
// error code if possible, adds the connection context and adds a bit
// to determine whether the keyspace/shard needs to be re-resolved for
// a potential sharding event (namely, if we were in a transaction).
//
// A nil input error yields a nil result. The shard identifier has the form
// "keyspace.shard.type", followed by ", " and the %+v rendering of tablet
// when a tablet is supplied.
func NewShardError(in error, keyspace, shard string, tabletType topodatapb.TabletType, tablet *topodatapb.Tablet, inTransaction bool) error {
	if in == nil {
		return nil
	}

	identifier := fmt.Sprintf("%s.%s.%s", keyspace, shard, topoproto.TabletTypeLString(tabletType))
	if tablet != nil {
		identifier = fmt.Sprintf("%s, %+v", identifier, tablet)
	}

	shardErr := &ShardError{
		ShardIdentifier: identifier,
		InTransaction:   inTransaction,
		Err:             in,
		ErrorCode:       vterrors.RecoverVtErrorCode(in),
	}
	return shardErr
}
Example #11
0
// checkHealthCheckTimeout marks as non-serving every tablet that is currently
// serving but whose last health check response is at least
// hc.healthCheckTimeout old. For each tablet it flips, it records a timeout
// error in the tablet stats, notifies the listener (when one is set) with a
// copy of the stats, and increments the error counter for the tablet's
// keyspace/shard/type.
func (hc *HealthCheckImpl) checkHealthCheckTimeout() {
	// Snapshot the connections so hc.mu is not held while the per-connection
	// locks are taken.
	hc.mu.RLock()
	conns := make([]*healthCheckConn, 0, len(hc.addrToConns))
	for _, conn := range hc.addrToConns {
		conns = append(conns, conn)
	}
	hc.mu.RUnlock()

	// timedOut reports whether conn is serving and has not answered within
	// the timeout. The caller must hold conn.mu (read or write).
	timedOut := func(conn *healthCheckConn) bool {
		if !conn.tabletStats.Serving {
			// ignore non-serving tablet
			return false
		}
		// false here means a healthcheck response was received recently
		return time.Since(conn.lastResponseTimestamp) >= hc.healthCheckTimeout
	}

	for _, conn := range conns {
		// Cheap first look under the read lock.
		conn.mu.RLock()
		stale := timedOut(conn)
		conn.mu.RUnlock()
		if !stale {
			continue
		}

		// mark the tablet non-serving as we have not seen a health check response for a long time
		conn.mu.Lock()
		// check again to avoid race condition
		if !timedOut(conn) {
			conn.mu.Unlock()
			continue
		}
		conn.tabletStats.Serving = false
		conn.tabletStats.LastError = fmt.Errorf("healthcheck timed out (latest %v)", conn.lastResponseTimestamp)
		snapshot := conn.tabletStats
		conn.mu.Unlock()

		// notify downstream for serving status change
		if hc.listener != nil {
			hc.listener.StatsUpdate(&snapshot)
		}
		target := snapshot.Target
		hcErrorCounters.Add([]string{target.Keyspace, target.Shard, topoproto.TabletTypeLString(target.TabletType)}, 1)
	}
}
Example #12
0
// getStatsAggregator returns the TabletStatusAggregator registered for the
// given connection and tablet type, creating and registering one if none
// exists yet. Aggregators are keyed by "<conn.addr>:<tablet type>".
func (lg *l2VTGateGateway) getStatsAggregator(conn *l2VTGateConn, tabletType topodatapb.TabletType) *TabletStatusAggregator {
	key := fmt.Sprintf("%v:%v", conn.addr, topoproto.TabletTypeLString(tabletType))

	// First look under the read lock only.
	lg.mu.RLock()
	existing, found := lg.statusAggregators[key]
	lg.mu.RUnlock()
	if found {
		return existing
	}

	// Not found: take the write lock and look again before creating, since
	// another caller may have registered the aggregator in between.
	lg.mu.Lock()
	defer lg.mu.Unlock()
	if existing, found = lg.statusAggregators[key]; found {
		return existing
	}
	created := NewTabletStatusAggregator(conn.keyspace, conn.shard, tabletType, key)
	lg.statusAggregators[key] = created
	return created
}
Example #13
0
// changeCallback is run after every action that might
// have changed something in the tablet record or in the topology.
//
// It owns making changes to the BinlogPlayerMap. The input for this is the
// tablet type (has to be master), and the shard's SourceShards.
//
// It owns updating the blacklisted tables.
//
// It owns updating the stats record for 'TabletType'.
//
// It owns starting and stopping the update stream service.
//
// It owns reading the TabletControl for the current tablet, and storing it.
//
// oldTablet and newTablet are the tablet records before and after the
// change. The function returns nothing: failures are logged and the
// remaining steps still run.
func (agent *ActionAgent) changeCallback(ctx context.Context, oldTablet, newTablet *topodatapb.Tablet) {
	span := trace.NewSpanFromContext(ctx)
	span.StartLocal("ActionAgent.changeCallback")
	defer span.Finish()

	allowQuery := topo.IsRunningQueryService(newTablet.Type)
	broadcastHealth := false
	// runUpdateStream is captured before the shard record can turn allowQuery
	// off below, so those shard-level overrides do not affect it. It is only
	// cleared later if starting the query service fails.
	runUpdateStream := allowQuery

	// Read the shard to get SourceShards / TabletControlMap if
	// we're going to use it.
	var shardInfo *topo.ShardInfo
	var err error
	var disallowQueryReason string
	var blacklistedTables []string
	updateBlacklistedTables := true
	if allowQuery {
		shardInfo, err = agent.TopoServer.GetShard(ctx, newTablet.Keyspace, newTablet.Shard)
		if err != nil {
			log.Errorf("Cannot read shard for this tablet %v, might have inaccurate SourceShards and TabletControls: %v", newTablet.Alias, err)
			// Without the shard record the blacklist cannot be determined,
			// so the current rules are left untouched.
			updateBlacklistedTables = false
		} else {
			if newTablet.Type == topodatapb.TabletType_MASTER {
				if len(shardInfo.SourceShards) > 0 {
					allowQuery = false
					disallowQueryReason = "master tablet with filtered replication on"
				}
			}
			if tc := shardInfo.GetTabletControl(newTablet.Type); tc != nil {
				// The tablet control only applies when this tablet's cell is
				// in the control's cell list.
				if topo.InCellList(newTablet.Alias.Cell, tc.Cells) {
					if tc.DisableQueryService {
						allowQuery = false
						disallowQueryReason = "TabletControl.DisableQueryService set"
					}
					blacklistedTables = tc.BlacklistedTables
				}
			}
		}
	} else {
		disallowQueryReason = fmt.Sprintf("not a serving tablet type(%v)", newTablet.Type)
	}
	agent.setServicesDesiredState(disallowQueryReason, runUpdateStream)
	if updateBlacklistedTables {
		if err := agent.loadBlacklistRules(newTablet, blacklistedTables); err != nil {
			// FIXME(alainjobart) how to handle this error?
			log.Errorf("Cannot update blacklisted tables rule: %v", err)
		} else {
			agent.setBlacklistedTables(blacklistedTables)
		}
	}

	if allowQuery {
		// Query service should be running.
		if oldTablet.Type == topodatapb.TabletType_REPLICA &&
			newTablet.Type == topodatapb.TabletType_MASTER {
			// When promoting from replica to master, allow both master and replica
			// queries to be served during gracePeriod.
			if _, err := agent.QueryServiceControl.SetServingType(newTablet.Type,
				true, []topodatapb.TabletType{oldTablet.Type}); err == nil {
				// If successful, broadcast to vtgate and then wait.
				// The sleep blocks this callback for the whole grace period.
				agent.broadcastHealth()
				time.Sleep(*gracePeriod)
			} else {
				log.Errorf("Can't start query service for MASTER+REPLICA mode: %v", err)
			}
		}

		if stateChanged, err := agent.QueryServiceControl.SetServingType(newTablet.Type, true, nil); err == nil {
			// If the state changed, broadcast to vtgate.
			// (e.g. this happens when the tablet was already master, but it just
			// changed from NOT_SERVING to SERVING due to
			// "vtctl MigrateServedFrom ... master".)
			if stateChanged {
				broadcastHealth = true
			}
		} else {
			// The query service could not be started, so the update stream
			// is kept off as well.
			runUpdateStream = false
			log.Errorf("Cannot start query service: %v", err)
		}
	} else {
		// Query service should be stopped.
		if topo.IsSubjectToLameduck(oldTablet.Type) &&
			newTablet.Type == topodatapb.TabletType_SPARE &&
			*gracePeriod > 0 {
			// When a non-MASTER serving type is going SPARE,
			// put query service in lameduck during gracePeriod.
			agent.lameduck(disallowQueryReason)
		}

		log.Infof("Disabling query service on type change, reason: %v", disallowQueryReason)
		if stateChanged, err := agent.QueryServiceControl.SetServingType(newTablet.Type, false, nil); err == nil {
			// If the state changed, broadcast to vtgate.
			// (e.g. this happens when the tablet was already master, but it just
			// changed from SERVING to NOT_SERVING because filtered replication was
			// enabled.)
			if stateChanged {
				broadcastHealth = true
			}
		} else {
			log.Errorf("SetServingType(serving=false) failed: %v", err)
		}
	}

	// update stream needs to be started or stopped too
	if topo.IsRunningUpdateStream(newTablet.Type) && runUpdateStream {
		agent.UpdateStream.Enable()
	} else {
		agent.UpdateStream.Disable()
	}

	// update the stats to our current type
	if agent.exportStats {
		agent.statsTabletType.Set(topoproto.TabletTypeLString(newTablet.Type))
	}

	// See if we need to start or stop any binlog player
	if agent.BinlogPlayerMap != nil {
		if newTablet.Type == topodatapb.TabletType_MASTER {
			// NOTE(review): shardInfo is only assigned by the GetShard call
			// above, which runs only when the tablet type allows queries and
			// may have failed; confirm RefreshMap tolerates that.
			agent.BinlogPlayerMap.RefreshMap(agent.batchCtx, newTablet, shardInfo)
		} else {
			agent.BinlogPlayerMap.StopAllPlayersAndReset()
		}
	}

	// Broadcast health changes to vtgate immediately.
	if broadcastHealth {
		agent.broadcastHealth()
	}
}
Example #14
0
// checkConn performs health checking on the given tablet.
//
// It loops until hcc.ctx is done: connect to the tablet, then process
// stream health responses until one requests a reconnect. On return it
// signals hc.wg and closes the connection if one is still open.
//
// The name parameter is not used in this function body.
func (hc *HealthCheckImpl) checkConn(hcc *healthCheckConn, name string) {
	defer hc.wg.Done()
	// Close and clear any connection still held when the loop exits.
	defer func() {
		hcc.mu.Lock()
		if hcc.conn != nil {
			hcc.conn.Close()
			hcc.conn = nil
		}
		hcc.mu.Unlock()
	}()
	// retry health check if it fails
	for {
		// Try to connect to the tablet.
		stream, err := hcc.connect(hc)
		if err != nil {
			// A failure after cancellation ends the checker without
			// recording an error.
			select {
			case <-hcc.ctx.Done():
				return
			default:
			}
			hcc.mu.Lock()
			hcc.tabletStats.Serving = false
			hcc.tabletStats.LastError = err
			target := hcc.tabletStats.Target
			hcc.mu.Unlock()
			// Unlike the response-error path below, the listener is not
			// notified here; only the error counter is incremented.
			hcErrorCounters.Add([]string{target.Keyspace, target.Shard, topoproto.TabletTypeLString(target.TabletType)}, 1)

			// Sleep until the next retry is up or the context is done/canceled.
			select {
			case <-hcc.ctx.Done():
			case <-time.After(hc.retryDelay):
			}
			continue
		}

		// Read stream health responses.
		for {
			reconnect, err := hcc.processResponse(hc, stream)
			if err != nil {
				hcc.mu.Lock()
				hcc.tabletStats.Serving = false
				hcc.tabletStats.LastError = err
				// Copy the stats so the listener can be called without
				// holding hcc.mu.
				ts := hcc.tabletStats
				hcc.mu.Unlock()
				// notify downstream for serving status change
				if hc.listener != nil {
					hc.listener.StatsUpdate(&ts)
				}
				// The cancellation check comes after the notification, so
				// the listener is told even when the checker is stopping,
				// but the error counter is not incremented in that case.
				select {
				case <-hcc.ctx.Done():
					return
				default:
				}
				hcErrorCounters.Add([]string{ts.Target.Keyspace, ts.Target.Shard, topoproto.TabletTypeLString(ts.Target.TabletType)}, 1)
				if reconnect {
					hcc.mu.Lock()
					// NOTE(review): unlike the deferred cleanup above,
					// hcc.conn is closed here without a nil check; confirm
					// it is always set on this path.
					hcc.conn.Close()
					hcc.conn = nil
					// Reset the target to an empty value before reconnecting.
					hcc.tabletStats.Target = &querypb.Target{}
					hcc.mu.Unlock()

					// Sleep until the next retry is up or the context is done/canceled.
					select {
					case <-hcc.ctx.Done():
					case <-time.After(hc.retryDelay):
					}
					// Leave the response loop and go back to connecting.
					break
				}
			}
		}
	}
}
Example #15
0
// startAction builds the stats key (name, keyspace, shard, tablet type) for
// an action and returns it together with the current time.
func (stc *ScatterConn) startAction(name, keyspace, shard string, tabletType topodatapb.TabletType) (time.Time, []string) {
	key := []string{
		name,
		keyspace,
		shard,
		topoproto.TabletTypeLString(tabletType),
	}
	return time.Now(), key
}
Example #16
0
// startAction builds the stats key (name, then the target's keyspace, shard
// and tablet type) for an action and returns it together with the current
// time. target must be non-nil because its fields are read unconditionally.
func (l *L2VTGate) startAction(name string, target *querypb.Target) (time.Time, []string) {
	key := []string{
		name,
		target.Keyspace,
		target.Shard,
		topoproto.TabletTypeLString(target.TabletType),
	}
	return time.Now(), key
}
Example #17
0
// servingConnStats returns the number of serving tablets per keyspace/shard/tablet type.
//
// A tablet is counted only when its stats show it as up, serving, and
// without a last error. The map keys have the form "keyspace.shard.type".
func (hc *HealthCheckImpl) servingConnStats() map[string]int64 {
	counts := make(map[string]int64)

	hc.mu.RLock()
	defer hc.mu.RUnlock()
	for _, conn := range hc.addrToConns {
		// Read everything needed from the connection under its own lock and
		// update the result map only after releasing it.
		conn.mu.RLock()
		healthy := conn.tabletStats.Up && conn.tabletStats.Serving && conn.tabletStats.LastError == nil
		var key string
		if healthy {
			key = fmt.Sprintf("%s.%s.%s", conn.tabletStats.Target.Keyspace, conn.tabletStats.Target.Shard, topoproto.TabletTypeLString(conn.tabletStats.Target.TabletType))
		}
		conn.mu.RUnlock()

		if healthy {
			counts[key]++
		}
	}
	return counts
}