Exemplo n.º 1
0
func (c *context) FinishMessage(ch *nsqd.Channel, clientID int64, clientAddr string, msgID nsqd.MessageID) error {
	if c.nsqdCoord == nil {
		_, _, _, err := ch.FinishMessage(clientID, clientAddr, msgID)
		if err == nil {
			ch.ContinueConsumeForOrder()
		}
		return err
	}
	return c.nsqdCoord.FinishMessageToCluster(ch, clientID, clientAddr, msgID)
}
Exemplo n.º 2
0
func (c *context) SetChannelOffset(ch *nsqd.Channel, startFrom *ConsumeOffset, force bool) (int64, int64, error) {
	var l *consistence.CommitLogData
	var queueOffset int64
	cnt := int64(0)
	var err error
	if startFrom.OffsetType == offsetTimestampType {
		if c.nsqdCoord != nil {
			l, queueOffset, cnt, err = c.nsqdCoord.SearchLogByMsgTimestamp(ch.GetTopicName(), ch.GetTopicPart(), startFrom.OffsetValue)
		} else {
			err = errors.New("Not supported while coordinator disabled")
		}
	} else if startFrom.OffsetType == offsetSpecialType {
		if startFrom.OffsetValue == -1 {
			e := ch.GetChannelEnd()
			queueOffset = int64(e.Offset())
			cnt = e.TotalMsgCnt()
		} else {
			nsqd.NsqLogger().Logf("not known special offset :%v", startFrom)
			err = errors.New("not supported offset type")
		}
	} else if startFrom.OffsetType == offsetVirtualQueueType {
		queueOffset = startFrom.OffsetValue
		cnt = 0
		if c.nsqdCoord != nil {
			l, queueOffset, cnt, err = c.nsqdCoord.SearchLogByMsgOffset(ch.GetTopicName(), ch.GetTopicPart(), queueOffset)
		} else {
			err = errors.New("Not supported while coordinator disabled")
		}
	} else if startFrom.OffsetType == offsetMsgCountType {
		if c.nsqdCoord != nil {
			l, queueOffset, cnt, err = c.nsqdCoord.SearchLogByMsgCnt(ch.GetTopicName(), ch.GetTopicPart(), startFrom.OffsetValue)
		} else {
			err = errors.New("Not supported while coordinator disabled")
		}
	} else {
		nsqd.NsqLogger().Logf("not supported offset type:%v", startFrom)
		err = errors.New("not supported offset type")
	}
	if err != nil {
		nsqd.NsqLogger().Logf("failed to search the consume offset: %v, err:%v", startFrom, err)
		return 0, 0, err
	}
	nsqd.NsqLogger().Logf("%v searched log : %v, offset: %v:%v", startFrom, l, queueOffset, cnt)
	if c.nsqdCoord == nil {
		err = ch.SetConsumeOffset(nsqd.BackendOffset(queueOffset), cnt, force)
		if err != nil {
			if err != nsqd.ErrSetConsumeOffsetNotFirstClient {
				nsqd.NsqLogger().Logf("failed to set the consume offset: %v, err:%v", startFrom, err)
				return 0, 0, err
			}
			nsqd.NsqLogger().Logf("the consume offset: %v can only be set by the first client", startFrom)
		}
	} else {
		err = c.nsqdCoord.SetChannelConsumeOffsetToCluster(ch, queueOffset, cnt, force)
		if err != nil {
			if coordErr, ok := err.(*consistence.CommonCoordErr); ok {
				if coordErr.IsEqual(consistence.ErrLocalSetChannelOffsetNotFirstClient) {
					nsqd.NsqLogger().Logf("the consume offset: %v can only be set by the first client", startFrom)
					return queueOffset, cnt, nil
				}
			}
			nsqd.NsqLogger().Logf("failed to set the consume offset: %v (%v:%v), err: %v ", startFrom, queueOffset, cnt, err)
			return 0, 0, err
		}
	}
	return queueOffset, cnt, nil
}
Exemplo n.º 3
0
func (p *protocolV2) messagePump(client *nsqd.ClientV2, startedChan chan bool,
	stoppedChan chan bool) {
	var err error
	var buf bytes.Buffer
	var clientMsgChan chan *nsqd.Message
	var subChannel *nsqd.Channel
	// NOTE: `flusherChan` is used to bound message latency for
	// the pathological case of a channel on a low volume topic
	// with >1 clients having >1 RDY counts
	var flusherChan <-chan time.Time
	var sampleRate int32

	subEventChan := client.SubEventChan
	identifyEventChan := client.IdentifyEventChan
	outputBufferTicker := time.NewTicker(client.OutputBufferTimeout)
	heartbeatTicker := time.NewTicker(client.HeartbeatInterval)
	heartbeatChan := heartbeatTicker.C
	heartbeatFailedCnt := 0
	msgTimeout := client.MsgTimeout

	// v2 opportunistically buffers data to clients to reduce write system calls
	// we force flush in two cases:
	//    1. when the client is not ready to receive messages
	//    2. we're buffered and the channel has nothing left to send us
	//       (ie. we would block in this loop anyway)
	//
	flushed := true

	// signal to the goroutine that started the messagePump
	// that we've started up
	close(startedChan)

	for {
		if subChannel == nil || !client.IsReadyForMessages() {
			// the client is not ready to receive messages...
			clientMsgChan = nil
			flusherChan = nil
			// force flush
			client.LockWrite()
			err = client.Flush()
			client.UnlockWrite()
			if err != nil {
				goto exit
			}
			flushed = true
		} else if flushed {
			// last iteration we flushed...
			// do not select on the flusher ticker channel
			clientMsgChan = subChannel.GetClientMsgChan()
			flusherChan = nil
		} else {
			// we're buffered (if there isn't any more data we should flush)...
			// select on the flusher ticker channel, too
			clientMsgChan = subChannel.GetClientMsgChan()
			flusherChan = outputBufferTicker.C
		}

		select {
		case <-client.ExitChan:
			goto exit
		case <-flusherChan:
			// if this case wins, we're either starved
			// or we won the race between other channels...
			// in either case, force flush
			client.LockWrite()
			err = client.Flush()
			client.UnlockWrite()
			if err != nil {
				goto exit
			}
			flushed = true
		case <-client.ReadyStateChan:
		case subChannel = <-subEventChan:
			// you can't SUB anymore
			nsqd.NsqLogger().Logf("client %v sub to channel: %v", client.ID,
				subChannel.GetName())
			subEventChan = nil
		case identifyData := <-identifyEventChan:
			// you can't IDENTIFY anymore
			identifyEventChan = nil

			outputBufferTicker.Stop()
			if identifyData.OutputBufferTimeout > 0 {
				outputBufferTicker = time.NewTicker(identifyData.OutputBufferTimeout)
			}

			heartbeatTicker.Stop()
			heartbeatChan = nil
			if identifyData.HeartbeatInterval > 0 {
				heartbeatTicker = time.NewTicker(identifyData.HeartbeatInterval)
				heartbeatChan = heartbeatTicker.C
			}

			if identifyData.SampleRate > 0 {
				sampleRate = identifyData.SampleRate
			}

			msgTimeout = identifyData.MsgTimeout
		case <-heartbeatChan:
			if subChannel != nil && client.IsReadyForMessages() {
				// try wake up the channel
				subChannel.TryWakeupRead()
			}

			err = Send(client, frameTypeResponse, heartbeatBytes)
			nsqd.NsqLogger().LogDebugf("PROTOCOL(V2): [%s] send heartbeat", client)
			if err != nil {
				heartbeatFailedCnt++
				nsqd.NsqLogger().LogWarningf("PROTOCOL(V2): [%s] send heartbeat failed %v times, %v", client, heartbeatFailedCnt, err)
				if heartbeatFailedCnt > 2 {
					goto exit
				}
			} else {
				heartbeatFailedCnt = 0
			}
		case msg, ok := <-clientMsgChan:
			if !ok {
				goto exit
			}

			if sampleRate > 0 && rand.Int31n(100) > sampleRate {
				// FIN automatically, all message will not wait to confirm if not sending,
				// and the reader keep moving forward.
				offset, _, _, _ := subChannel.ConfirmBackendQueue(msg)
				// TODO: sync to replica nodes.
				_ = offset
				continue
			}
			// avoid re-send some confirmed message,
			// this may happen while the channel reader is reset to old position
			// due to some retry or leader change.
			if subChannel.IsConfirmed(msg) {
				continue
			}

			subChannel.StartInFlightTimeout(msg, client.ID, client.String(), msgTimeout)
			client.SendingMessage()
			err = SendMessage(client, msg, &buf, subChannel.IsOrdered())
			if err != nil {
				goto exit
			}
			flushed = false
		}
	}

exit:
	nsqd.NsqLogger().LogDebugf("PROTOCOL(V2): [%s] exiting messagePump", client)
	heartbeatTicker.Stop()
	outputBufferTicker.Stop()
	if err != nil {
		nsqd.NsqLogger().Logf("PROTOCOL(V2): [%s] messagePump error - %s", client, err)
	}
	close(stoppedChan)
}
func (self *NsqdCoordinator) updateChannelOffsetOnSlave(tc *coordData, channelName string, offset ChannelConsumerOffset) *CoordErr {
	topicName := tc.topicInfo.Name
	partition := tc.topicInfo.Partition

	if !tc.IsMineISR(self.myNode.GetID()) {
		return ErrTopicWriteOnNonISR
	}

	if coordLog.Level() >= levellogger.LOG_DETAIL {
		coordLog.Debugf("got update channel(%v) offset on slave : %v", channelName, offset)
	}
	coord, coordErr := self.getTopicCoord(topicName, partition)
	if coordErr != nil {
		return ErrMissingTopicCoord
	}

	topic, localErr := self.localNsqd.GetExistingTopic(topicName, partition)
	if localErr != nil {
		coordLog.Warningf("slave missing topic : %v", topicName)
		// TODO: leave the isr and try re-sync with leader
		return &CoordErr{localErr.Error(), RpcCommonErr, CoordSlaveErr}
	}

	if topic.GetTopicPart() != partition {
		coordLog.Errorf("topic on slave has different partition : %v vs %v", topic.GetTopicPart(), partition)
		return ErrLocalMissingTopic
	}
	var ch *nsqd.Channel
	ch, localErr = topic.GetExistingChannel(channelName)
	// if a new channel on slave, we should set the consume offset by force
	if localErr != nil {
		offset.AllowBackward = true
		ch = topic.GetChannel(channelName)
		coordLog.Infof("slave init the channel : %v, %v, offset: %v", topic.GetTopicName(), channelName, ch.GetConfirmed())
	}
	if ch.IsEphemeral() {
		coordLog.Errorf("ephemeral channel %v should not be synced on slave", channelName)
	}
	currentEnd := ch.GetChannelEnd()
	if nsqd.BackendOffset(offset.VOffset) > currentEnd.Offset() {
		coordLog.Debugf("update channel(%v) consume offset exceed end %v on slave : %v", channelName, offset, currentEnd)
		// cache the offset (using map?) to reduce the slave channel flush.
		coord.consumeMgr.Lock()
		cur, ok := coord.consumeMgr.channelConsumeOffset[channelName]
		if !ok || cur.VOffset < offset.VOffset {
			coord.consumeMgr.channelConsumeOffset[channelName] = offset
		}
		coord.consumeMgr.Unlock()

		if offset.Flush {
			topic.ForceFlush()
			currentEnd = ch.GetChannelEnd()
			if nsqd.BackendOffset(offset.VOffset) > currentEnd.Offset() {
				offset.VOffset = int64(currentEnd.Offset())
				offset.VCnt = currentEnd.TotalMsgCnt()
			}
		} else {
			return nil
		}
	}
	err := ch.ConfirmBackendQueueOnSlave(nsqd.BackendOffset(offset.VOffset), offset.VCnt, offset.AllowBackward)
	if err != nil {
		coordLog.Warningf("update local channel(%v) offset %v failed: %v, current channel end: %v, topic end: %v",
			channelName, offset, err, currentEnd, topic.TotalDataSize())
		if err == nsqd.ErrExiting {
			return &CoordErr{err.Error(), RpcNoErr, CoordTmpErr}
		}
		return &CoordErr{err.Error(), RpcCommonErr, CoordSlaveErr}
	}
	return nil
}
func (self *NsqdCoordinator) FinishMessageToCluster(channel *nsqd.Channel, clientID int64, clientAddr string, msgID nsqd.MessageID) error {
	topicName := channel.GetTopicName()
	partition := channel.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}

	var syncOffset ChannelConsumerOffset
	changed := false
	var confirmed nsqd.BackendQueueEnd
	if channel.IsOrdered() {
		if !coord.GetData().IsISRReadyForWrite() {
			coordLog.Warningf("topic(%v) finish message ordered failed since no enough ISR", topicName)
			coordErrStats.incWriteErr(ErrWriteQuorumFailed)
			return ErrWriteQuorumFailed.ToErrorType()
		}

		confirmed = channel.GetConfirmed()
	}
	// TODO: maybe use channel to aggregate all the sync of message to reduce the rpc call.

	doLocalWrite := func(d *coordData) *CoordErr {
		offset, cnt, tmpChanged, localErr := channel.FinishMessage(clientID, clientAddr, msgID)
		if localErr != nil {
			coordLog.Infof("channel %v finish local msg %v error: %v", channel.GetName(), msgID, localErr)
			changed = false
			return &CoordErr{localErr.Error(), RpcNoErr, CoordLocalErr}
		}
		changed = tmpChanged
		syncOffset.VOffset = int64(offset)
		syncOffset.VCnt = cnt
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		channel.ContinueConsumeForOrder()
		return nil
	}
	doLocalRollback := func() {
		if channel.IsOrdered() && confirmed != nil {
			coordLog.Warningf("rollback channel confirm to : %v", confirmed)
			// reset read to last confirmed
			channel.SetConsumeOffset(confirmed.Offset(), confirmed.TotalMsgCnt(), true)
		}
	}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		if !changed || channel.IsEphemeral() {
			return nil
		}
		var rpcErr *CoordErr
		if channel.IsOrdered() {
			// if ordered, we need make sure all the consume offset is synced to all replicas
			rpcErr = c.UpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		} else {
			c.NotifyUpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		}
		if rpcErr != nil {
			coordLog.Infof("sync channel(%v) offset to replica %v failed: %v, offset: %v", channel.GetName(),
				nodeID, rpcErr, syncOffset)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		// we can ignore the error if this channel is not ordered. (just sync next time)
		if successNum == len(tcData.topicInfo.ISR) || !channel.IsOrdered() {
			return true
		}
		return false
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit, doLocalRollback,
		doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}
func (self *NsqdCoordinator) SetChannelConsumeOffsetToCluster(ch *nsqd.Channel, queueOffset int64, cnt int64, force bool) error {
	topicName := ch.GetTopicName()
	partition := ch.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}

	var syncOffset ChannelConsumerOffset
	syncOffset.AllowBackward = true
	syncOffset.VCnt = cnt
	syncOffset.VOffset = queueOffset

	doLocalWrite := func(d *coordData) *CoordErr {
		err := ch.SetConsumeOffset(nsqd.BackendOffset(queueOffset), cnt, force)
		if err != nil {
			if err != nsqd.ErrSetConsumeOffsetNotFirstClient {
				coordLog.Infof("failed to set the consume offset: %v, err:%v", queueOffset, err)
				return &CoordErr{err.Error(), RpcNoErr, CoordLocalErr}
			}
			coordLog.Debugf("the consume offset: %v can only be set by the first client", queueOffset)
			return ErrLocalSetChannelOffsetNotFirstClient
		}
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		return nil
	}
	doLocalRollback := func() {}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		if ch.IsEphemeral() {
			return nil
		}
		rpcErr := c.UpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, ch.GetName(), syncOffset)
		if rpcErr != nil {
			coordLog.Infof("sync channel(%v) offset to replica %v failed: %v, offset: %v", ch.GetName(),
				nodeID, rpcErr, syncOffset)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		if successNum == len(tcData.topicInfo.ISR) {
			return true
		}
		return false
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit, doLocalRollback,
		doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}