Beispiel #1
0
func (p *protocolV2) messagePump(client *nsqd.ClientV2, startedChan chan bool,
	stoppedChan chan bool) {
	var err error
	var buf bytes.Buffer
	var clientMsgChan chan *nsqd.Message
	var subChannel *nsqd.Channel
	// NOTE: `flusherChan` is used to bound message latency for
	// the pathological case of a channel on a low volume topic
	// with >1 clients having >1 RDY counts
	var flusherChan <-chan time.Time
	var sampleRate int32

	subEventChan := client.SubEventChan
	identifyEventChan := client.IdentifyEventChan
	outputBufferTicker := time.NewTicker(client.OutputBufferTimeout)
	heartbeatTicker := time.NewTicker(client.HeartbeatInterval)
	heartbeatChan := heartbeatTicker.C
	heartbeatFailedCnt := 0
	msgTimeout := client.MsgTimeout

	// v2 opportunistically buffers data to clients to reduce write system calls
	// we force flush in two cases:
	//    1. when the client is not ready to receive messages
	//    2. we're buffered and the channel has nothing left to send us
	//       (ie. we would block in this loop anyway)
	//
	flushed := true

	// signal to the goroutine that started the messagePump
	// that we've started up
	close(startedChan)

	for {
		if subChannel == nil || !client.IsReadyForMessages() {
			// the client is not ready to receive messages...
			clientMsgChan = nil
			flusherChan = nil
			// force flush
			client.LockWrite()
			err = client.Flush()
			client.UnlockWrite()
			if err != nil {
				goto exit
			}
			flushed = true
		} else if flushed {
			// last iteration we flushed...
			// do not select on the flusher ticker channel
			clientMsgChan = subChannel.GetClientMsgChan()
			flusherChan = nil
		} else {
			// we're buffered (if there isn't any more data we should flush)...
			// select on the flusher ticker channel, too
			clientMsgChan = subChannel.GetClientMsgChan()
			flusherChan = outputBufferTicker.C
		}

		select {
		case <-client.ExitChan:
			goto exit
		case <-flusherChan:
			// if this case wins, we're either starved
			// or we won the race between other channels...
			// in either case, force flush
			client.LockWrite()
			err = client.Flush()
			client.UnlockWrite()
			if err != nil {
				goto exit
			}
			flushed = true
		case <-client.ReadyStateChan:
		case subChannel = <-subEventChan:
			// you can't SUB anymore
			nsqd.NsqLogger().Logf("client %v sub to channel: %v", client.ID,
				subChannel.GetName())
			subEventChan = nil
		case identifyData := <-identifyEventChan:
			// you can't IDENTIFY anymore
			identifyEventChan = nil

			outputBufferTicker.Stop()
			if identifyData.OutputBufferTimeout > 0 {
				outputBufferTicker = time.NewTicker(identifyData.OutputBufferTimeout)
			}

			heartbeatTicker.Stop()
			heartbeatChan = nil
			if identifyData.HeartbeatInterval > 0 {
				heartbeatTicker = time.NewTicker(identifyData.HeartbeatInterval)
				heartbeatChan = heartbeatTicker.C
			}

			if identifyData.SampleRate > 0 {
				sampleRate = identifyData.SampleRate
			}

			msgTimeout = identifyData.MsgTimeout
		case <-heartbeatChan:
			if subChannel != nil && client.IsReadyForMessages() {
				// try wake up the channel
				subChannel.TryWakeupRead()
			}

			err = Send(client, frameTypeResponse, heartbeatBytes)
			nsqd.NsqLogger().LogDebugf("PROTOCOL(V2): [%s] send heartbeat", client)
			if err != nil {
				heartbeatFailedCnt++
				nsqd.NsqLogger().LogWarningf("PROTOCOL(V2): [%s] send heartbeat failed %v times, %v", client, heartbeatFailedCnt, err)
				if heartbeatFailedCnt > 2 {
					goto exit
				}
			} else {
				heartbeatFailedCnt = 0
			}
		case msg, ok := <-clientMsgChan:
			if !ok {
				goto exit
			}

			if sampleRate > 0 && rand.Int31n(100) > sampleRate {
				// FIN automatically, all message will not wait to confirm if not sending,
				// and the reader keep moving forward.
				offset, _, _, _ := subChannel.ConfirmBackendQueue(msg)
				// TODO: sync to replica nodes.
				_ = offset
				continue
			}
			// avoid re-send some confirmed message,
			// this may happen while the channel reader is reset to old position
			// due to some retry or leader change.
			if subChannel.IsConfirmed(msg) {
				continue
			}

			subChannel.StartInFlightTimeout(msg, client.ID, client.String(), msgTimeout)
			client.SendingMessage()
			err = SendMessage(client, msg, &buf, subChannel.IsOrdered())
			if err != nil {
				goto exit
			}
			flushed = false
		}
	}

exit:
	nsqd.NsqLogger().LogDebugf("PROTOCOL(V2): [%s] exiting messagePump", client)
	heartbeatTicker.Stop()
	outputBufferTicker.Stop()
	if err != nil {
		nsqd.NsqLogger().Logf("PROTOCOL(V2): [%s] messagePump error - %s", client, err)
	}
	close(stoppedChan)
}
func (self *NsqdCoordinator) FinishMessageToCluster(channel *nsqd.Channel, clientID int64, clientAddr string, msgID nsqd.MessageID) error {
	topicName := channel.GetTopicName()
	partition := channel.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}

	var syncOffset ChannelConsumerOffset
	changed := false
	var confirmed nsqd.BackendQueueEnd
	if channel.IsOrdered() {
		if !coord.GetData().IsISRReadyForWrite() {
			coordLog.Warningf("topic(%v) finish message ordered failed since no enough ISR", topicName)
			coordErrStats.incWriteErr(ErrWriteQuorumFailed)
			return ErrWriteQuorumFailed.ToErrorType()
		}

		confirmed = channel.GetConfirmed()
	}
	// TODO: maybe use channel to aggregate all the sync of message to reduce the rpc call.

	doLocalWrite := func(d *coordData) *CoordErr {
		offset, cnt, tmpChanged, localErr := channel.FinishMessage(clientID, clientAddr, msgID)
		if localErr != nil {
			coordLog.Infof("channel %v finish local msg %v error: %v", channel.GetName(), msgID, localErr)
			changed = false
			return &CoordErr{localErr.Error(), RpcNoErr, CoordLocalErr}
		}
		changed = tmpChanged
		syncOffset.VOffset = int64(offset)
		syncOffset.VCnt = cnt
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		channel.ContinueConsumeForOrder()
		return nil
	}
	doLocalRollback := func() {
		if channel.IsOrdered() && confirmed != nil {
			coordLog.Warningf("rollback channel confirm to : %v", confirmed)
			// reset read to last confirmed
			channel.SetConsumeOffset(confirmed.Offset(), confirmed.TotalMsgCnt(), true)
		}
	}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		if !changed || channel.IsEphemeral() {
			return nil
		}
		var rpcErr *CoordErr
		if channel.IsOrdered() {
			// if ordered, we need make sure all the consume offset is synced to all replicas
			rpcErr = c.UpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		} else {
			c.NotifyUpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		}
		if rpcErr != nil {
			coordLog.Infof("sync channel(%v) offset to replica %v failed: %v, offset: %v", channel.GetName(),
				nodeID, rpcErr, syncOffset)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		// we can ignore the error if this channel is not ordered. (just sync next time)
		if successNum == len(tcData.topicInfo.ISR) || !channel.IsOrdered() {
			return true
		}
		return false
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit, doLocalRollback,
		doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}