func (self *NsqdCoordinator) SetChannelConsumeOffsetToCluster(ch *nsqd.Channel, queueOffset int64, cnt int64, force bool) error {
	topicName := ch.GetTopicName()
	partition := ch.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}
	var syncOffset ChannelConsumerOffset
	syncOffset.AllowBackward = true
	syncOffset.VCnt = cnt
	syncOffset.VOffset = queueOffset

	doLocalWrite := func(d *coordData) *CoordErr {
		err := ch.SetConsumeOffset(nsqd.BackendOffset(queueOffset), cnt, force)
		if err != nil {
			if err != nsqd.ErrSetConsumeOffsetNotFirstClient {
				coordLog.Infof("failed to set the consume offset: %v, err: %v", queueOffset, err)
				return &CoordErr{err.Error(), RpcNoErr, CoordLocalErr}
			}
			coordLog.Debugf("the consume offset: %v can only be set by the first client", queueOffset)
			return ErrLocalSetChannelOffsetNotFirstClient
		}
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		return nil
	}
	doLocalRollback := func() {}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		if ch.IsEphemeral() {
			return nil
		}
		rpcErr := c.UpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, ch.GetName(), syncOffset)
		if rpcErr != nil {
			coordLog.Infof("sync channel(%v) offset to replica %v failed: %v, offset: %v",
				ch.GetName(), nodeID, rpcErr, syncOffset)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		return successNum == len(tcData.topicInfo.ISR)
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit,
		doLocalRollback, doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}
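// Usage sketch (not from the original source): a hypothetical caller that resets a
// channel's consume position through the coordinator. Only SetChannelConsumeOffsetToCluster
// and its parameters come from the function above; the wrapper name and logging are
// illustrative assumptions.
func resetChannelConsumePosition(nc *NsqdCoordinator, ch *nsqd.Channel, queueOffset int64, cnt int64) error {
	// force=false: per doLocalWrite above, only the first connected client may move the
	// consume offset; otherwise ErrLocalSetChannelOffsetNotFirstClient is reported.
	if err := nc.SetChannelConsumeOffsetToCluster(ch, queueOffset, cnt, false); err != nil {
		coordLog.Infof("reset consume offset %v for channel %v failed: %v", queueOffset, ch.GetName(), err)
		return err
	}
	return nil
}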
func (p *protocolV2) messagePump(client *nsqd.ClientV2, startedChan chan bool, stoppedChan chan bool) {
	var err error
	var buf bytes.Buffer
	var clientMsgChan chan *nsqd.Message
	var subChannel *nsqd.Channel
	// NOTE: `flusherChan` is used to bound message latency for
	// the pathological case of a channel on a low volume topic
	// with >1 clients having >1 RDY counts
	var flusherChan <-chan time.Time
	var sampleRate int32

	subEventChan := client.SubEventChan
	identifyEventChan := client.IdentifyEventChan
	outputBufferTicker := time.NewTicker(client.OutputBufferTimeout)
	heartbeatTicker := time.NewTicker(client.HeartbeatInterval)
	heartbeatChan := heartbeatTicker.C
	heartbeatFailedCnt := 0
	msgTimeout := client.MsgTimeout

	// v2 opportunistically buffers data to clients to reduce write system calls
	// we force flush in two cases:
	//    1. when the client is not ready to receive messages
	//    2. we're buffered and the channel has nothing left to send us
	//       (ie. we would block in this loop anyway)
	//
	flushed := true

	// signal to the goroutine that started the messagePump
	// that we've started up
	close(startedChan)

	for {
		if subChannel == nil || !client.IsReadyForMessages() {
			// the client is not ready to receive messages...
			clientMsgChan = nil
			flusherChan = nil
			// force flush
			client.LockWrite()
			err = client.Flush()
			client.UnlockWrite()
			if err != nil {
				goto exit
			}
			flushed = true
		} else if flushed {
			// last iteration we flushed...
			// do not select on the flusher ticker channel
			clientMsgChan = subChannel.GetClientMsgChan()
			flusherChan = nil
		} else {
			// we're buffered (if there isn't any more data we should flush)...
			// select on the flusher ticker channel, too
			clientMsgChan = subChannel.GetClientMsgChan()
			flusherChan = outputBufferTicker.C
		}

		select {
		case <-client.ExitChan:
			goto exit
		case <-flusherChan:
			// if this case wins, we're either starved
			// or we won the race between other channels...
			// in either case, force flush
			client.LockWrite()
			err = client.Flush()
			client.UnlockWrite()
			if err != nil {
				goto exit
			}
			flushed = true
		case <-client.ReadyStateChan:
		case subChannel = <-subEventChan:
			// you can't SUB anymore
			nsqd.NsqLogger().Logf("client %v sub to channel: %v", client.ID, subChannel.GetName())
			subEventChan = nil
		case identifyData := <-identifyEventChan:
			// you can't IDENTIFY anymore
			identifyEventChan = nil

			outputBufferTicker.Stop()
			if identifyData.OutputBufferTimeout > 0 {
				outputBufferTicker = time.NewTicker(identifyData.OutputBufferTimeout)
			}

			heartbeatTicker.Stop()
			heartbeatChan = nil
			if identifyData.HeartbeatInterval > 0 {
				heartbeatTicker = time.NewTicker(identifyData.HeartbeatInterval)
				heartbeatChan = heartbeatTicker.C
			}

			if identifyData.SampleRate > 0 {
				sampleRate = identifyData.SampleRate
			}

			msgTimeout = identifyData.MsgTimeout
		case <-heartbeatChan:
			if subChannel != nil && client.IsReadyForMessages() {
				// try to wake up the channel reader
				subChannel.TryWakeupRead()
			}
			err = Send(client, frameTypeResponse, heartbeatBytes)
			nsqd.NsqLogger().LogDebugf("PROTOCOL(V2): [%s] send heartbeat", client)
			if err != nil {
				heartbeatFailedCnt++
				nsqd.NsqLogger().LogWarningf("PROTOCOL(V2): [%s] send heartbeat failed %v times, %v",
					client, heartbeatFailedCnt, err)
				if heartbeatFailedCnt > 2 {
					goto exit
				}
			} else {
				heartbeatFailedCnt = 0
			}
		case msg, ok := <-clientMsgChan:
			if !ok {
				goto exit
			}

			if sampleRate > 0 && rand.Int31n(100) > sampleRate {
				// FIN automatically: a sampled-out message is never sent, so it is
				// confirmed immediately and the channel reader keeps moving forward.
				offset, _, _, _ := subChannel.ConfirmBackendQueue(msg)
				// TODO: sync to replica nodes.
				_ = offset
				continue
			}
			// avoid re-sending an already confirmed message; this may happen when the
			// channel reader is reset to an old position due to a retry or a leader change.
			if subChannel.IsConfirmed(msg) {
				continue
			}

			subChannel.StartInFlightTimeout(msg, client.ID, client.String(), msgTimeout)
			client.SendingMessage()
			err = SendMessage(client, msg, &buf, subChannel.IsOrdered())
			if err != nil {
				goto exit
			}
			flushed = false
		}
	}

exit:
	nsqd.NsqLogger().LogDebugf("PROTOCOL(V2): [%s] exiting messagePump", client)
	heartbeatTicker.Stop()
	outputBufferTicker.Stop()
	if err != nil {
		nsqd.NsqLogger().Logf("PROTOCOL(V2): [%s] messagePump error - %s", client, err)
	}
	close(stoppedChan)
}
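// Startup/shutdown handshake sketch (the IOLoop caller is not shown in this excerpt, so the
// wrapper below is an assumption): the pump runs in its own goroutine, the caller waits for
// close(startedChan) before processing commands, and waits on stoppedChan during teardown so
// the pump has fully exited before the connection is cleaned up.
func (p *protocolV2) runMessagePumpExample(client *nsqd.ClientV2) {
	startedChan := make(chan bool)
	stoppedChan := make(chan bool)
	go p.messagePump(client, startedChan, stoppedChan)
	<-startedChan // pump initialized; SubEventChan/IdentifyEventChan wiring is in place
	// ... read and execute client commands until the connection closes ...
	<-stoppedChan // pump exited
}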
func (self *NsqdCoordinator) FinishMessageToCluster(channel *nsqd.Channel, clientID int64, clientAddr string, msgID nsqd.MessageID) error {
	topicName := channel.GetTopicName()
	partition := channel.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}
	var syncOffset ChannelConsumerOffset
	changed := false
	var confirmed nsqd.BackendQueueEnd
	if channel.IsOrdered() {
		if !coord.GetData().IsISRReadyForWrite() {
			coordLog.Warningf("topic(%v) finish message ordered failed since not enough ISR", topicName)
			coordErrStats.incWriteErr(ErrWriteQuorumFailed)
			return ErrWriteQuorumFailed.ToErrorType()
		}
		confirmed = channel.GetConfirmed()
	}
	// TODO: maybe use a channel to aggregate the message syncs and reduce the number of rpc calls.

	doLocalWrite := func(d *coordData) *CoordErr {
		offset, cnt, tmpChanged, localErr := channel.FinishMessage(clientID, clientAddr, msgID)
		if localErr != nil {
			coordLog.Infof("channel %v finish local msg %v error: %v", channel.GetName(), msgID, localErr)
			changed = false
			return &CoordErr{localErr.Error(), RpcNoErr, CoordLocalErr}
		}
		changed = tmpChanged
		syncOffset.VOffset = int64(offset)
		syncOffset.VCnt = cnt
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		channel.ContinueConsumeForOrder()
		return nil
	}
	doLocalRollback := func() {
		if channel.IsOrdered() && confirmed != nil {
			coordLog.Warningf("rollback channel confirm to: %v", confirmed)
			// reset the read position to the last confirmed offset
			channel.SetConsumeOffset(confirmed.Offset(), confirmed.TotalMsgCnt(), true)
		}
	}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		if !changed || channel.IsEphemeral() {
			return nil
		}
		var rpcErr *CoordErr
		if channel.IsOrdered() {
			// for an ordered channel we must make sure the consume offset is synced to all replicas
			rpcErr = c.UpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		} else {
			c.NotifyUpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		}
		if rpcErr != nil {
			coordLog.Infof("sync channel(%v) offset to replica %v failed: %v, offset: %v",
				channel.GetName(), nodeID, rpcErr, syncOffset)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		// if the channel is not ordered we can ignore the error (it will just sync next time)
		return successNum == len(tcData.topicInfo.ISR) || !channel.IsOrdered()
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit,
		doLocalRollback, doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}
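// Both cluster operations above drive the same hook contract through doSyncOpToCluster.
// The field types below restate the hook signatures from the two call sites for reference;
// the struct itself and the intent comments are inferences from how the hooks are used
// above, not part of the original code.
type clusterOpHooks struct {
	doLocalWrite     func(*coordData) *CoordErr                         // apply the change on the leader and capture syncOffset
	doLocalExit      func(*CoordErr)                                    // final hook, observes the overall error
	doLocalCommit    func() error                                       // make the local change effective (e.g. ContinueConsumeForOrder)
	doLocalRollback  func()                                             // undo the local change (e.g. reset to the last confirmed offset)
	doRefresh        func(*coordData) *CoordErr                         // refresh coordinator data if needed
	doSlaveSync      func(*NsqdRpcClient, string, *coordData) *CoordErr // push the change to one ISR replica over RPC
	handleSyncResult func(successNum int, tcData *coordData) bool       // decide whether enough ISR replicas succeeded
}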