func (self *NsqdCoordinator) SetChannelConsumeOffsetToCluster(ch *nsqd.Channel, queueOffset int64, cnt int64, force bool) error {
	topicName := ch.GetTopicName()
	partition := ch.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}

	var syncOffset ChannelConsumerOffset
	syncOffset.AllowBackward = true
	syncOffset.VCnt = cnt
	syncOffset.VOffset = queueOffset

	doLocalWrite := func(d *coordData) *CoordErr {
		err := ch.SetConsumeOffset(nsqd.BackendOffset(queueOffset), cnt, force)
		if err != nil {
			if err != nsqd.ErrSetConsumeOffsetNotFirstClient {
				coordLog.Infof("failed to set the consume offset: %v, err:%v", queueOffset, err)
				return &CoordErr{err.Error(), RpcNoErr, CoordLocalErr}
			}
			coordLog.Debugf("the consume offset: %v can only be set by the first client", queueOffset)
			return ErrLocalSetChannelOffsetNotFirstClient
		}
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		return nil
	}
	doLocalRollback := func() {}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		if ch.IsEphemeral() {
			return nil
		}
		rpcErr := c.UpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, ch.GetName(), syncOffset)
		if rpcErr != nil {
			coordLog.Infof("sync channel(%v) offset to replica %v failed: %v, offset: %v", ch.GetName(),
				nodeID, rpcErr, syncOffset)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		return successNum == len(tcData.topicInfo.ISR)
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit,
		doLocalRollback, doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}
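// The sketch below is illustrative only and not part of the original source:
// it shows how a caller (for example an admin API handler) might use
// SetChannelConsumeOffsetToCluster to move a channel's consume position.
// The function name and its parameters are assumptions for illustration.
func exampleResetChannelConsumeOffset(coord *NsqdCoordinator, ch *nsqd.Channel, queueOffset int64, cnt int64) error {
	// With force=false, the offset may only be moved by the first connected
	// consumer; doLocalWrite surfaces that case as
	// ErrLocalSetChannelOffsetNotFirstClient. Pass force=true to override.
	return coord.SetChannelConsumeOffsetToCluster(ch, queueOffset, cnt, false)
}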
func (self *NsqdCoordinator) updateChannelOffsetOnSlave(tc *coordData, channelName string, offset ChannelConsumerOffset) *CoordErr {
	topicName := tc.topicInfo.Name
	partition := tc.topicInfo.Partition

	if !tc.IsMineISR(self.myNode.GetID()) {
		return ErrTopicWriteOnNonISR
	}

	if coordLog.Level() >= levellogger.LOG_DETAIL {
		coordLog.Debugf("got update channel(%v) offset on slave : %v", channelName, offset)
	}
	coord, coordErr := self.getTopicCoord(topicName, partition)
	if coordErr != nil {
		return ErrMissingTopicCoord
	}

	topic, localErr := self.localNsqd.GetExistingTopic(topicName, partition)
	if localErr != nil {
		coordLog.Warningf("slave missing topic : %v", topicName)
		// TODO: leave the isr and try re-sync with leader
		return &CoordErr{localErr.Error(), RpcCommonErr, CoordSlaveErr}
	}

	if topic.GetTopicPart() != partition {
		coordLog.Errorf("topic on slave has different partition : %v vs %v", topic.GetTopicPart(), partition)
		return ErrLocalMissingTopic
	}

	var ch *nsqd.Channel
	ch, localErr = topic.GetExistingChannel(channelName)
	// if a new channel on slave, we should set the consume offset by force
	if localErr != nil {
		offset.AllowBackward = true
		ch = topic.GetChannel(channelName)
		coordLog.Infof("slave init the channel : %v, %v, offset: %v", topic.GetTopicName(),
			channelName, ch.GetConfirmed())
	}
	if ch.IsEphemeral() {
		coordLog.Errorf("ephemeral channel %v should not be synced on slave", channelName)
	}
	currentEnd := ch.GetChannelEnd()
	if nsqd.BackendOffset(offset.VOffset) > currentEnd.Offset() {
		coordLog.Debugf("update channel(%v) consume offset exceed end %v on slave : %v", channelName, offset, currentEnd)
		// cache the offset (using map?) to reduce the slave channel flush.
		coord.consumeMgr.Lock()
		cur, ok := coord.consumeMgr.channelConsumeOffset[channelName]
		if !ok || cur.VOffset < offset.VOffset {
			coord.consumeMgr.channelConsumeOffset[channelName] = offset
		}
		coord.consumeMgr.Unlock()

		if offset.Flush {
			topic.ForceFlush()
			currentEnd = ch.GetChannelEnd()
			if nsqd.BackendOffset(offset.VOffset) > currentEnd.Offset() {
				offset.VOffset = int64(currentEnd.Offset())
				offset.VCnt = currentEnd.TotalMsgCnt()
			}
		} else {
			return nil
		}
	}
	err := ch.ConfirmBackendQueueOnSlave(nsqd.BackendOffset(offset.VOffset), offset.VCnt, offset.AllowBackward)
	if err != nil {
		coordLog.Warningf("update local channel(%v) offset %v failed: %v, current channel end: %v, topic end: %v",
			channelName, offset, err, currentEnd, topic.TotalDataSize())
		if err == nsqd.ErrExiting {
			return &CoordErr{err.Error(), RpcNoErr, CoordTmpErr}
		}
		return &CoordErr{err.Error(), RpcCommonErr, CoordSlaveErr}
	}
	return nil
}
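// Illustrative sketch (an assumption, not the project's actual RPC plumbing):
// updateChannelOffsetOnSlave is the slave-side counterpart of the leader's
// UpdateChannelOffset RPC, so a handler receiving that RPC would resolve the
// topic coordinator data and delegate, roughly as below. The handler name is
// hypothetical.
func exampleHandleUpdateChannelOffsetRpc(self *NsqdCoordinator, tc *coordData, channelName string, offset ChannelConsumerOffset) *CoordErr {
	// The real RPC path also verifies the topic leader session epoch before
	// this call; that verification is elided from the sketch.
	return self.updateChannelOffsetOnSlave(tc, channelName, offset)
}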
func (self *NsqdCoordinator) FinishMessageToCluster(channel *nsqd.Channel, clientID int64, clientAddr string, msgID nsqd.MessageID) error {
	topicName := channel.GetTopicName()
	partition := channel.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}

	var syncOffset ChannelConsumerOffset
	changed := false
	var confirmed nsqd.BackendQueueEnd
	if channel.IsOrdered() {
		if !coord.GetData().IsISRReadyForWrite() {
			coordLog.Warningf("topic(%v) finish message ordered failed since not enough ISR", topicName)
			coordErrStats.incWriteErr(ErrWriteQuorumFailed)
			return ErrWriteQuorumFailed.ToErrorType()
		}
		confirmed = channel.GetConfirmed()
	}
	// TODO: maybe use a channel to aggregate all the message syncs to reduce the rpc calls.
	doLocalWrite := func(d *coordData) *CoordErr {
		offset, cnt, tmpChanged, localErr := channel.FinishMessage(clientID, clientAddr, msgID)
		if localErr != nil {
			coordLog.Infof("channel %v finish local msg %v error: %v", channel.GetName(), msgID, localErr)
			changed = false
			return &CoordErr{localErr.Error(), RpcNoErr, CoordLocalErr}
		}
		changed = tmpChanged
		syncOffset.VOffset = int64(offset)
		syncOffset.VCnt = cnt
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		channel.ContinueConsumeForOrder()
		return nil
	}
	doLocalRollback := func() {
		if channel.IsOrdered() && confirmed != nil {
			coordLog.Warningf("rollback channel confirm to : %v", confirmed)
			// reset read to last confirmed
			channel.SetConsumeOffset(confirmed.Offset(), confirmed.TotalMsgCnt(), true)
		}
	}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		if !changed || channel.IsEphemeral() {
			return nil
		}
		var rpcErr *CoordErr
		if channel.IsOrdered() {
			// if ordered, we need to make sure the consume offset is synced to all replicas
			rpcErr = c.UpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		} else {
			c.NotifyUpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		}
		if rpcErr != nil {
			coordLog.Infof("sync channel(%v) offset to replica %v failed: %v, offset: %v", channel.GetName(),
				nodeID, rpcErr, syncOffset)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		// the error can be ignored if this channel is not ordered (it will just sync next time)
		return successNum == len(tcData.topicInfo.ISR) || !channel.IsOrdered()
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit,
		doLocalRollback, doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}
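// Illustrative sketch (hypothetical caller, not the actual protocol code):
// a FIN from a consumer would be routed through FinishMessageToCluster so the
// confirmed offset is replicated to the ISR rather than only updated locally.
func exampleFinishMessage(coord *NsqdCoordinator, ch *nsqd.Channel, clientID int64, clientAddr string, msgID nsqd.MessageID) error {
	// For ordered channels the call succeeds only when every ISR replica
	// acknowledges the new offset; for normal channels replication is
	// best-effort via NotifyUpdateChannelOffset and sync errors are tolerated.
	return coord.FinishMessageToCluster(ch, clientID, clientAddr, msgID)
}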