func (self *NsqdCoordinator) FinishMessageToCluster(channel *nsqd.Channel, clientID int64, clientAddr string, msgID nsqd.MessageID) error {
	topicName := channel.GetTopicName()
	partition := channel.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}

	var syncOffset ChannelConsumerOffset
	changed := false
	var confirmed nsqd.BackendQueueEnd
	if channel.IsOrdered() {
		if !coord.GetData().IsISRReadyForWrite() {
			coordLog.Warningf("topic(%v) finish message ordered failed since no enough ISR", topicName)
			coordErrStats.incWriteErr(ErrWriteQuorumFailed)
			return ErrWriteQuorumFailed.ToErrorType()
		}

		confirmed = channel.GetConfirmed()
	}
	// TODO: maybe use a channel to aggregate the message syncs and reduce the number of RPC calls.

	// finish the message locally and snapshot the new consume offset for replication
	doLocalWrite := func(d *coordData) *CoordErr {
		offset, cnt, tmpChanged, localErr := channel.FinishMessage(clientID, clientAddr, msgID)
		if localErr != nil {
			coordLog.Infof("channel %v finish local msg %v error: %v", channel.GetName(), msgID, localErr)
			changed = false
			return &CoordErr{localErr.Error(), RpcNoErr, CoordLocalErr}
		}
		changed = tmpChanged
		syncOffset.VOffset = int64(offset)
		syncOffset.VCnt = cnt
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		channel.ContinueConsumeForOrder()
		return nil
	}
	doLocalRollback := func() {
		if channel.IsOrdered() && confirmed != nil {
			coordLog.Warningf("rollback channel confirm to : %v", confirmed)
			// reset read position to the last confirmed offset
			channel.SetConsumeOffset(confirmed.Offset(), confirmed.TotalMsgCnt(), true)
		}
	}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		if !changed || channel.IsEphemeral() {
			return nil
		}
		var rpcErr *CoordErr
		if channel.IsOrdered() {
			// for an ordered channel we need to make sure the consume offset is synced to all replicas
			rpcErr = c.UpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		} else {
			c.NotifyUpdateChannelOffset(&tcData.topicLeaderSession, &tcData.topicInfo, channel.GetName(), syncOffset)
		}
		if rpcErr != nil {
			coordLog.Infof("sync channel(%v) offset to replica %v failed: %v, offset: %v", channel.GetName(),
				nodeID, rpcErr, syncOffset)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		// the error can be ignored if this channel is not ordered (it will just sync next time)
		if successNum == len(tcData.topicInfo.ISR) || !channel.IsOrdered() {
			return true
		}
		return false
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit,
		doLocalRollback, doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}
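// Illustrative sketch only (not part of the coordinator API): a minimal,
// hypothetical helper that restates the decision made inside doSlaveSync
// above. Nothing is replicated when the local finish changed nothing or the
// channel is ephemeral; ordered channels wait for an acknowledged
// UpdateChannelOffset RPC, while normal channels use the fire-and-forget
// NotifyUpdateChannelOffset path and simply resync on the next finish.
func shouldSyncChannelOffsetSketch(changed, ephemeral, ordered bool) (sync bool, waitForAck bool) {
	if !changed || ephemeral {
		// nothing to replicate: the consume offset did not move, or the
		// channel is ephemeral and is never synced to replicas
		return false, false
	}
	// replicate; only ordered channels block on the replica acknowledgement
	return true, ordered
}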
func (self *NsqdCoordinator) PutMessageToCluster(topic *nsqd.Topic,
	body []byte, traceID uint64) (nsqd.MessageID, nsqd.BackendOffset, int32, nsqd.BackendQueueEnd, error) {
	var commitLog CommitLogData
	var queueEnd nsqd.BackendQueueEnd
	msg := nsqd.NewMessage(0, body)
	msg.TraceID = traceID

	topicName := topic.GetTopicName()
	partition := topic.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return msg.ID, nsqd.BackendOffset(commitLog.MsgOffset), commitLog.MsgSize, queueEnd, checkErr.ToErrorType()
	}
	var logMgr *TopicCommitLogMgr
	// write the message to the local queue and build the commit log entry to replicate
	doLocalWrite := func(d *coordData) *CoordErr {
		logMgr = d.logMgr
		topic.Lock()
		id, offset, writeBytes, qe, localErr := topic.PutMessageNoLock(msg)
		queueEnd = qe
		topic.Unlock()
		if localErr != nil {
			coordLog.Warningf("put message to local failed: %v", localErr)
			return &CoordErr{localErr.Error(), RpcNoErr, CoordLocalErr}
		}
		commitLog.LogID = int64(id)
		// The epoch should not change here: a leader epoch change means a
		// leadership change, and a leadership change must disable writes,
		// which requires holding the write lock. However, we are already
		// holding the write lock while doing the cluster write replication.
		commitLog.Epoch = d.GetTopicEpochForWrite()
		commitLog.LastMsgLogID = commitLog.LogID
		commitLog.MsgOffset = int64(offset)
		commitLog.MsgSize = writeBytes
		commitLog.MsgCnt = queueEnd.TotalMsgCnt()
		commitLog.MsgNum = 1

		return nil
	}
	doLocalExit := func(err *CoordErr) {
		if err != nil {
			coordLog.Infof("topic %v PutMessageToCluster msg %v error: %v", topic.GetFullName(), msg, err)
			if coord.IsWriteDisabled() {
				topic.DisableForSlave()
			}
		}
	}
	// append the commit log entry and publish the new committed offset
	doLocalCommit := func() error {
		localErr := logMgr.AppendCommitLog(&commitLog, false)
		if localErr != nil {
			coordLog.Errorf("topic : %v, Generator %v failed write commit log : %v, logmgr: %v, %v",
				topic.GetFullName(), topic.GetMsgGenerator(), localErr, logMgr.pLogID, logMgr.nLogID)
		}
		topic.Lock()
		topic.UpdateCommittedOffset(queueEnd)
		topic.Unlock()
		return localErr
	}
	doLocalRollback := func() {
		coordLog.Warningf("failed write begin rollback : %v, %v", topic.GetFullName(), commitLog)
		topic.Lock()
		topic.RollbackNoLock(nsqd.BackendOffset(commitLog.MsgOffset), 1)
		topic.Unlock()
	}
	doRefresh := func(d *coordData) *CoordErr {
		logMgr = d.logMgr
		if d.GetTopicEpochForWrite() != commitLog.Epoch {
			coordLog.Warningf("write epoch changed during write: %v, %v", d.GetTopicEpochForWrite(), commitLog)
			return ErrEpochMismatch
		}
		self.requestNotifyNewTopicInfo(d.topicInfo.Name, d.topicInfo.Partition)
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		// should retry if failed; the slave should keep the last successful write to avoid duplicates
		putErr := c.PutMessage(&tcData.topicLeaderSession, &tcData.topicInfo, commitLog, msg)
		if putErr != nil {
			coordLog.Infof("sync write to replica %v failed: %v. put offset:%v, logmgr: %v, %v",
				nodeID, putErr, commitLog, logMgr.pLogID, logMgr.nLogID)
		}
		return putErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		if successNum == len(tcData.topicInfo.ISR) {
			return true
		}
		return false
	}
	clusterErr := self.doSyncOpToCluster(true, coord, doLocalWrite, doLocalExit, doLocalCommit,
		doLocalRollback, doRefresh, doSlaveSync, handleSyncResult)

	var err error
	if clusterErr != nil {
		err = clusterErr.ToErrorType()
	}

	return msg.ID, nsqd.BackendOffset(commitLog.MsgOffset), commitLog.MsgSize, queueEnd, err
}
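// Illustrative sketch only: doSyncOpToCluster is not shown in this file, and
// the loop below is an assumed, simplified outline of how the callbacks built
// in PutMessageToCluster (and FinishMessageToCluster) are expected to be
// driven: write locally, replicate to the ISR followers, commit on quorum,
// otherwise roll the local write back. The follower list, error type, and the
// omission of doLocalExit/doRefresh are hypothetical simplifications, not the
// real coordinator types or control flow.
type syncOpSketchErr struct{ msg string }

func (e *syncOpSketchErr) Error() string { return e.msg }

func syncOpToClusterSketch(
	followerIDs []string, // ISR members other than the leader
	doLocalWrite func() error,
	doSlaveSync func(nodeID string) error,
	handleSyncResult func(successNum int) bool,
	doLocalCommit func() error,
	doLocalRollback func(),
) error {
	// step 1: apply the write on the leader and capture what must be replicated
	if err := doLocalWrite(); err != nil {
		return err
	}
	// step 2: push the write to every follower, counting acknowledgements;
	// the leader's own write counts toward the quorum
	successNum := 1
	for _, nodeID := range followerIDs {
		if err := doSlaveSync(nodeID); err == nil {
			successNum++
		}
	}
	// step 3: commit only if enough replicas accepted the write, otherwise
	// undo the local write so the leader stays consistent with the ISR
	if !handleSyncResult(successNum) {
		doLocalRollback()
		return &syncOpSketchErr{msg: "write quorum not reached"}
	}
	return doLocalCommit()
}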