示例#1
0
func (c *context) PutMessages(topic *nsqd.Topic, msgs []*nsqd.Message) (nsqd.MessageID, nsqd.BackendOffset, int32, error) {
	if c.nsqdCoord == nil {
		id, offset, rawSize, _, _, err := topic.PutMessages(msgs)
		return id, offset, rawSize, err
	}
	return c.nsqdCoord.PutMessagesToCluster(topic, msgs)
}
示例#2
0
func (c *context) DeleteExistingChannel(topic *nsqd.Topic, channelName string) error {
	if c.nsqdCoord == nil {
		err := topic.DeleteExistingChannel(channelName)
		return err
	}
	return c.nsqdCoord.DeleteChannel(topic, channelName)
}
示例#3
0
func readMPUB(r io.Reader, tmp []byte, topic *nsqd.Topic, maxMessageSize int64, traceEnable bool) ([]*nsqd.Message, []*bytes.Buffer, error) {
	numMessages, err := readLen(r, tmp)
	if err != nil {
		return nil, nil, protocol.NewFatalClientErr(err, "E_BAD_BODY", "MPUB failed to read message count")
	}

	if numMessages <= 0 {
		return nil, nil, protocol.NewFatalClientErr(err, "E_BAD_BODY",
			fmt.Sprintf("MPUB invalid message count %d", numMessages))
	}

	messages := make([]*nsqd.Message, 0, numMessages)
	buffers := make([]*bytes.Buffer, 0, numMessages)
	for i := int32(0); i < numMessages; i++ {
		messageSize, err := readLen(r, tmp)
		if err != nil {
			return nil, buffers, protocol.NewFatalClientErr(err, "E_BAD_MESSAGE",
				fmt.Sprintf("MPUB failed to read message(%d) body size", i))
		}

		if messageSize <= 0 {
			return nil, buffers, protocol.NewFatalClientErr(nil, "E_BAD_MESSAGE",
				fmt.Sprintf("MPUB invalid message(%d) body size %d", i, messageSize))
		}

		if int64(messageSize) > maxMessageSize {
			return nil, buffers, protocol.NewFatalClientErr(nil, "E_BAD_MESSAGE",
				fmt.Sprintf("MPUB message too big %d > %d", messageSize, maxMessageSize))
		}

		b := topic.BufferPoolGet(int(messageSize))
		buffers = append(buffers, b)
		_, err = io.CopyN(b, r, int64(messageSize))
		if err != nil {
			return nil, buffers, protocol.NewFatalClientErr(err, "E_BAD_MESSAGE", "MPUB failed to read message body")
		}
		msgBody := b.Bytes()[:messageSize]

		traceID := uint64(0)
		var realBody []byte
		if traceEnable {
			if messageSize <= nsqd.MsgTraceIDLength {
				return nil, buffers, protocol.NewFatalClientErr(nil, "E_BAD_MESSAGE",
					fmt.Sprintf("MPUB invalid message(%d) body size %d for tracing", i, messageSize))
			}
			traceID = binary.BigEndian.Uint64(msgBody[:nsqd.MsgTraceIDLength])
			realBody = msgBody[nsqd.MsgTraceIDLength:]
		} else {
			realBody = msgBody
		}
		msg := nsqd.NewMessage(0, realBody)
		msg.TraceID = traceID
		messages = append(messages, msg)
		topic.GetDetailStats().UpdateTopicMsgStats(int64(len(realBody)), 0)
	}

	return messages, buffers, nil
}
示例#4
0
func (c *context) PutMessage(topic *nsqd.Topic,
	msg []byte, traceID uint64) (nsqd.MessageID, nsqd.BackendOffset, int32, nsqd.BackendQueueEnd, error) {
	if c.nsqdCoord == nil {
		msg := nsqd.NewMessage(0, msg)
		msg.TraceID = traceID
		return topic.PutMessage(msg)
	}
	return c.nsqdCoord.PutMessageToCluster(topic, msg, traceID)
}
func (self *NsqdCoordinator) DeleteChannel(topic *nsqd.Topic, channelName string) error {
	topicName := topic.GetTopicName()
	partition := topic.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return checkErr.ToErrorType()
	}

	doLocalWrite := func(d *coordData) *CoordErr {
		localErr := topic.DeleteExistingChannel(channelName)
		if localErr != nil {
			coordLog.Infof("deleteing local channel %v error: %v", channelName, localErr)
		}
		return nil
	}
	doLocalExit := func(err *CoordErr) {}
	doLocalCommit := func() error {
		return nil
	}
	doLocalRollback := func() {
	}
	doRefresh := func(d *coordData) *CoordErr {
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		rpcErr := c.DeleteChannel(&tcData.topicLeaderSession, &tcData.topicInfo, channelName)
		if rpcErr != nil {
			coordLog.Infof("delete channel(%v) to replica %v failed: %v", channelName,
				nodeID, rpcErr)
		}
		return rpcErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		// we can ignore the error if this channel is not ordered. (just sync next time)
		if successNum == len(tcData.topicInfo.ISR) {
			return true
		}
		return false
	}
	clusterErr := self.doSyncOpToCluster(false, coord, doLocalWrite, doLocalExit, doLocalCommit, doLocalRollback,
		doRefresh, doSlaveSync, handleSyncResult)
	if clusterErr != nil {
		return clusterErr.ToErrorType()
	}
	return nil
}
示例#6
0
func (c *context) internalPubLoop(topic *nsqd.Topic) {
	messages := make([]*nsqd.Message, 0, 100)
	pubInfoList := make([]*nsqd.PubInfo, 0, 100)
	topicName := topic.GetTopicName()
	partition := topic.GetTopicPart()
	nsqd.NsqLogger().Logf("start pub loop for topic: %v ", topic.GetFullName())
	defer func() {
		done := false
		for !done {
			select {
			case info := <-topic.GetWaitChan():
				pubInfoList = append(pubInfoList, info)
			default:
				done = true
			}
		}
		nsqd.NsqLogger().Logf("quit pub loop for topic: %v, left: %v ", topic.GetFullName(), len(pubInfoList))
		for _, info := range pubInfoList {
			info.Err = nsqd.ErrExiting
			close(info.Done)
		}
	}()
	quitChan := topic.QuitChan()
	infoChan := topic.GetWaitChan()
	for {
		select {
		case <-quitChan:
			return
		case info := <-infoChan:
			if info.MsgBody.Len() <= 0 {
				nsqd.NsqLogger().Logf("empty msg body")
			}
			messages = append(messages, nsqd.NewMessage(0, info.MsgBody.Bytes()))
			pubInfoList = append(pubInfoList, info)
			// TODO: avoid too much in a batch
		default:
			if len(pubInfoList) == 0 {
				nsqd.NsqLogger().LogDebugf("topic %v pub loop waiting for message", topic.GetFullName())
				select {
				case <-quitChan:
					return
				case info := <-infoChan:
					if info.MsgBody.Len() <= 0 {
						nsqd.NsqLogger().Logf("empty msg body")
					}
					messages = append(messages, nsqd.NewMessage(0, info.MsgBody.Bytes()))
					pubInfoList = append(pubInfoList, info)
				}
				continue
			}
			if len(pubInfoList) > 1 {
				nsqd.NsqLogger().LogDebugf("pub loop batch number: %v", len(pubInfoList))
			}
			var retErr error
			if c.checkForMasterWrite(topicName, partition) {
				_, _, _, err := c.PutMessages(topic, messages)
				if err != nil {
					nsqd.NsqLogger().LogErrorf("topic %v put messages %v failed: %v", topic.GetFullName(), len(messages), err)
					retErr = err
				}
			} else {
				topic.DisableForSlave()
				nsqd.NsqLogger().LogDebugf("should put to master: %v",
					topic.GetFullName())
				retErr = consistence.ErrNotTopicLeader.ToErrorType()
			}
			for _, info := range pubInfoList {
				info.Err = retErr
				close(info.Done)
			}
			pubInfoList = pubInfoList[:0]
			messages = messages[:0]
		}
	}
}
示例#7
0
func internalPubAsync(clientTimer *time.Timer, msgBody *bytes.Buffer, topic *nsqd.Topic) error {
	if topic.Exiting() {
		return nsqd.ErrExiting
	}
	info := &nsqd.PubInfo{
		Done:     make(chan struct{}),
		MsgBody:  msgBody,
		StartPub: time.Now(),
	}
	if clientTimer == nil {
		clientTimer = time.NewTimer(time.Second * 5)
	} else {
		clientTimer.Reset(time.Second * 5)
	}
	select {
	case topic.GetWaitChan() <- info:
	default:
		select {
		case topic.GetWaitChan() <- info:
		case <-topic.QuitChan():
			nsqd.NsqLogger().Infof("topic %v put messages failed at exiting", topic.GetFullName())
			return nsqd.ErrExiting
		case <-clientTimer.C:
			nsqd.NsqLogger().Infof("topic %v put messages timeout ", topic.GetFullName())
			return ErrPubToWaitTimeout
		}
	}
	<-info.Done
	return info.Err
}
func (self *NsqdCoordinator) putMessagesOnSlave(coord *TopicCoordinator, logData CommitLogData, msgs []*nsqd.Message) *CoordErr {
	if len(msgs) == 0 {
		return ErrPubArgError
	}
	if logData.LogID != int64(msgs[0].ID) {
		return ErrPubArgError
	}
	var logMgr *TopicCommitLogMgr
	// this last log id should be used on slave to avoid the slave switch
	// override the leader's prev mpub message id.
	// While slave is chosen as leader, the next id should be larger than the last logid.
	// Because the mpub maybe already committed after the leader is down, the new leader should begin
	// with the last message id + 1 for next message.
	lastMsgLogID := int64(msgs[len(msgs)-1].ID)
	if logData.LastMsgLogID != lastMsgLogID {
		return ErrPubArgError
	}

	var queueEnd nsqd.BackendQueueEnd
	var topic *nsqd.Topic
	checkDupOnSlave := func(tc *coordData) bool {
		if coordLog.Level() >= levellogger.LOG_DETAIL {
			topicName := tc.topicInfo.Name
			coordLog.Debugf("pub on slave : %v, msg count: %v", topicName, len(msgs))
		}
		logMgr = tc.logMgr
		if logMgr.IsCommitted(logData.LogID) {
			coordLog.Infof("put the already committed log id : %v", logData.LogID)
			return true
		}
		return false
	}

	doLocalWriteOnSlave := func(tc *coordData) *CoordErr {
		var localErr error
		var start time.Time
		checkCost := coordLog.Level() >= levellogger.LOG_DEBUG
		if self.enableBenchCost {
			checkCost = true
		}
		if checkCost {
			start = time.Now()
		}
		topicName := tc.topicInfo.Name
		partition := tc.topicInfo.Partition
		topic, localErr = self.localNsqd.GetExistingTopic(topicName, partition)
		if localErr != nil {
			coordLog.Infof("pub on slave missing topic : %v", topicName)
			// leave the isr and try re-sync with leader
			return &CoordErr{localErr.Error(), RpcErrTopicNotExist, CoordSlaveErr}
		}

		topic.Lock()
		var cost time.Duration
		if checkCost {
			cost = time.Now().Sub(start)
			if cost > time.Millisecond {
				coordLog.Infof("prepare write on slave local cost :%v", cost)
			}
		}

		queueEnd, localErr = topic.PutMessagesOnReplica(msgs, nsqd.BackendOffset(logData.MsgOffset))
		if checkCost {
			cost2 := time.Now().Sub(start)
			if cost2 > time.Millisecond {
				coordLog.Infof("write local on slave cost :%v, %v", cost, cost2)
			}
		}

		topic.Unlock()
		if localErr != nil {
			logIndex, lastLogOffset, lastLog, _ := logMgr.GetLastCommitLogOffsetV2()
			coordLog.Errorf("put messages on slave failed: %v, slave last logid: %v, data: %v:%v, %v",
				localErr, logMgr.GetLastCommitLogID(), logIndex, lastLogOffset, lastLog)
			return &CoordErr{localErr.Error(), RpcCommonErr, CoordSlaveErr}
		}
		return nil
	}

	doLocalCommit := func() error {
		localErr := logMgr.AppendCommitLog(&logData, true)
		if localErr != nil {
			coordLog.Errorf("write commit log on slave failed: %v", localErr)
			return localErr
		}
		topic.Lock()
		topic.UpdateCommittedOffset(queueEnd)
		topic.Unlock()
		return nil
	}

	doLocalExit := func(err *CoordErr) {
		if err != nil {
			coordLog.Warningf("failed to batch put messages on slave: %v", err)
		}
	}
	return self.doWriteOpOnSlave(coord, checkDupOnSlave, doLocalWriteOnSlave, doLocalCommit,
		doLocalExit)
}
func (self *NsqdCoordinator) putMessageOnSlave(coord *TopicCoordinator, logData CommitLogData, msg *nsqd.Message) *CoordErr {
	var logMgr *TopicCommitLogMgr
	var topic *nsqd.Topic
	var queueEnd nsqd.BackendQueueEnd

	checkDupOnSlave := func(tc *coordData) bool {
		if coordLog.Level() >= levellogger.LOG_DETAIL {
			topicName := tc.topicInfo.Name
			coordLog.Debugf("pub on slave : %v, msg %v", topicName, msg.ID)
		}
		logMgr = tc.logMgr
		if logMgr.IsCommitted(logData.LogID) {
			coordLog.Infof("pub the already committed log id : %v", logData.LogID)
			return true
		}
		return false
	}

	doLocalWriteOnSlave := func(tc *coordData) *CoordErr {
		var localErr error
		topicName := tc.topicInfo.Name
		partition := tc.topicInfo.Partition
		topic, localErr = self.localNsqd.GetExistingTopic(topicName, partition)
		if localErr != nil {
			coordLog.Infof("pub on slave missing topic : %v", topicName)
			// leave the isr and try re-sync with leader
			return &CoordErr{localErr.Error(), RpcErrTopicNotExist, CoordSlaveErr}
		}

		if topic.GetTopicPart() != partition {
			coordLog.Errorf("topic on slave has different partition : %v vs %v", topic.GetTopicPart(), partition)
			return &CoordErr{ErrLocalTopicPartitionMismatch.String(), RpcErrTopicNotExist, CoordSlaveErr}
		}

		topic.Lock()
		queueEnd, localErr = topic.PutMessageOnReplica(msg, nsqd.BackendOffset(logData.MsgOffset))
		topic.Unlock()
		if localErr != nil {
			coordLog.Errorf("put message on slave failed: %v", localErr)
			return &CoordErr{localErr.Error(), RpcCommonErr, CoordSlaveErr}
		}
		return nil
	}

	doLocalCommit := func() error {
		localErr := logMgr.AppendCommitLog(&logData, true)
		if localErr != nil {
			coordLog.Errorf("write commit log on slave failed: %v", localErr)
			return localErr
		}
		topic.Lock()
		topic.UpdateCommittedOffset(queueEnd)
		topic.Unlock()
		return nil
	}
	doLocalExit := func(err *CoordErr) {
		if err != nil {
			coordLog.Infof("slave put message %v error: %v", logData, err)
		}
	}

	return self.doWriteOpOnSlave(coord, checkDupOnSlave, doLocalWriteOnSlave, doLocalCommit, doLocalExit)
}
func (self *NsqdCoordinator) PutMessageToCluster(topic *nsqd.Topic,
	body []byte, traceID uint64) (nsqd.MessageID, nsqd.BackendOffset, int32, nsqd.BackendQueueEnd, error) {
	var commitLog CommitLogData
	var queueEnd nsqd.BackendQueueEnd
	msg := nsqd.NewMessage(0, body)
	msg.TraceID = traceID

	topicName := topic.GetTopicName()
	partition := topic.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return msg.ID, nsqd.BackendOffset(commitLog.MsgOffset), commitLog.MsgSize, queueEnd, checkErr.ToErrorType()
	}

	var logMgr *TopicCommitLogMgr

	doLocalWrite := func(d *coordData) *CoordErr {
		logMgr = d.logMgr
		topic.Lock()
		id, offset, writeBytes, qe, localErr := topic.PutMessageNoLock(msg)
		queueEnd = qe
		topic.Unlock()
		if localErr != nil {
			coordLog.Warningf("put message to local failed: %v", localErr)
			return &CoordErr{localErr.Error(), RpcNoErr, CoordLocalErr}
		}
		commitLog.LogID = int64(id)
		// epoch should not be changed.
		// leader epoch change means leadership change, leadership change
		// need disable write which should hold the write lock.
		// However, we are holding write lock while doing the cluster write replication.
		commitLog.Epoch = d.GetTopicEpochForWrite()
		commitLog.LastMsgLogID = commitLog.LogID
		commitLog.MsgOffset = int64(offset)
		commitLog.MsgSize = writeBytes
		commitLog.MsgCnt = queueEnd.TotalMsgCnt()
		commitLog.MsgNum = 1

		return nil
	}
	doLocalExit := func(err *CoordErr) {
		if err != nil {
			coordLog.Infof("topic %v PutMessageToCluster msg %v error: %v", topic.GetFullName(), msg, err)
			if coord.IsWriteDisabled() {
				topic.DisableForSlave()
			}
		}
	}
	doLocalCommit := func() error {
		localErr := logMgr.AppendCommitLog(&commitLog, false)
		if localErr != nil {
			coordLog.Errorf("topic : %v, Generator %v failed write commit log : %v, logmgr: %v, %v",
				topic.GetFullName(), topic.GetMsgGenerator(), localErr, logMgr.pLogID, logMgr.nLogID)
		}
		topic.Lock()
		topic.UpdateCommittedOffset(queueEnd)
		topic.Unlock()
		return localErr
	}
	doLocalRollback := func() {
		coordLog.Warningf("failed write begin rollback : %v, %v", topic.GetFullName(), commitLog)
		topic.Lock()
		topic.RollbackNoLock(nsqd.BackendOffset(commitLog.MsgOffset), 1)
		topic.Unlock()
	}
	doRefresh := func(d *coordData) *CoordErr {
		logMgr = d.logMgr
		if d.GetTopicEpochForWrite() != commitLog.Epoch {
			coordLog.Warningf("write epoch changed during write: %v, %v", d.GetTopicEpochForWrite(), commitLog)
			return ErrEpochMismatch
		}
		self.requestNotifyNewTopicInfo(d.topicInfo.Name, d.topicInfo.Partition)
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		// should retry if failed, and the slave should keep the last success write to avoid the duplicated
		putErr := c.PutMessage(&tcData.topicLeaderSession, &tcData.topicInfo, commitLog, msg)
		if putErr != nil {
			coordLog.Infof("sync write to replica %v failed: %v. put offset:%v, logmgr: %v, %v",
				nodeID, putErr, commitLog, logMgr.pLogID, logMgr.nLogID)
		}
		return putErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		if successNum == len(tcData.topicInfo.ISR) {
			return true
		}
		return false
	}

	clusterErr := self.doSyncOpToCluster(true, coord, doLocalWrite, doLocalExit, doLocalCommit, doLocalRollback,
		doRefresh, doSlaveSync, handleSyncResult)

	var err error
	if clusterErr != nil {
		err = clusterErr.ToErrorType()
	}
	return msg.ID, nsqd.BackendOffset(commitLog.MsgOffset), commitLog.MsgSize, queueEnd, err
}