func (s *nsqdCoordGRpcServer) PutMessages(ctx context.Context, req *pb.RpcPutMessages) (*pb.CoordErr, error) {
	var coordErr pb.CoordErr
	// when debug logging is enabled, record how long the rpc call took if it was slow
	if coordLog.Level() >= levellogger.LOG_DEBUG {
		start := time.Now().Unix()
		defer func() {
			end := time.Now().Unix()
			if end-start > int64(RPC_TIMEOUT/2) {
				coordLog.Infof("PutMessage rpc call used: %v", end-start)
			}
		}()
	}
	tc, err := s.nsqdCoord.checkWriteForGRpcCall(req.TopicData)
	if err != nil {
		coordErr.ErrMsg = err.ErrMsg
		coordErr.ErrCode = int32(err.ErrCode)
		coordErr.ErrType = int32(err.ErrType)
		return &coordErr, nil
	}
	// do local pub message
	var commitData CommitLogData
	commitData.Epoch = EpochType(req.LogData.Epoch)
	commitData.LogID = req.LogData.LogID
	commitData.MsgNum = req.LogData.MsgNum
	commitData.MsgCnt = req.LogData.MsgCnt
	commitData.MsgSize = req.LogData.MsgSize
	commitData.MsgOffset = req.LogData.MsgOffset
	commitData.LastMsgLogID = req.LogData.LastMsgLogID
	// rebuild the batch of messages from the protobuf payload
	var msgs []*nsqd.Message
	for _, pbm := range req.TopicMessage {
		var msg nsqd.Message
		msg.ID = nsqd.MessageID(pbm.ID)
		msg.TraceID = pbm.Trace_ID
		msg.Attempts = uint16(pbm.Attemps)
		msg.Timestamp = pbm.Timestamp
		msg.Body = pbm.Body
		msgs = append(msgs, &msg)
	}
	err = s.nsqdCoord.putMessagesOnSlave(tc, commitData, msgs)
	if err != nil {
		coordErr.ErrMsg = err.ErrMsg
		coordErr.ErrCode = int32(err.ErrCode)
		coordErr.ErrType = int32(err.ErrType)
	}
	return &coordErr, nil
}
func TestChannelEmptyConsumer(t *testing.T) {
	opts := nsqdNs.NewOptions()
	opts.Logger = newTestLogger(t)
	tcpAddr, _, nsqd, nsqdServer := mustStartNSQD(opts)
	defer os.RemoveAll(opts.DataPath)
	defer nsqdServer.Exit()
	conn, _ := mustConnectNSQD(tcpAddr)
	defer conn.Close()

	topicName := "test_channel_empty" + strconv.Itoa(int(time.Now().Unix()))
	topic := nsqd.GetTopicIgnPart(topicName)
	channel := topic.GetChannel("channel")
	client := nsqdNs.NewClientV2(0, conn, opts, nil)
	client.SetReadyCount(25)
	channel.AddClient(client.ID, client)

	// put 25 messages in flight for the single consumer
	for i := 0; i < 25; i++ {
		msg := nsqdNs.NewMessage(nsqdNs.MessageID(i), []byte("test"))
		channel.StartInFlightTimeout(msg, 0, "", opts.MsgTimeout)
		client.SendingMessage()
	}
	for _, cl := range channel.GetClients() {
		stats := cl.Stats()
		test.Equal(t, stats.InFlightCount, int64(25))
	}

	// moving the consume offset to the channel end should clear the in-flight messages
	channel.SetConsumeOffset(channel.GetChannelEnd().Offset(), channel.GetChannelEnd().TotalMsgCnt(), true)
	time.Sleep(time.Second)
	for _, cl := range channel.GetClients() {
		stats := cl.Stats()
		test.Equal(t, stats.InFlightCount, int64(0))
	}
}
func (p *protocolV2) internalPubAndTrace(client *nsqd.ClientV2, params [][]byte, traceEnable bool) ([]byte, error) {
	startPub := time.Now().UnixNano()
	bodyLen, topic, err := p.preparePub(client, params, p.ctx.getOpts().MaxMsgSize)
	if err != nil {
		return nil, err
	}
	if traceEnable && bodyLen <= nsqd.MsgTraceIDLength {
		return nil, protocol.NewFatalClientErr(nil, "E_BAD_BODY",
			fmt.Sprintf("invalid body size %d with trace id enabled", bodyLen))
	}

	messageBodyBuffer := topic.BufferPoolGet(int(bodyLen))
	defer topic.BufferPoolPut(messageBodyBuffer)
	asyncAction := shouldHandleAsync(client, params)

	_, err = io.CopyN(messageBodyBuffer, client.Reader, int64(bodyLen))
	if err != nil {
		return nil, protocol.NewFatalClientErr(err, "E_BAD_MESSAGE", "failed to read message body")
	}
	messageBody := messageBodyBuffer.Bytes()[:bodyLen]

	topicName := topic.GetTopicName()
	partition := topic.GetTopicPart()
	var traceID uint64
	var realBody []byte
	if traceEnable {
		// the body starts with the trace id when tracing is enabled
		traceID = binary.BigEndian.Uint64(messageBody[:nsqd.MsgTraceIDLength])
		realBody = messageBody[nsqd.MsgTraceIDLength:]
	} else {
		realBody = messageBody
	}

	// only the leader for this topic partition may accept the write
	if p.ctx.checkForMasterWrite(topicName, partition) {
		id := nsqd.MessageID(0)
		offset := nsqd.BackendOffset(0)
		rawSize := int32(0)
		if asyncAction {
			err = internalPubAsync(client.PubTimeout, messageBodyBuffer, topic)
		} else {
			id, offset, rawSize, _, err = p.ctx.PutMessage(topic, realBody, traceID)
		}
		//p.ctx.setHealth(err)
		if err != nil {
			topic.GetDetailStats().UpdatePubClientStats(client.RemoteAddr().String(), client.UserAgent, "tcp", 1, true)
			nsqd.NsqLogger().LogErrorf("topic %v put message failed: %v", topic.GetFullName(), err)
			if clusterErr, ok := err.(*consistence.CommonCoordErr); ok {
				if !clusterErr.IsLocalErr() {
					return nil, protocol.NewClientErr(err, FailedOnNotWritable, "")
				}
			}
			return nil, protocol.NewClientErr(err, "E_PUB_FAILED", err.Error())
		}
		topic.GetDetailStats().UpdatePubClientStats(client.RemoteAddr().String(), client.UserAgent, "tcp", 1, false)
		cost := time.Now().UnixNano() - startPub
		topic.GetDetailStats().UpdateTopicMsgStats(int64(len(realBody)), cost/1000)
		if !traceEnable {
			return okBytes, nil
		}
		return getTracedReponse(messageBodyBuffer, id, traceID, offset, rawSize)
	} else {
		topic.GetDetailStats().UpdatePubClientStats(client.RemoteAddr().String(), client.UserAgent, "tcp", 1, true)
		//forward to master of topic
		nsqd.NsqLogger().LogDebugf("should put to master: %v, from %v", topic.GetFullName(), client.RemoteAddr)
		topic.DisableForSlave()
		return nil, protocol.NewClientErr(err, FailedOnNotLeader, "")
	}
}
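// For reference, a minimal sketch of how a client could lay out the traced PUB body
// consumed above: the payload is prefixed with the trace id encoded as a big-endian
// unsigned 64-bit integer. The 8-byte width is an assumption derived from the
// binary.BigEndian.Uint64 read in internalPubAndTrace (i.e. nsqd.MsgTraceIDLength is
// assumed to be 8), and buildTracedBody is a hypothetical helper, not part of this repo.

package main

import "encoding/binary"

// buildTracedBody prepends the trace id to the raw payload in the layout that
// internalPubAndTrace strips off when trace is enabled.
func buildTracedBody(traceID uint64, payload []byte) []byte {
	body := make([]byte, 8+len(payload))
	binary.BigEndian.PutUint64(body[:8], traceID)
	copy(body[8:], payload)
	return body
}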
func (self *NsqdCoordinator) PutMessagesToCluster(topic *nsqd.Topic, msgs []*nsqd.Message) (nsqd.MessageID, nsqd.BackendOffset, int32, error) {
	var commitLog CommitLogData
	topicName := topic.GetTopicName()
	partition := topic.GetTopicPart()
	coord, checkErr := self.getTopicCoord(topicName, partition)
	if checkErr != nil {
		return nsqd.MessageID(commitLog.LogID), nsqd.BackendOffset(commitLog.MsgOffset), commitLog.MsgSize, checkErr.ToErrorType()
	}

	var queueEnd nsqd.BackendQueueEnd
	var logMgr *TopicCommitLogMgr

	doLocalWrite := func(d *coordData) *CoordErr {
		topic.Lock()
		logMgr = d.logMgr
		id, offset, writeBytes, totalCnt, qe, localErr := topic.PutMessagesNoLock(msgs)
		queueEnd = qe
		topic.Unlock()
		if localErr != nil {
			coordLog.Warningf("put batch messages to local failed: %v", localErr)
			return &CoordErr{localErr.Error(), RpcNoErr, CoordLocalErr}
		}
		commitLog.LogID = int64(id)
		// epoch should not be changed.
		// leader epoch change means leadership change, leadership change
		// need disable write which should hold the write lock.
		// However, we are holding write lock while doing the cluster write replication.
		commitLog.Epoch = d.GetTopicEpochForWrite()
		commitLog.LastMsgLogID = int64(msgs[len(msgs)-1].ID)
		commitLog.MsgOffset = int64(offset)
		commitLog.MsgSize = writeBytes
		// This MsgCnt is the total count until now (include the current written batch message count)
		commitLog.MsgCnt = totalCnt
		commitLog.MsgNum = int32(len(msgs))
		return nil
	}
	doLocalExit := func(err *CoordErr) {
		if err != nil {
			coordLog.Infof("topic %v PutMessagesToCluster error: %v", topic.GetFullName(), err)
			if coord.IsWriteDisabled() {
				topic.DisableForSlave()
			}
		}
	}
	doLocalCommit := func() error {
		localErr := logMgr.AppendCommitLog(&commitLog, false)
		if localErr != nil {
			coordLog.Errorf("topic : %v, Generator %v failed write commit log : %v, logMgr: %v, %v",
				topic.GetFullName(), topic.GetMsgGenerator(), localErr, logMgr.pLogID, logMgr.nLogID)
		}
		topic.Lock()
		topic.UpdateCommittedOffset(queueEnd)
		topic.Unlock()
		return localErr
	}
	doLocalRollback := func() {
		coordLog.Warningf("failed write begin rollback : %v, %v", topic.GetFullName(), commitLog)
		topic.Lock()
		topic.ResetBackendEndNoLock(nsqd.BackendOffset(commitLog.MsgOffset), commitLog.MsgCnt-1)
		topic.Unlock()
	}
	doRefresh := func(d *coordData) *CoordErr {
		logMgr = d.logMgr
		if d.GetTopicEpochForWrite() != commitLog.Epoch {
			coordLog.Warningf("write epoch changed during write: %v, %v", d.GetTopicEpochForWrite(), commitLog)
			return ErrEpochMismatch
		}
		self.requestNotifyNewTopicInfo(d.topicInfo.Name, d.topicInfo.Partition)
		return nil
	}
	doSlaveSync := func(c *NsqdRpcClient, nodeID string, tcData *coordData) *CoordErr {
		// should retry if failed, and the slave should keep the last success write to avoid the duplicated
		putErr := c.PutMessages(&tcData.topicLeaderSession, &tcData.topicInfo, commitLog, msgs)
		if putErr != nil {
			coordLog.Infof("sync write to replica %v failed: %v, put offset: %v, logmgr: %v, %v",
				nodeID, putErr, commitLog, logMgr.pLogID, logMgr.nLogID)
		}
		return putErr
	}
	handleSyncResult := func(successNum int, tcData *coordData) bool {
		if successNum == len(tcData.topicInfo.ISR) {
			return true
		}
		return false
	}
	clusterErr := self.doSyncOpToCluster(true, coord, doLocalWrite, doLocalExit, doLocalCommit,
		doLocalRollback, doRefresh, doSlaveSync, handleSyncResult)

	var err error
	if clusterErr != nil {
		err = clusterErr.ToErrorType()
	}
	return nsqd.MessageID(commitLog.LogID), nsqd.BackendOffset(commitLog.MsgOffset), commitLog.MsgSize, err
}
func (s *httpServer) internalPUB(w http.ResponseWriter, req *http.Request, ps httprouter.Params, enableTrace bool) (interface{}, error) {
	startPub := time.Now().UnixNano()
	// do not support chunked for http pub, use tcp pub instead.
	if req.ContentLength > s.ctx.getOpts().MaxMsgSize {
		return nil, http_api.Err{413, "MSG_TOO_BIG"}
	} else if req.ContentLength <= 0 {
		return nil, http_api.Err{406, "MSG_EMPTY"}
	}

	params, topic, err := s.getExistingTopicFromQuery(req)
	if err != nil {
		nsqd.NsqLogger().Logf("get topic err: %v", err)
		return nil, http_api.Err{404, E_TOPIC_NOT_EXIST}
	}

	// add 1 so that it's greater than our max when we test for it
	// (LimitReader returns a "fake" EOF)
	readMax := req.ContentLength + 1
	b := topic.BufferPoolGet(int(req.ContentLength))
	defer topic.BufferPoolPut(b)
	asyncAction := !enableTrace
	n, err := io.CopyN(b, io.LimitReader(req.Body, readMax), int64(req.ContentLength))
	body := b.Bytes()[:req.ContentLength]
	if err != nil {
		nsqd.NsqLogger().Logf("read request body error: %v", err)
		body = body[:n]
		if err == io.EOF || err == io.ErrUnexpectedEOF {
			// we ignore EOF, maybe the ContentLength is not match?
			nsqd.NsqLogger().LogWarningf("read request body eof: %v, ContentLength: %v, return length %v.",
				err, req.ContentLength, n)
		} else {
			return nil, http_api.Err{500, "INTERNAL_ERROR"}
		}
	}
	if len(body) == 0 {
		return nil, http_api.Err{406, "MSG_EMPTY"}
	}

	// only the leader for this topic partition accepts writes
	if s.ctx.checkForMasterWrite(topic.GetTopicName(), topic.GetTopicPart()) {
		var err error
		traceIDStr := params.Get("trace_id")
		traceID, err := strconv.ParseUint(traceIDStr, 10, 0)
		if enableTrace && err != nil {
			nsqd.NsqLogger().Logf("trace id invalid %v, %v", traceIDStr, err)
			return nil, http_api.Err{400, "INVALID_TRACE_ID"}
		}

		id := nsqd.MessageID(0)
		offset := nsqd.BackendOffset(0)
		rawSize := int32(0)
		if asyncAction {
			err = internalPubAsync(nil, b, topic)
		} else {
			id, offset, rawSize, _, err = s.ctx.PutMessage(topic, body, traceID)
		}
		if err != nil {
			nsqd.NsqLogger().LogErrorf("topic %v put message failed: %v", topic.GetFullName(), err)
			if clusterErr, ok := err.(*consistence.CommonCoordErr); ok {
				if !clusterErr.IsLocalErr() {
					return nil, http_api.Err{400, FailedOnNotWritable}
				}
			}
			return nil, http_api.Err{503, err.Error()}
		}
		cost := time.Now().UnixNano() - startPub
		topic.GetDetailStats().UpdateTopicMsgStats(int64(len(body)), cost/1000)
		if enableTrace {
			return struct {
				Status      string `json:"status"`
				ID          uint64 `json:"id"`
				TraceID     string `json:"trace_id"`
				QueueOffset uint64 `json:"queue_offset"`
				DataRawSize uint32 `json:"rawsize"`
			}{"OK", uint64(id), traceIDStr, uint64(offset), uint32(rawSize)}, nil
		} else {
			return "OK", nil
		}
	} else {
		nsqd.NsqLogger().LogDebugf("should put to master: %v, from %v", topic.GetFullName(), req.RemoteAddr)
		topic.DisableForSlave()
		return nil, http_api.Err{400, FailedOnNotLeader}
	}
}
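// A minimal client-side sketch of publishing through this HTTP handler. The /pub route,
// the 127.0.0.1:4151 address, and the ?topic= query parameter are illustrative assumptions;
// the handler body above only shows the query values it reads (the topic via
// getExistingTopicFromQuery and the optional trace_id).

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// hypothetical endpoint; adjust to however internalPUB is actually routed
	url := "http://127.0.0.1:4151/pub?topic=test&trace_id=12345"
	resp, err := http.Post(url, "application/octet-stream", bytes.NewReader([]byte("hello")))
	if err != nil {
		fmt.Println("publish failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}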