func (p *program) Start() error { glog.InitWithFlag(flagSet) flagSet.Parse(os.Args[1:]) if *showVersion { fmt.Println(version.String("nsqlookupd")) os.Exit(0) } var cfg map[string]interface{} if *config != "" { _, err := toml.DecodeFile(*config, &cfg) if err != nil { log.Fatalf("ERROR: failed to load config file %s - %s", *config, err.Error()) } } opts := nsqlookupd.NewOptions() options.Resolve(opts, flagSet, cfg) if opts.LogDir != "" { glog.SetGLogDir(opts.LogDir) } nsqlookupd.SetLogger(opts) glog.StartWorker(time.Second * 2) daemon := nsqlookupd.New(opts) daemon.Main() p.nsqlookupd = daemon return nil }
func main() { glog.InitWithFlag(flagSet) flagSet.Parse(os.Args[1:]) if *showVersion { fmt.Println(version.String("nsqadmin")) return } defer glog.Flush() if *templateDir != "" { log.Printf("WARNING: --template-dir is deprecated and will be removed in the next release (templates are now compiled into the binary)") } exitChan := make(chan int) signalChan := make(chan os.Signal, 1) go func() { <-signalChan exitChan <- 1 }() signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) var cfg map[string]interface{} if *config != "" { _, err := toml.DecodeFile(*config, &cfg) if err != nil { log.Fatalf("ERROR: failed to load config file %s - %s", *config, err) } } opts := nsqadmin.NewOptions() options.Resolve(opts, flagSet, cfg) if opts.LogDir != "" { glog.SetGLogDir(opts.LogDir) } glog.StartWorker(time.Second * 2) nsqadmin := nsqadmin.New(opts) nsqadmin.Main() <-exitChan nsqadmin.Exit() }
func mustStartNSQD(opts *nsqdNs.Options) (*net.TCPAddr, *net.TCPAddr, *nsqdNs.NSQD, *NsqdServer) { opts.TCPAddress = "127.0.0.1:0" opts.HTTPAddress = "127.0.0.1:0" opts.HTTPSAddress = "127.0.0.1:0" if opts.DataPath == "" { tmpDir, err := ioutil.TempDir("", fmt.Sprintf("nsq-test-%d", time.Now().UnixNano())) if err != nil { panic(err) } opts.DataPath = tmpDir } if opts.LogDir == "" { opts.LogDir = opts.DataPath } glog.SetGLogDir(opts.LogDir) glog.StartWorker(time.Second) _, nsqdServer := NewNsqdServer(opts) nsqdServer.Main() return nsqdServer.ctx.realTCPAddr(), nsqdServer.ctx.realHTTPAddr(), nsqdServer.ctx.nsqd, nsqdServer }
func TestNsqLookupNsqdCreateTopic(t *testing.T) { // on 4 nodes, we should test follow cases // 1 partition 1 replica // 1 partition 3 replica // 3 partition 1 replica // 2 partition 2 replica if testing.Verbose() { SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN) glog.SetFlags(0, "", "", true, true, 1) glog.StartWorker(time.Second) } else { SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG) } idList := []string{"id1", "id2", "id3", "id4"} lookupCoord1, nodeInfoList := prepareCluster(t, idList, false) for _, n := range nodeInfoList { defer os.RemoveAll(n.dataPath) defer n.localNsqd.Exit() defer n.nsqdCoord.Stop() } test.Equal(t, 4, len(nodeInfoList)) topic_p1_r1 := "test-nsqlookup-topic-unit-testcreate-p1-r1" topic_p1_r3 := "test-nsqlookup-topic-unit-testcreate-p1-r3" topic_p3_r1 := "test-nsqlookup-topic-unit-testcreate-p3-r1" topic_p2_r2 := "test-nsqlookup-topic-unit-testcreate-p2-r2" lookupLeadership := lookupCoord1.leadership time.Sleep(time.Second) checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p1_r1, "**")) checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p1_r3, "**")) checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p3_r1, "**")) checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p2_r2, "**")) time.Sleep(time.Second * 3) defer func() { checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p1_r1, "**")) checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p1_r3, "**")) checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p3_r1, "**")) checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p2_r2, "**")) time.Sleep(time.Second * 3) lookupCoord1.Stop() }() // test new topic create err := lookupCoord1.CreateTopic(topic_p1_r1, TopicMetaInfo{1, 1, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord1, time.Second*3) pmeta, _, err := lookupLeadership.GetTopicMetaInfo(topic_p1_r1) pn := pmeta.PartitionNum test.Nil(t, err) test.Equal(t, pn, 1) t0, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 0) test.Nil(t, err) test.Equal(t, len(t0.ISR), 1) t.Logf("t0 leader 
is: %v", t0.Leader) if nodeInfoList[t0.Leader] == nil { t.Fatalf("no leader: %v, %v", t0, nodeInfoList) } t0LeaderCoord := nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr := t0LeaderCoord.getTopicCoord(topic_p1_r1, 0) test.Nil(t, coordErr) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) test.Equal(t, len(tc0.topicInfo.ISR), 1) err = lookupCoord1.CreateTopic(topic_p1_r3, TopicMetaInfo{1, 3, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord1, time.Second*5) lookupCoord1.triggerCheckTopics("", 0, 0) waitClusterStable(lookupCoord1, time.Second*5) pmeta, _, err = lookupLeadership.GetTopicMetaInfo(topic_p1_r3) pn = pmeta.PartitionNum test.Nil(t, err) test.Equal(t, pn, 1) t0, err = lookupLeadership.GetTopicInfo(topic_p1_r3, 0) test.Nil(t, err) test.Equal(t, len(t0.ISR), 3) t.Logf("t0 leader is: %v", t0.Leader) if nodeInfoList[t0.Leader] == nil { t.Fatalf("no leader: %v, %v", t0, nodeInfoList) } t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic_p1_r3, 0) test.Nil(t, coordErr) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) test.Equal(t, len(tc0.topicInfo.ISR), 3) err = lookupCoord1.CreateTopic(topic_p3_r1, TopicMetaInfo{3, 1, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord1, time.Second*2) waitClusterStable(lookupCoord1, time.Second*5) pmeta, _, err = lookupLeadership.GetTopicMetaInfo(topic_p3_r1) pn = pmeta.PartitionNum test.Nil(t, err) test.Equal(t, pn, 3) t0, err = lookupLeadership.GetTopicInfo(topic_p3_r1, 0) test.Nil(t, err) test.Equal(t, len(t0.ISR), 1) t.Logf("t0 leader is: %v", t0.Leader) if nodeInfoList[t0.Leader] == nil { t.Fatalf("no leader: %v, %v", t0, nodeInfoList) } t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic_p3_r1, 0) test.Nil(t, coordErr) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) test.Equal(t, len(tc0.topicInfo.ISR), 1) t1, err := 
lookupLeadership.GetTopicInfo(topic_p3_r1, 1) t1LeaderCoord := nodeInfoList[t1.Leader].nsqdCoord test.NotNil(t, t1LeaderCoord) tc1, coordErr := t1LeaderCoord.getTopicCoord(topic_p3_r1, 1) test.Nil(t, coordErr) test.Equal(t, tc1.topicInfo.Leader, t1.Leader) test.Equal(t, len(tc1.topicInfo.ISR), 1) err = lookupCoord1.CreateTopic(topic_p2_r2, TopicMetaInfo{2, 2, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord1, time.Second*3) waitClusterStable(lookupCoord1, time.Second*5) pmeta, _, err = lookupLeadership.GetTopicMetaInfo(topic_p2_r2) pn = pmeta.PartitionNum test.Nil(t, err) test.Equal(t, pn, 2) t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0) test.Nil(t, err) test.Equal(t, len(t0.ISR), 2) t.Logf("t0 leader is: %v", t0.Leader) if nodeInfoList[t0.Leader] == nil { t.Fatalf("no leader: %v, %v", t0, nodeInfoList) } t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic_p2_r2, 0) test.Nil(t, coordErr) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) test.Equal(t, len(tc0.topicInfo.ISR), 2) t1, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 1) t1LeaderCoord = nodeInfoList[t1.Leader].nsqdCoord test.NotNil(t, t1LeaderCoord) tc1, coordErr = t1LeaderCoord.getTopicCoord(topic_p2_r2, 1) test.Nil(t, coordErr) test.Equal(t, tc1.topicInfo.Leader, t1.Leader) test.Equal(t, len(tc1.topicInfo.ISR), 2) // test create on exist topic, create on partial partition oldMeta, _, err := lookupCoord1.leadership.GetTopicMetaInfo(topic_p2_r2) test.Nil(t, err) err = lookupCoord1.CreateTopic(topic_p2_r2, TopicMetaInfo{2, 2, 0, 0, 1, 1}) test.NotNil(t, err) waitClusterStable(lookupCoord1, time.Second) waitClusterStable(lookupCoord1, time.Second*5) newMeta, _, err := lookupCoord1.leadership.GetTopicMetaInfo(topic_p2_r2) test.Nil(t, err) test.Equal(t, oldMeta, newMeta) }
func testNsqLookupNsqdNodesChange(t *testing.T, useFakeLeadership bool) { if testing.Verbose() { SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_INFO) glog.SetFlags(0, "", "", true, true, 1) glog.StartWorker(time.Second) } else { SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG) } idList := []string{"id1", "id2", "id3", "id4", "id5"} lookupCoord1, nodeInfoList := prepareCluster(t, idList, useFakeLeadership) for _, n := range nodeInfoList { defer os.RemoveAll(n.dataPath) defer n.localNsqd.Exit() defer n.nsqdCoord.Stop() } topic := "test-nsqlookup-topic-unit-test" lookupLeadership := lookupCoord1.leadership lookupCoord1.DeleteTopic(topic, "**") topic3 := topic + topic lookupCoord1.DeleteTopic(topic3, "**") time.Sleep(time.Second) defer func() { lookupCoord1.DeleteTopic(topic, "**") lookupCoord1.DeleteTopic(topic3, "**") time.Sleep(time.Second * 3) lookupCoord1.Stop() }() // test new topic create err := lookupCoord1.CreateTopic(topic, TopicMetaInfo{2, 2, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord1, time.Second*3) pmeta, _, err := lookupLeadership.GetTopicMetaInfo(topic) pn := pmeta.PartitionNum test.Nil(t, err) test.Equal(t, pn, 2) t0, err := lookupLeadership.GetTopicInfo(topic, 0) test.Nil(t, err) t1, err := lookupLeadership.GetTopicInfo(topic, 1) test.Nil(t, err) test.Equal(t, len(t0.ISR), 2) test.Equal(t, len(t1.ISR), 2) t.Log(t0) t.Log(t1) test.NotEqual(t, t0.Leader, t1.Leader) t0LeaderCoord := nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr := t0LeaderCoord.getTopicCoord(topic, 0) test.Nil(t, coordErr) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) test.Equal(t, len(tc0.topicInfo.ISR), 2) t1LeaderCoord := nodeInfoList[t1.Leader].nsqdCoord test.NotNil(t, t1LeaderCoord) tc1, coordErr := t1LeaderCoord.getTopicCoord(topic, 1) test.Nil(t, coordErr) test.Equal(t, tc1.topicInfo.Leader, t1.Leader) test.Equal(t, len(tc1.topicInfo.ISR), 2) coordLog.Warningf("============= begin test isr node failed ====") // test 
isr node lost lostNodeID := t0.ISR[1] atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) t0, err = lookupLeadership.GetTopicInfo(topic, 0) if len(t0.ISR) < t0.Replica { waitClusterStable(lookupCoord1, time.Second*3) } t0, err = lookupLeadership.GetTopicInfo(topic, 0) test.Nil(t, err) test.Equal(t, FindSlice(t0.ISR, lostNodeID) == -1, true) test.Equal(t, len(t0.ISR), t0.Replica) test.Equal(t, t0.Leader, t0.ISR[0]) // clear topic info on failed node, test the reload for failed node nodeInfoList[lostNodeID].nsqdCoord.topicCoords = make(map[string]map[int]*TopicCoordinator) // test new catchup and new isr atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) if len(t0.ISR) < t0.Replica { waitClusterStable(lookupCoord1, time.Second*3) } t0, _ = lookupLeadership.GetTopicInfo(topic, 0) test.Equal(t, len(t0.CatchupList), 0) test.Equal(t, len(t0.ISR) >= t0.Replica, true) test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR)) test.Equal(t, t0.Leader, t0.ISR[0]) lookupCoord1.triggerCheckTopics("", 0, time.Second) time.Sleep(time.Second) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) // should remove the unnecessary node test.Equal(t, len(t0.ISR), t0.Replica) coordLog.Warningf("============= begin test leader failed ====") // test leader node lost lostNodeID = t0.Leader atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) if len(t0.ISR) < t0.Replica { waitClusterStable(lookupCoord1, time.Second*3) } t0, _ = 
lookupLeadership.GetTopicInfo(topic, 0) t.Log(t0) test.Equal(t, t0.Replica, len(t0.ISR)) test.Equal(t, t0.Leader, t0.ISR[0]) test.NotEqual(t, t0.Leader, lostNodeID) //test.Equal(t, len(t0.CatchupList), 1) test.Equal(t, FindSlice(t0.ISR, lostNodeID) == -1, true) t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0) test.Nil(t, coordErr) test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR)) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) // test lost leader node rejoin atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) if len(t0.ISR) < t0.Replica { waitClusterStable(lookupCoord1, time.Second*3) } t0, _ = lookupLeadership.GetTopicInfo(topic, 0) t.Log(t0) test.Equal(t, len(t0.CatchupList), 0) test.Equal(t, len(t0.ISR) >= t0.Replica, true) t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0) test.Nil(t, coordErr) test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR)) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) waitClusterStable(lookupCoord1, time.Second*3) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) // should remove the unnecessary node test.Equal(t, len(t0.ISR), t0.Replica) // test old leader failed and begin elect new and then new leader failed coordLog.Warningf("============= begin test old leader failed and then new leader failed ====") lostNodeID = t0.Leader lostISRID := t0.ISR[1] if lostISRID == lostNodeID { lostISRID = t0.ISR[0] } atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) time.Sleep(time.Millisecond) atomic.StoreInt32(&nodeInfoList[lostISRID].nsqdCoord.stopping, 1) 
nodeInfoList[lostISRID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostISRID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) atomic.StoreInt32(&nodeInfoList[lostISRID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) nodeInfoList[lostISRID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostISRID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) waitClusterStable(lookupCoord1, time.Second*5) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) test.Equal(t, true, len(t0.ISR) >= t0.Replica) test.Equal(t, t0.Leader == t0.ISR[0] || t0.Leader == t0.ISR[1], true) t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0) test.Nil(t, coordErr) test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR)) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) waitClusterStable(lookupCoord1, time.Second*5) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) // should remove the unnecessary node test.Equal(t, t0.Replica, len(t0.ISR)) // test join isr timeout lostNodeID = t1.ISR[1] atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*5) atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*5) // with only 2 replica, the isr join fail should not change the isr list nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(true) waitClusterStable(lookupCoord1, time.Second*10) t1, _ = lookupLeadership.GetTopicInfo(topic, 1) test.Equal(t, true, len(t1.ISR)+len(t1.CatchupList) >= t1.Replica) test.Equal(t, t1.Leader == t1.ISR[0] || t1.Leader == t1.ISR[1], true) 
nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(false) waitClusterStable(lookupCoord1, time.Second*5) // test new topic create coordLog.Warningf("============= begin test 3 replicas ====") err = lookupCoord1.CreateTopic(topic3, TopicMetaInfo{1, 3, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord1, time.Second*5) // with 3 replica, the isr join timeout will change the isr list if the isr has the quorum nodes t3, err := lookupLeadership.GetTopicInfo(topic3, 0) test.Nil(t, err) test.Equal(t, len(t3.ISR), t3.Replica) lostNodeID = t3.ISR[1] atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*5) atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*5) nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(true) waitClusterStable(lookupCoord1, time.Second*5) t3, _ = lookupLeadership.GetTopicInfo(topic3, 0) test.Equal(t, true, len(t3.ISR) >= t3.Replica-1) test.Equal(t, true, len(t3.ISR) <= t3.Replica) test.Equal(t, t3.Leader == t3.ISR[0] || t3.Leader == t3.ISR[1], true) nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(false) waitClusterStable(lookupCoord1, time.Second*5) glog.Flush() t0, _ = lookupLeadership.GetTopicInfo(topic, 0) test.Equal(t, true, len(t0.ISR) >= t0.Replica) t1, _ = lookupLeadership.GetTopicInfo(topic, 1) test.Equal(t, true, len(t1.ISR) >= t0.Replica) // before migrate really start, the isr should not reach the replica factor // however, catch up may start early while check leadership or enable topic write t3, _ = lookupLeadership.GetTopicInfo(topic3, 0) test.Equal(t, true, len(t3.ISR)+len(t3.CatchupList) >= t3.Replica) t0IsrNum := 2 t1IsrNum := 2 coordLog.Warningf("========== begin test quit 
====") quitList := make([]*NsqdCoordinator, 0) quitList = append(quitList, nodeInfoList[t0.Leader].nsqdCoord) if t1.Leader != t0.Leader { quitList = append(quitList, nodeInfoList[t1.Leader].nsqdCoord) } if t3.Leader != t0.Leader && t3.Leader != t1.Leader { quitList = append(quitList, nodeInfoList[t3.Leader].nsqdCoord) } for id, n := range nodeInfoList { if id == t0.Leader || id == t1.Leader || id == t3.Leader { continue } quitList = append(quitList, n.nsqdCoord) } test.Equal(t, len(nodeInfoList), len(quitList)) for _, nsqdCoord := range quitList { failedID := nsqdCoord.myNode.GetID() delete(nodeInfoList, failedID) nsqdCoord.Stop() if t0IsrNum > 1 { if FindSlice(t0.ISR, failedID) != -1 { t0IsrNum-- } } if t1IsrNum > 1 { if FindSlice(t1.ISR, failedID) != -1 { t1IsrNum-- } } waitClusterStable(lookupCoord1, time.Second*5) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) // we have no failed node in isr or we got the last failed node leaving in isr. t.Log(t0) test.Equal(t, FindSlice(t0.ISR, failedID) == -1 || (len(t0.ISR) == 1 && t0.ISR[0] == failedID), true) test.Equal(t, true, len(t0.ISR) >= t0IsrNum) t1, _ = lookupLeadership.GetTopicInfo(topic, 1) t.Log(t1) test.Equal(t, FindSlice(t1.ISR, failedID) == -1 || (len(t1.ISR) == 1 && t1.ISR[0] == failedID), true) test.Equal(t, true, len(t1.ISR) >= t1IsrNum) t3, _ = lookupLeadership.GetTopicInfo(topic3, 0) t.Log(t3) test.Equal(t, FindSlice(t3.ISR, failedID) == -1 || (len(t3.ISR) == 1 && t3.ISR[0] == failedID), true) } }
// TestNsqLookupMovePartition exercises manual partition data movement on
// a 5-node cluster: moving a leader to another node (single-replica
// topic), moving a leader to another ISR node, moving a leader to a
// non-ISR node, and moving a non-leader replica to a non-ISR node —
// including the rejection case where the target node is excluded
// (ErrNodeIsExcludedForTopicData).
func TestNsqLookupMovePartition(t *testing.T) {
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}
	idList := []string{"id1", "id2", "id3", "id4", "id5"}
	lookupCoord, nodeInfoList := prepareCluster(t, idList, false)
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}
	topic_p1_r1 := "test-nsqlookup-topic-unit-test-move-p1-r1"
	topic_p2_r2 := "test-nsqlookup-topic-unit-test-move-p2-r2"
	lookupLeadership := lookupCoord.leadership
	// Clean up leftovers from previous runs; re-delete on exit.
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r2, "**"))
	time.Sleep(time.Second * 3)
	defer func() {
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r2, "**"))
		time.Sleep(time.Second * 3)
		lookupCoord.Stop()
	}()
	// test new topic create
	err := lookupCoord.CreateTopic(topic_p1_r1, TopicMetaInfo{1, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	err = lookupCoord.CreateTopic(topic_p2_r2, TopicMetaInfo{2, 2, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	lookupCoord.triggerCheckTopics("", 0, 0)
	waitClusterStable(lookupCoord, time.Second*3)
	// test move leader to other isr;
	// test move leader to other catchup;
	// test move non-leader to other node;
	t0, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 1)
	// move p1_r1 leader to other node
	// Pick any node that is not the current leader as the destination.
	toNode := ""
	for _, node := range nodeInfoList {
		if node.nodeInfo.GetID() == t0.Leader {
			continue
		}
		toNode = node.nodeInfo.GetID()
		break
	}
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second)
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p1_r1, 0, true, t0.Leader, toNode)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r1, 0)
	test.Nil(t, err)
	// it may be two nodes in isr if the moved leader rejoin as isr
	test.Equal(t, len(t0.ISR) >= 1, true)
	test.Equal(t, t0.Leader, toNode)

	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 2)
	// Destination: the ISR member that is not the leader.
	toNode = ""
	for _, nid := range t0.ISR {
		if nid == t0.Leader {
			continue
		}
		toNode = nid
		break
	}
	waitClusterStable(lookupCoord, time.Second*3)
	// move leader to other isr node
	oldLeader := t0.Leader
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p2_r2, 0, true, t0.Leader, toNode)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR) >= 2, true)
	test.NotEqual(t, t0.Leader, oldLeader)
	test.Equal(t, t0.Leader, toNode)
	// move leader to other non-isr node
	// Destination: a node outside this partition's ISR that is also not
	// in the other partition's ISR.
	toNode = ""
	for _, node := range nodeInfoList {
		if FindSlice(t0.ISR, node.nodeInfo.GetID()) != -1 {
			continue
		}
		// check other partition
		t1, err := lookupLeadership.GetTopicInfo(topic_p2_r2, 1)
		if err == nil {
			if FindSlice(t1.ISR, node.nodeInfo.GetID()) != -1 {
				continue
			}
		}
		toNode = node.nodeInfo.GetID()
		break
	}
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second)
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p2_r2, 0, true, t0.Leader, toNode)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, t0.Leader, toNode)
	// move non-leader to other non-isr node
	// fromNode: the non-leader ISR member; toNodeInvalid: the other
	// partition's leader (expected to be rejected as a destination).
	toNode = ""
	toNodeInvalid := ""
	fromNode := ""
	for _, nid := range t0.ISR {
		if nid != t0.Leader {
			fromNode = nid
		}
	}
	for _, node := range nodeInfoList {
		if FindSlice(t0.ISR, node.nodeInfo.GetID()) != -1 {
			continue
		}
		// check other partition
		t1, err := lookupLeadership.GetTopicInfo(topic_p2_r2, 1)
		if err == nil {
			toNodeInvalid = t1.Leader
			if FindSlice(t1.ISR, node.nodeInfo.GetID()) != -1 {
				continue
			}
		}
		toNode = node.nodeInfo.GetID()
		break
	}
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second)
	// Moving onto a node already hosting the other partition must fail.
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p2_r2, 0, false, fromNode, toNodeInvalid)
	test.NotNil(t, err)
	test.Equal(t, ErrNodeIsExcludedForTopicData, err)
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second)
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p2_r2, 0, false, fromNode, toNode)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, FindSlice(t0.ISR, toNode) != -1, true)
	test.Equal(t, -1, FindSlice(t0.ISR, fromNode))
}
// TestNsqLookupExpandPartition verifies partition expansion on a 6-node
// cluster for topics with replica factors 1, 2 and 3: successful
// expansions must leave every partition with a full ISR, and expansions
// that would exceed the cluster's capacity (partitions * replicas > node
// count) must be rejected.
func TestNsqLookupExpandPartition(t *testing.T) {
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}
	idList := []string{"id1", "id2", "id3", "id4", "id5", "id6"}
	lookupCoord, nodeInfoList := prepareCluster(t, idList, false)
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}
	topic_p1_r1 := "test-nsqlookup-topic-unit-test-expand-p1-r1"
	topic_p1_r2 := "test-nsqlookup-topic-unit-test-expand-p1-r2"
	topic_p1_r3 := "test-nsqlookup-topic-unit-test-expand-p1-r3"
	lookupLeadership := lookupCoord.leadership
	// Clean up leftovers from previous runs; re-delete on exit.
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r2, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r3, "**"))
	time.Sleep(time.Second * 3)
	defer func() {
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r2, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r3, "**"))
		time.Sleep(time.Second * 3)
		lookupCoord.Stop()
	}()
	err := lookupCoord.CreateTopic(topic_p1_r1, TopicMetaInfo{1, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)
	err = lookupCoord.CreateTopic(topic_p1_r2, TopicMetaInfo{1, 2, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)
	err = lookupCoord.CreateTopic(topic_p1_r3, TopicMetaInfo{1, 3, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)
	waitClusterStable(lookupCoord, time.Second)
	waitClusterStable(lookupCoord, time.Second*3)
	// Expand r1 topic from 1 to 3 partitions: each partition keeps a
	// single-node ISR.
	err = lookupCoord.ExpandTopicPartition(topic_p1_r1, 3)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 1)
	t1, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 1)
	test.Nil(t, err)
	test.Equal(t, len(t1.ISR), 1)
	t2, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 2)
	test.Nil(t, err)
	test.Equal(t, len(t2.ISR), 1)
	// Expand r2 topic to 2, then 3 partitions; each partition must reach
	// its full replica count.
	lookupCoord.triggerCheckTopics("", 0, 0)
	waitClusterStable(lookupCoord, time.Second*3)
	err = lookupCoord.ExpandTopicPartition(topic_p1_r2, 2)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), t0.Replica)
	t1, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 1)
	test.Nil(t, err)
	test.Equal(t, len(t1.ISR), t1.Replica)
	lookupCoord.triggerCheckTopics("", 0, 0)
	waitClusterStable(lookupCoord, time.Second*3)
	err = lookupCoord.ExpandTopicPartition(topic_p1_r2, 3)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), t0.Replica)
	t1, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 1)
	test.Nil(t, err)
	test.Equal(t, len(t1.ISR), t1.Replica)
	t2, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 2)
	test.Nil(t, err)
	test.Equal(t, len(t2.ISR), t2.Replica)
	waitClusterStable(lookupCoord, time.Second*3)
	// should fail
	// (4 partitions * 2 replicas would exceed what 6 nodes can host here.)
	err = lookupCoord.ExpandTopicPartition(topic_p1_r2, 4)
	test.NotNil(t, err)
	// Expand r3 topic to 2 partitions; 3 should be rejected.
	err = lookupCoord.ExpandTopicPartition(topic_p1_r3, 2)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r3, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), t0.Replica)
	t1, err = lookupLeadership.GetTopicInfo(topic_p1_r3, 1)
	test.Nil(t, err)
	test.Equal(t, len(t1.ISR), t1.Replica)
	waitClusterStable(lookupCoord, time.Second*3)
	// should fail
	err = lookupCoord.ExpandTopicPartition(topic_p1_r3, 3)
	test.NotNil(t, err)
}
func TestNsqLookupMarkNodeRemove(t *testing.T) { if testing.Verbose() { SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN) glog.SetFlags(0, "", "", true, true, 1) glog.StartWorker(time.Second) } else { SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG) } idList := []string{"id1", "id2", "id3", "id4", "id5"} lookupCoord, nodeInfoList := prepareCluster(t, idList, false) for _, n := range nodeInfoList { defer os.RemoveAll(n.dataPath) defer n.localNsqd.Exit() defer n.nsqdCoord.Stop() } topic_p4_r1 := "test-nsqlookup-topic-unit-test-removenode-p4-r1" topic_p2_r2 := "test-nsqlookup-topic-unit-test-removenode-p2-r2" topic_p1_r3 := "test-nsqlookup-topic-unit-test-removenode-p1-r3" lookupLeadership := lookupCoord.leadership checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p4_r1, "**")) checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r2, "**")) checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r3, "**")) time.Sleep(time.Second * 3) defer func() { checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p4_r1, "**")) checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r2, "**")) checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r3, "**")) time.Sleep(time.Second * 3) lookupCoord.Stop() }() err := lookupCoord.CreateTopic(topic_p4_r1, TopicMetaInfo{4, 1, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord, time.Second) err = lookupCoord.CreateTopic(topic_p2_r2, TopicMetaInfo{2, 2, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord, time.Second) err = lookupCoord.CreateTopic(topic_p1_r3, TopicMetaInfo{1, 3, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord, time.Second) waitClusterStable(lookupCoord, time.Second*5) nid := "" for _, n := range nodeInfoList { nid = n.nodeInfo.GetID() break } err = lookupCoord.MarkNodeAsRemoving(nid) test.Nil(t, err) checkStart := time.Now() for time.Since(checkStart) < time.Minute*2 { time.Sleep(time.Second) isDone := true for i := 0; i < 4; i++ { info, err := lookupLeadership.GetTopicInfo(topic_p4_r1, i) 
test.Nil(t, err) if FindSlice(info.ISR, nid) != -1 { t.Logf("still waiting remove: %v", info) isDone = false break } } if !isDone { continue } time.Sleep(time.Second) for i := 0; i < 2; i++ { info, err := lookupLeadership.GetTopicInfo(topic_p2_r2, i) test.Nil(t, err) if FindSlice(info.ISR, nid) != -1 { t.Logf("still waiting remove: %v", info) isDone = false break } } if !isDone { continue } time.Sleep(time.Second) info, err := lookupLeadership.GetTopicInfo(topic_p1_r3, 0) test.Nil(t, err) if FindSlice(info.ISR, nid) != -1 { t.Logf("still waiting remove: %v from removing node", info) isDone = false } t.Logf("all done") if isDone { break } } for time.Since(checkStart) < time.Minute*2 { lookupCoord.nodesMutex.Lock() state := lookupCoord.removingNodes[nid] lookupCoord.nodesMutex.Unlock() if state == "data_transfered" || state == "done" { break } else { t.Logf("still waiting state: %v ", state) } time.Sleep(time.Second) } if time.Since(checkStart) >= time.Minute*2 { t.Error("remove node timeout") } }
func TestNsqLookupUpdateTopicMeta(t *testing.T) { if testing.Verbose() { SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN) glog.SetFlags(0, "", "", true, true, 1) glog.StartWorker(time.Second) } else { SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG) } idList := []string{"id1", "id2", "id3", "id4"} lookupCoord, nodeInfoList := prepareCluster(t, idList, false) for _, n := range nodeInfoList { defer os.RemoveAll(n.dataPath) defer n.localNsqd.Exit() defer n.nsqdCoord.Stop() } topic_p1_r1 := "test-nsqlookup-topic-unit-test-updatemeta-p1-r1" topic_p2_r1 := "test-nsqlookup-topic-unit-test-updatemeta-p2-r1" lookupLeadership := lookupCoord.leadership checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**")) checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r1, "**")) time.Sleep(time.Second * 3) defer func() { checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**")) checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r1, "**")) time.Sleep(time.Second * 3) lookupCoord.Stop() }() err := lookupCoord.CreateTopic(topic_p1_r1, TopicMetaInfo{1, 1, 0, 0, 0, 0}) test.Nil(t, err) time.Sleep(time.Second) err = lookupCoord.CreateTopic(topic_p2_r1, TopicMetaInfo{2, 1, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord, time.Second*5) // test increase replicator and decrease the replicator err = lookupCoord.ChangeTopicMetaParam(topic_p1_r1, -1, -1, 3) lookupCoord.triggerCheckTopics("", 0, 0) time.Sleep(time.Second * 5) tmeta, _, _ := lookupLeadership.GetTopicMetaInfo(topic_p1_r1) test.Equal(t, 3, tmeta.Replica) for i := 0; i < tmeta.PartitionNum; i++ { info, err := lookupLeadership.GetTopicInfo(topic_p1_r1, i) test.Nil(t, err) test.Equal(t, tmeta.Replica, len(info.ISR)) } err = lookupCoord.ChangeTopicMetaParam(topic_p1_r1, -1, -1, 2) lookupCoord.triggerCheckTopics("", 0, 0) time.Sleep(time.Second * 3) tmeta, _, _ = lookupLeadership.GetTopicMetaInfo(topic_p1_r1) test.Equal(t, 2, tmeta.Replica) for i := 0; i < tmeta.PartitionNum; i++ { info, err := 
lookupLeadership.GetTopicInfo(topic_p1_r1, i) test.Nil(t, err) test.Equal(t, tmeta.Replica, len(info.ISR)) } err = lookupCoord.ChangeTopicMetaParam(topic_p2_r1, -1, -1, 2) lookupCoord.triggerCheckTopics("", 0, 0) time.Sleep(time.Second * 3) tmeta, _, _ = lookupLeadership.GetTopicMetaInfo(topic_p2_r1) test.Equal(t, 2, tmeta.Replica) for i := 0; i < tmeta.PartitionNum; i++ { info, err := lookupLeadership.GetTopicInfo(topic_p2_r1, i) test.Nil(t, err) test.Equal(t, tmeta.Replica, len(info.ISR)) } // should fail err = lookupCoord.ChangeTopicMetaParam(topic_p2_r1, -1, -1, 3) test.NotNil(t, err) err = lookupCoord.ChangeTopicMetaParam(topic_p2_r1, -1, -1, 1) lookupCoord.triggerCheckTopics("", 0, 0) time.Sleep(time.Second * 3) tmeta, _, _ = lookupLeadership.GetTopicMetaInfo(topic_p2_r1) test.Equal(t, 1, tmeta.Replica) for i := 0; i < tmeta.PartitionNum; i++ { info, err := lookupLeadership.GetTopicInfo(topic_p2_r1, i) test.Nil(t, err) test.Equal(t, tmeta.Replica, len(info.ISR)) } // test update the sync and retention , all partition and replica should be updated err = lookupCoord.ChangeTopicMetaParam(topic_p1_r1, 1234, 3, -1) time.Sleep(time.Second) tmeta, _, _ = lookupLeadership.GetTopicMetaInfo(topic_p1_r1) test.Equal(t, 1234, tmeta.SyncEvery) test.Equal(t, int32(3), tmeta.RetentionDay) for i := 0; i < tmeta.PartitionNum; i++ { info, err := lookupLeadership.GetTopicInfo(topic_p1_r1, i) test.Nil(t, err) for _, nid := range info.ISR { localNsqd := nodeInfoList[nid].localNsqd localTopic, err := localNsqd.GetExistingTopic(topic_p1_r1, i) test.Nil(t, err) dinfo := localTopic.GetDynamicInfo() test.Equal(t, int64(1234), dinfo.SyncEvery) test.Equal(t, int32(3), dinfo.RetentionDay) } } }
func main() { glog.InitWithFlag(flagSet) flagSet.Parse(os.Args[1:]) glog.StartWorker(time.Second) if *ordered { *trace = true } config = nsq.NewConfig() config.MsgTimeout = time.Second * time.Duration(10*(*channelNum)) if config.MsgTimeout >= time.Second*200 { config.MsgTimeout = time.Second * 200 } config.DefaultRequeueDelay = time.Second * 30 config.MaxRequeueDelay = time.Second * 60 config.MaxInFlight = 20 config.EnableTrace = *trace config.EnableOrdered = *ordered log.SetPrefix("[bench_writer] ") dumpCheck = make(map[string]map[uint64]*nsq.Message, 5) pubRespCheck = make(map[string]map[uint64]pubResp, 5) orderCheck = make(map[string]pubResp) traceIDWaitingList = make(map[string]map[uint64]*nsq.Message, 5) pubTraceFailedList = make(map[string]map[uint64]int64) topicMutex = make(map[string]*sync.Mutex) if *topicListFile != "" { f, err := os.Open(*topicListFile) if err != nil { log.Printf("load topic list file error: %v", err) } else { scanner := bufio.NewScanner(f) for scanner.Scan() { line := scanner.Text() line = strings.TrimSpace(line) topics = append(topics, line) } } } log.Printf("testing topic list: %v", topics) msg := make([]byte, *size) batch := make([][]byte, *batchSize) for i := range batch { batch[i] = msg } if *benchCase == "simple" { startSimpleTest(msg, batch) } else if *benchCase == "benchpub" { startBenchPub(msg, batch) } else if *benchCase == "benchsub" { startBenchSub() } else if *benchCase == "checkdata" { startCheckData(msg, batch) } else if *benchCase == "benchlookup" { startBenchLookup() } else if *benchCase == "benchreg" { startBenchLookupRegUnreg() } else if *benchCase == "consumeoffset" { startCheckSetConsumerOffset() } else if *benchCase == "checkdata2" { startCheckData2() } }
// TestTopicResetWithQueueStart exercises ResetBackendWithQueueStartNoLock:
// resetting a topic's disk queue to a brand-new start position (beyond the
// current write end) must move the queue read-start, the write end, and the
// channel's confirmed/end positions to the new start, and must be rejected
// while the topic is writable (it only succeeds between DisableForSlave and
// EnableForMaster).
func TestTopicResetWithQueueStart(t *testing.T) {
	opts := NewOptions()
	opts.Logger = newTestLogger(t)
	if testing.Verbose() {
		opts.Logger = &levellogger.GLogger{}
		opts.LogLevel = 3
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	}
	// Small segment files so the writes below roll over several file numbers.
	opts.MaxBytesPerFile = 1024 * 1024
	_, _, nsqd := mustStartNSQD(opts)
	defer os.RemoveAll(opts.DataPath)
	defer nsqd.Exit()
	topic := nsqd.GetTopic("test", 0)
	topic.dynamicConf.AutoCommit = 1
	topic.dynamicConf.SyncEvery = 10
	msgNum := 5000
	channel := topic.GetChannel("ch")
	test.NotNil(t, channel)
	msg := NewMessage(0, make([]byte, 1000))
	// Backdate timestamps by ~4 days, aged per segment file number, so the
	// retention logic sees old data.
	msg.Timestamp = time.Now().Add(-1 * time.Hour * time.Duration(24*4)).UnixNano()
	msgSize := int32(0)
	var dend BackendQueueEnd
	for i := 0; i <= msgNum; i++ {
		msg.ID = 0
		_, _, msgSize, dend, _ = topic.PutMessage(msg)
		msg.Timestamp = time.Now().Add(-1 * time.Hour * 24 * time.Duration(4-dend.(*diskQueueEndInfo).EndOffset.FileNum)).UnixNano()
	}
	topic.ForceFlush()
	fileNum := topic.backend.diskWriteEnd.EndOffset.FileNum
	// Fresh queue: read start at file 0, and the ~5MB written spans >= 4 files.
	test.Equal(t, int64(0), topic.backend.GetQueueReadStart().(*diskQueueEndInfo).EndOffset.FileNum)
	test.Equal(t, true, fileNum >= 4)
	nsqLog.Warningf("reading the topic %v backend ", topic.GetFullName())
	// Consume and confirm some messages so the channel has progressed state
	// that the reset must override.
	for i := 0; i < 100; i++ {
		msg := <-channel.clientMsgChan
		channel.ConfirmBackendQueue(msg)
	}
	topic.dynamicConf.RetentionDay = 2
	oldEnd := topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	// reset with new start: a position 10 messages past the current write end.
	resetStart := &diskQueueEndInfo{}
	resetStart.virtualEnd = topic.backend.GetQueueWriteEnd().Offset() + BackendOffset(msgSize*10)
	resetStart.totalMsgCnt = topic.backend.GetQueueWriteEnd().TotalMsgCnt() + 10
	// Must fail while the topic is still writable.
	err := topic.ResetBackendWithQueueStartNoLock(int64(resetStart.Offset()), resetStart.TotalMsgCnt())
	test.NotNil(t, err)
	topic.DisableForSlave()
	err = topic.ResetBackendWithQueueStartNoLock(int64(resetStart.Offset()), resetStart.TotalMsgCnt())
	test.Nil(t, err)
	topic.EnableForMaster()
	nsqLog.Warningf("reset the topic %v backend with queue start: %v", topic.GetFullName(), resetStart)
	// Read start, write end, and channel positions all collapse to resetStart;
	// the write position moves to a new segment file at offset 0.
	test.Equal(t, resetStart.Offset(), BackendOffset(topic.GetQueueReadStart()))
	newEnd := topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	test.Equal(t, resetStart.Offset(), newEnd.Offset())
	test.Equal(t, resetStart.TotalMsgCnt(), newEnd.TotalMsgCnt())
	test.Equal(t, true, newEnd.EndOffset.GreatThan(&oldEnd.EndOffset))
	test.Equal(t, int64(0), newEnd.EndOffset.Pos)
	test.Equal(t, resetStart.Offset(), channel.GetConfirmed().Offset())
	test.Equal(t, resetStart.TotalMsgCnt(), channel.GetChannelEnd().TotalMsgCnt())
	// Writing after the reset continues from the new start.
	for i := 0; i < msgNum; i++ {
		msg.ID = 0
		_, _, msgSize, _, _ = topic.PutMessage(msg)
	}
	topic.ForceFlush()
	newEnd = topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	test.Equal(t, resetStart.TotalMsgCnt()+int64(msgNum), newEnd.TotalMsgCnt())
	// Consumption resumes from the new start; confirmed offset tracks each message.
	for i := 0; i < 100; i++ {
		msg := <-channel.clientMsgChan
		channel.ConfirmBackendQueue(msg)
		test.Equal(t, msg.offset+msg.rawMoveSize, channel.GetConfirmed().Offset())
	}
	// reset with old start: resetting back to the same (now historical) start
	// must behave identically and again truncate the queue to resetStart.
	topic.DisableForSlave()
	err = topic.ResetBackendWithQueueStartNoLock(int64(resetStart.Offset()), resetStart.TotalMsgCnt())
	test.Nil(t, err)
	topic.EnableForMaster()
	test.Equal(t, resetStart.Offset(), BackendOffset(topic.GetQueueReadStart()))
	newEnd = topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	test.Equal(t, resetStart.Offset(), newEnd.Offset())
	test.Equal(t, resetStart.TotalMsgCnt(), newEnd.TotalMsgCnt())
	test.Equal(t, true, newEnd.EndOffset.GreatThan(&oldEnd.EndOffset))
	test.Equal(t, int64(0), newEnd.EndOffset.Pos)
	test.Equal(t, resetStart.Offset(), channel.GetConfirmed().Offset())
	test.Equal(t, resetStart.TotalMsgCnt(), channel.GetChannelEnd().TotalMsgCnt())
	// Refill and re-consume once more after the second reset.
	for i := 0; i < msgNum; i++ {
		msg.ID = 0
		_, _, msgSize, dend, _ = topic.PutMessage(msg)
		msg.Timestamp = time.Now().Add(-1 * time.Hour * 24 * time.Duration(4-dend.(*diskQueueEndInfo).EndOffset.FileNum)).UnixNano()
	}
	topic.ForceFlush()
	newEnd = topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	test.Equal(t, resetStart.TotalMsgCnt()+int64(msgNum), newEnd.TotalMsgCnt())
	for i := 0; i < 100; i++ {
		msg := <-channel.clientMsgChan
		channel.ConfirmBackendQueue(msg)
		test.Equal(t, msg.offset+msg.rawMoveSize, channel.GetConfirmed().Offset())
	}
}