func main() { defer glog.Flush() prg := &program{} if err := svc.Run(prg, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGINT); err != nil { log.Fatal(err) } }
func main() { glog.InitWithFlag(flagSet) flagSet.Parse(os.Args[1:]) if *showVersion { fmt.Println(version.String("nsqadmin")) return } defer glog.Flush() if *templateDir != "" { log.Printf("WARNING: --template-dir is deprecated and will be removed in the next release (templates are now compiled into the binary)") } exitChan := make(chan int) signalChan := make(chan os.Signal, 1) go func() { <-signalChan exitChan <- 1 }() signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM) var cfg map[string]interface{} if *config != "" { _, err := toml.DecodeFile(*config, &cfg) if err != nil { log.Fatalf("ERROR: failed to load config file %s - %s", *config, err) } } opts := nsqadmin.NewOptions() options.Resolve(opts, flagSet, cfg) if opts.LogDir != "" { glog.SetGLogDir(opts.LogDir) } glog.StartWorker(time.Second * 2) nsqadmin := nsqadmin.New(opts) nsqadmin.Main() <-exitChan nsqadmin.Exit() }
func testNsqLookupNsqdNodesChange(t *testing.T, useFakeLeadership bool) { if testing.Verbose() { SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_INFO) glog.SetFlags(0, "", "", true, true, 1) glog.StartWorker(time.Second) } else { SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG) } idList := []string{"id1", "id2", "id3", "id4", "id5"} lookupCoord1, nodeInfoList := prepareCluster(t, idList, useFakeLeadership) for _, n := range nodeInfoList { defer os.RemoveAll(n.dataPath) defer n.localNsqd.Exit() defer n.nsqdCoord.Stop() } topic := "test-nsqlookup-topic-unit-test" lookupLeadership := lookupCoord1.leadership lookupCoord1.DeleteTopic(topic, "**") topic3 := topic + topic lookupCoord1.DeleteTopic(topic3, "**") time.Sleep(time.Second) defer func() { lookupCoord1.DeleteTopic(topic, "**") lookupCoord1.DeleteTopic(topic3, "**") time.Sleep(time.Second * 3) lookupCoord1.Stop() }() // test new topic create err := lookupCoord1.CreateTopic(topic, TopicMetaInfo{2, 2, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord1, time.Second*3) pmeta, _, err := lookupLeadership.GetTopicMetaInfo(topic) pn := pmeta.PartitionNum test.Nil(t, err) test.Equal(t, pn, 2) t0, err := lookupLeadership.GetTopicInfo(topic, 0) test.Nil(t, err) t1, err := lookupLeadership.GetTopicInfo(topic, 1) test.Nil(t, err) test.Equal(t, len(t0.ISR), 2) test.Equal(t, len(t1.ISR), 2) t.Log(t0) t.Log(t1) test.NotEqual(t, t0.Leader, t1.Leader) t0LeaderCoord := nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr := t0LeaderCoord.getTopicCoord(topic, 0) test.Nil(t, coordErr) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) test.Equal(t, len(tc0.topicInfo.ISR), 2) t1LeaderCoord := nodeInfoList[t1.Leader].nsqdCoord test.NotNil(t, t1LeaderCoord) tc1, coordErr := t1LeaderCoord.getTopicCoord(topic, 1) test.Nil(t, coordErr) test.Equal(t, tc1.topicInfo.Leader, t1.Leader) test.Equal(t, len(tc1.topicInfo.ISR), 2) coordLog.Warningf("============= begin test isr node failed ====") // test isr node lost lostNodeID := t0.ISR[1] atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) t0, err = lookupLeadership.GetTopicInfo(topic, 0) if len(t0.ISR) < t0.Replica { waitClusterStable(lookupCoord1, time.Second*3) } t0, err = lookupLeadership.GetTopicInfo(topic, 0) test.Nil(t, err) test.Equal(t, FindSlice(t0.ISR, lostNodeID) == -1, true) test.Equal(t, len(t0.ISR), t0.Replica) test.Equal(t, t0.Leader, t0.ISR[0]) // clear topic info on failed node, test the reload for failed node nodeInfoList[lostNodeID].nsqdCoord.topicCoords = make(map[string]map[int]*TopicCoordinator) // test new catchup and new isr atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) if len(t0.ISR) < t0.Replica { waitClusterStable(lookupCoord1, time.Second*3) } t0, _ = lookupLeadership.GetTopicInfo(topic, 0) test.Equal(t, len(t0.CatchupList), 0) test.Equal(t, len(t0.ISR) >= t0.Replica, true) test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR)) test.Equal(t, t0.Leader, t0.ISR[0]) lookupCoord1.triggerCheckTopics("", 0, time.Second) time.Sleep(time.Second) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) // should remove the unnecessary node test.Equal(t, len(t0.ISR), t0.Replica) coordLog.Warningf("============= begin test leader failed ====") // test leader node lost lostNodeID = t0.Leader atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) if len(t0.ISR) < t0.Replica { waitClusterStable(lookupCoord1, time.Second*3) } t0, _ = lookupLeadership.GetTopicInfo(topic, 0) t.Log(t0) test.Equal(t, t0.Replica, len(t0.ISR)) test.Equal(t, t0.Leader, t0.ISR[0]) test.NotEqual(t, t0.Leader, lostNodeID) //test.Equal(t, len(t0.CatchupList), 1) test.Equal(t, FindSlice(t0.ISR, lostNodeID) == -1, true) t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0) test.Nil(t, coordErr) test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR)) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) // test lost leader node rejoin atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) if len(t0.ISR) < t0.Replica { waitClusterStable(lookupCoord1, time.Second*3) } t0, _ = lookupLeadership.GetTopicInfo(topic, 0) t.Log(t0) test.Equal(t, len(t0.CatchupList), 0) test.Equal(t, len(t0.ISR) >= t0.Replica, true) t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0) test.Nil(t, coordErr) test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR)) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) waitClusterStable(lookupCoord1, time.Second*3) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) // should remove the unnecessary node test.Equal(t, len(t0.ISR), t0.Replica) // test old leader failed and begin elect new and then new leader failed coordLog.Warningf("============= begin test old leader failed and then new leader failed ====") lostNodeID = t0.Leader lostISRID := t0.ISR[1] if lostISRID == lostNodeID { lostISRID = t0.ISR[0] } atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) time.Sleep(time.Millisecond) atomic.StoreInt32(&nodeInfoList[lostISRID].nsqdCoord.stopping, 1) nodeInfoList[lostISRID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostISRID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) atomic.StoreInt32(&nodeInfoList[lostISRID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) nodeInfoList[lostISRID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostISRID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*3) waitClusterStable(lookupCoord1, time.Second*5) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) test.Equal(t, true, len(t0.ISR) >= t0.Replica) test.Equal(t, t0.Leader == t0.ISR[0] || t0.Leader == t0.ISR[1], true) t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord test.NotNil(t, t0LeaderCoord) tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0) test.Nil(t, coordErr) test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR)) test.Equal(t, tc0.topicInfo.Leader, t0.Leader) waitClusterStable(lookupCoord1, time.Second*5) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) // should remove the unnecessary node test.Equal(t, t0.Replica, len(t0.ISR)) // test join isr timeout lostNodeID = t1.ISR[1] atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*5) atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*5) // with only 2 replica, the isr join fail should not change the isr list nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(true) waitClusterStable(lookupCoord1, time.Second*10) t1, _ = lookupLeadership.GetTopicInfo(topic, 1) test.Equal(t, true, len(t1.ISR)+len(t1.CatchupList) >= t1.Replica) test.Equal(t, t1.Leader == t1.ISR[0] || t1.Leader == t1.ISR[1], true) nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(false) waitClusterStable(lookupCoord1, time.Second*5) // test new topic create coordLog.Warningf("============= begin test 3 replicas ====") err = lookupCoord1.CreateTopic(topic3, TopicMetaInfo{1, 3, 0, 0, 0, 0}) test.Nil(t, err) waitClusterStable(lookupCoord1, time.Second*5) // with 3 replica, the isr join timeout will change the isr list if the isr has the quorum nodes t3, err := lookupLeadership.GetTopicInfo(topic3, 0) test.Nil(t, err) test.Equal(t, len(t3.ISR), t3.Replica) lostNodeID = t3.ISR[1] atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1) nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*5) atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0) nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo) waitClusterStable(lookupCoord1, time.Second*5) nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(true) waitClusterStable(lookupCoord1, time.Second*5) t3, _ = lookupLeadership.GetTopicInfo(topic3, 0) test.Equal(t, true, len(t3.ISR) >= t3.Replica-1) test.Equal(t, true, len(t3.ISR) <= t3.Replica) test.Equal(t, t3.Leader == t3.ISR[0] || t3.Leader == t3.ISR[1], true) nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(false) waitClusterStable(lookupCoord1, time.Second*5) glog.Flush() t0, _ = lookupLeadership.GetTopicInfo(topic, 0) test.Equal(t, true, len(t0.ISR) >= t0.Replica) t1, _ = lookupLeadership.GetTopicInfo(topic, 1) test.Equal(t, true, len(t1.ISR) >= t0.Replica) // before migrate really start, the isr should not reach the replica factor // however, catch up may start early while check leadership or enable topic write t3, _ = lookupLeadership.GetTopicInfo(topic3, 0) test.Equal(t, true, len(t3.ISR)+len(t3.CatchupList) >= t3.Replica) t0IsrNum := 2 t1IsrNum := 2 coordLog.Warningf("========== begin test quit ====") quitList := make([]*NsqdCoordinator, 0) quitList = append(quitList, nodeInfoList[t0.Leader].nsqdCoord) if t1.Leader != t0.Leader { quitList = append(quitList, nodeInfoList[t1.Leader].nsqdCoord) } if t3.Leader != t0.Leader && t3.Leader != t1.Leader { quitList = append(quitList, nodeInfoList[t3.Leader].nsqdCoord) } for id, n := range nodeInfoList { if id == t0.Leader || id == t1.Leader || id == t3.Leader { continue } quitList = append(quitList, n.nsqdCoord) } test.Equal(t, len(nodeInfoList), len(quitList)) for _, nsqdCoord := range quitList { failedID := nsqdCoord.myNode.GetID() delete(nodeInfoList, failedID) nsqdCoord.Stop() if t0IsrNum > 1 { if FindSlice(t0.ISR, failedID) != -1 { t0IsrNum-- } } if t1IsrNum > 1 { if FindSlice(t1.ISR, failedID) != -1 { t1IsrNum-- } } waitClusterStable(lookupCoord1, time.Second*5) t0, _ = lookupLeadership.GetTopicInfo(topic, 0) // we have no failed node in isr or we got the last failed node leaving in isr. t.Log(t0) test.Equal(t, FindSlice(t0.ISR, failedID) == -1 || (len(t0.ISR) == 1 && t0.ISR[0] == failedID), true) test.Equal(t, true, len(t0.ISR) >= t0IsrNum) t1, _ = lookupLeadership.GetTopicInfo(topic, 1) t.Log(t1) test.Equal(t, FindSlice(t1.ISR, failedID) == -1 || (len(t1.ISR) == 1 && t1.ISR[0] == failedID), true) test.Equal(t, true, len(t1.ISR) >= t1IsrNum) t3, _ = lookupLeadership.GetTopicInfo(topic3, 0) t.Log(t3) test.Equal(t, FindSlice(t3.ISR, failedID) == -1 || (len(t3.ISR) == 1 && t3.ISR[0] == failedID), true) } }