Пример #1
0
// main hands a program value to svc.Run together with the signals listed
// below and terminates the process with a fatal log entry if Run returns an
// error.
func main() {
	defer glog.Flush()
	prg := &program{}
	if err := svc.Run(prg, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGINT); err != nil {
		// log.Fatal ends in os.Exit, which does not run deferred calls, so
		// the deferred glog.Flush above would be skipped on this path.
		// Flush explicitly so buffered log output is not lost.
		glog.Flush()
		log.Fatal(err)
	}
}
Пример #2
0
// main is the nsqadmin entry point. It parses the command-line flags,
// optionally decodes a TOML config file, resolves the options, starts the glog
// worker, runs nsqadmin and then blocks until SIGINT or SIGTERM arrives, at
// which point nsqadmin is asked to exit.
func main() {
	glog.InitWithFlag(flagSet)
	flagSet.Parse(os.Args[1:])

	// --version short-circuits everything else.
	if *showVersion {
		fmt.Println(version.String("nsqadmin"))
		return
	}
	defer glog.Flush()

	if *templateDir != "" {
		log.Printf("WARNING: --template-dir is deprecated and will be removed in the next release (templates are now compiled into the binary)")
	}

	// The helper goroutine converts the first delivered signal into a single
	// value on done, which main waits for further down.
	done := make(chan int)
	sigCh := make(chan os.Signal, 1)
	go func() {
		<-sigCh
		done <- 1
	}()
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)

	// fileCfg stays nil when no config file was given.
	var fileCfg map[string]interface{}
	if path := *config; path != "" {
		if _, err := toml.DecodeFile(path, &fileCfg); err != nil {
			log.Fatalf("ERROR: failed to load config file %s - %s", path, err)
		}
	}

	adminOpts := nsqadmin.NewOptions()
	options.Resolve(adminOpts, flagSet, fileCfg)
	if dir := adminOpts.LogDir; dir != "" {
		glog.SetGLogDir(dir)
	}
	glog.StartWorker(2 * time.Second)

	admin := nsqadmin.New(adminOpts)
	admin.Main()
	<-done
	admin.Exit()
}
// testNsqLookupNsqdNodesChange builds a five-node cluster with prepareCluster
// and walks it through a sequence of node unregister/register cycles,
// asserting after each step on the ISR list, leader and catchup list that the
// lookup leadership reports for the test topics.
//
// Scenarios, in order: a non-leader ISR node lost and rejoined, the leader
// lost and rejoined, leader and another ISR node lost back to back, ISR join
// with RPC disabled on a 2-replica topic, the same on a 3-replica topic, and
// finally stopping every node one by one.
//
// useFakeLeadership is passed through unchanged to prepareCluster.
func testNsqLookupNsqdNodesChange(t *testing.T, useFakeLeadership bool) {
	// Verbose runs log through glog; otherwise logs go to a logger built from t.
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_INFO)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}
	idList := []string{"id1", "id2", "id3", "id4", "id5"}
	lookupCoord1, nodeInfoList := prepareCluster(t, idList, useFakeLeadership)
	// These defers run when the test function returns, so every node's data
	// path and coordinators stay in place for the whole test.
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}

	topic := "test-nsqlookup-topic-unit-test"
	lookupLeadership := lookupCoord1.leadership

	// Delete both topics up front; the deferred func deletes them again and
	// stops the lookup coordinator when the test returns.
	lookupCoord1.DeleteTopic(topic, "**")
	topic3 := topic + topic
	lookupCoord1.DeleteTopic(topic3, "**")
	time.Sleep(time.Second)
	defer func() {
		lookupCoord1.DeleteTopic(topic, "**")
		lookupCoord1.DeleteTopic(topic3, "**")
		time.Sleep(time.Second * 3)
		lookupCoord1.Stop()
	}()

	// test new topic create
	err := lookupCoord1.CreateTopic(topic, TopicMetaInfo{2, 2, 0, 0, 0, 0})
	test.Nil(t, err)

	waitClusterStable(lookupCoord1, time.Second*3)

	pmeta, _, err := lookupLeadership.GetTopicMetaInfo(topic)
	// NOTE(review): pmeta is read before err is asserted on the next line.
	pn := pmeta.PartitionNum
	test.Nil(t, err)
	test.Equal(t, pn, 2)
	// Both partitions must have a full 2-node ISR and different leaders.
	t0, err := lookupLeadership.GetTopicInfo(topic, 0)
	test.Nil(t, err)
	t1, err := lookupLeadership.GetTopicInfo(topic, 1)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 2)
	test.Equal(t, len(t1.ISR), 2)
	t.Log(t0)
	t.Log(t1)
	test.NotEqual(t, t0.Leader, t1.Leader)

	// The leader node's own topic coordinator must agree with the lookup view
	// on leader and ISR size, for partition 0 ...
	t0LeaderCoord := nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr := t0LeaderCoord.getTopicCoord(topic, 0)
	test.Nil(t, coordErr)
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	test.Equal(t, len(tc0.topicInfo.ISR), 2)

	// ... and for partition 1.
	t1LeaderCoord := nodeInfoList[t1.Leader].nsqdCoord
	test.NotNil(t, t1LeaderCoord)
	tc1, coordErr := t1LeaderCoord.getTopicCoord(topic, 1)
	test.Nil(t, coordErr)
	test.Equal(t, tc1.topicInfo.Leader, t1.Leader)
	test.Equal(t, len(tc1.topicInfo.ISR), 2)

	coordLog.Warningf("============= begin test isr node failed  ====")
	// test isr node lost
	// The node is taken out by setting its stopping flag and unregistering it
	// from the leadership; the same pattern is used for every loss below.
	lostNodeID := t0.ISR[1]
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	// NOTE(review): err from this call is overwritten below before it is
	// checked, while t0 is dereferenced right away.
	t0, err = lookupLeadership.GetTopicInfo(topic, 0)
	// Wait one more round if the ISR is still below the replica count.
	if len(t0.ISR) < t0.Replica {
		waitClusterStable(lookupCoord1, time.Second*3)
	}

	// The lost node must be gone from the ISR, the ISR must be back at the
	// replica count, and the leader must be the first ISR entry.
	t0, err = lookupLeadership.GetTopicInfo(topic, 0)
	test.Nil(t, err)
	test.Equal(t, FindSlice(t0.ISR, lostNodeID) == -1, true)
	test.Equal(t, len(t0.ISR), t0.Replica)
	test.Equal(t, t0.Leader, t0.ISR[0])

	// clear topic info on failed node, test the reload for failed node
	nodeInfoList[lostNodeID].nsqdCoord.topicCoords = make(map[string]map[int]*TopicCoordinator)

	// test new catchup and new isr
	// Rejoin: clear the stopping flag and register the node again.
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	// NOTE(review): from here on most GetTopicInfo errors are discarded and the
	// result is dereferenced directly.
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	if len(t0.ISR) < t0.Replica {
		waitClusterStable(lookupCoord1, time.Second*3)
	}

	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	test.Equal(t, len(t0.CatchupList), 0)
	test.Equal(t, len(t0.ISR) >= t0.Replica, true)
	test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR))
	test.Equal(t, t0.Leader, t0.ISR[0])
	// Trigger a topic check so that any ISR entry beyond the replica count is
	// dropped before the next assertion.
	lookupCoord1.triggerCheckTopics("", 0, time.Second)
	time.Sleep(time.Second)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	// should remove the unnecessary node
	test.Equal(t, len(t0.ISR), t0.Replica)

	coordLog.Warningf("============= begin test leader failed  ====")
	// test leader node lost
	lostNodeID = t0.Leader
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	if len(t0.ISR) < t0.Replica {
		waitClusterStable(lookupCoord1, time.Second*3)
	}

	// A different node must now lead, with the old leader out of the ISR.
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	t.Log(t0)
	test.Equal(t, t0.Replica, len(t0.ISR))
	test.Equal(t, t0.Leader, t0.ISR[0])
	test.NotEqual(t, t0.Leader, lostNodeID)
	//test.Equal(t, len(t0.CatchupList), 1)
	test.Equal(t, FindSlice(t0.ISR, lostNodeID) == -1, true)
	// The new leader's coordinator must match the lookup view.
	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0)
	test.Nil(t, coordErr)
	test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR))
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)

	// test lost leader node rejoin
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	if len(t0.ISR) < t0.Replica {
		waitClusterStable(lookupCoord1, time.Second*3)
	}

	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	t.Log(t0)

	test.Equal(t, len(t0.CatchupList), 0)
	test.Equal(t, len(t0.ISR) >= t0.Replica, true)
	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0)
	test.Nil(t, coordErr)
	test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR))
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	waitClusterStable(lookupCoord1, time.Second*3)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	// should remove the unnecessary node
	test.Equal(t, len(t0.ISR), t0.Replica)

	// test old leader failed and begin elect new and then new leader failed
	coordLog.Warningf("============= begin test old leader failed and then new leader failed ====")
	// Pick the leader plus one other ISR member; both are taken out one
	// millisecond apart and later registered again together.
	lostNodeID = t0.Leader
	lostISRID := t0.ISR[1]
	if lostISRID == lostNodeID {
		lostISRID = t0.ISR[0]
	}
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	time.Sleep(time.Millisecond)
	atomic.StoreInt32(&nodeInfoList[lostISRID].nsqdCoord.stopping, 1)
	nodeInfoList[lostISRID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostISRID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	atomic.StoreInt32(&nodeInfoList[lostISRID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	nodeInfoList[lostISRID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostISRID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	waitClusterStable(lookupCoord1, time.Second*5)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	test.Equal(t, true, len(t0.ISR) >= t0.Replica)
	// Here the leader may be either of the first two ISR entries.
	test.Equal(t, t0.Leader == t0.ISR[0] || t0.Leader == t0.ISR[1], true)

	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0)
	test.Nil(t, coordErr)
	test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR))
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	waitClusterStable(lookupCoord1, time.Second*5)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	// should remove the unnecessary node
	test.Equal(t, t0.Replica, len(t0.ISR))

	// test join isr timeout
	// Uses partition 1 this time. t1 was fetched at the start of the test and
	// has not been refreshed since.
	lostNodeID = t1.ISR[1]
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*5)
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*5)
	// with only 2 replica, the isr join fail should not change the isr list
	// RPC is switched off on the rejoined node for the duration of the check
	// and switched back on afterwards.
	nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(true)
	waitClusterStable(lookupCoord1, time.Second*10)
	t1, _ = lookupLeadership.GetTopicInfo(topic, 1)
	test.Equal(t, true, len(t1.ISR)+len(t1.CatchupList) >= t1.Replica)
	test.Equal(t, t1.Leader == t1.ISR[0] || t1.Leader == t1.ISR[1], true)
	nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(false)
	waitClusterStable(lookupCoord1, time.Second*5)
	// test new topic create
	coordLog.Warningf("============= begin test 3 replicas ====")
	err = lookupCoord1.CreateTopic(topic3, TopicMetaInfo{1, 3, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord1, time.Second*5)
	// with 3 replica, the isr join timeout will change the isr list if the isr has the quorum nodes
	t3, err := lookupLeadership.GetTopicInfo(topic3, 0)
	test.Nil(t, err)
	test.Equal(t, len(t3.ISR), t3.Replica)
	lostNodeID = t3.ISR[1]
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*5)
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*5)
	nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(true)
	waitClusterStable(lookupCoord1, time.Second*5)
	t3, _ = lookupLeadership.GetTopicInfo(topic3, 0)
	// The ISR may hold either Replica-1 or Replica entries at this point.
	test.Equal(t, true, len(t3.ISR) >= t3.Replica-1)
	test.Equal(t, true, len(t3.ISR) <= t3.Replica)
	test.Equal(t, t3.Leader == t3.ISR[0] || t3.Leader == t3.ISR[1], true)
	nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(false)
	waitClusterStable(lookupCoord1, time.Second*5)
	glog.Flush()
	// Refresh all three topic infos; they also feed the quit ordering below.
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	test.Equal(t, true, len(t0.ISR) >= t0.Replica)
	t1, _ = lookupLeadership.GetTopicInfo(topic, 1)
	// NOTE(review): this compares against t0.Replica; t1.Replica looks like
	// the intended operand (both are partitions of the same topic) — confirm.
	test.Equal(t, true, len(t1.ISR) >= t0.Replica)
	// before migrate really start, the isr should not reach the replica factor
	// however, catch up may start early while check leadership or enable topic write
	t3, _ = lookupLeadership.GetTopicInfo(topic3, 0)
	test.Equal(t, true, len(t3.ISR)+len(t3.CatchupList) >= t3.Replica)

	// Lower bounds asserted on the ISR length of partitions 0 and 1 while
	// nodes quit; each is decremented, never below 1, when a stopped node was
	// in that partition's ISR.
	t0IsrNum := 2
	t1IsrNum := 2
	coordLog.Warningf("========== begin test quit ====")

	// Quit order: the (deduplicated) leaders of t0, t1 and t3 first, then all
	// remaining nodes in map iteration order.
	quitList := make([]*NsqdCoordinator, 0)
	quitList = append(quitList, nodeInfoList[t0.Leader].nsqdCoord)
	if t1.Leader != t0.Leader {
		quitList = append(quitList, nodeInfoList[t1.Leader].nsqdCoord)
	}
	if t3.Leader != t0.Leader && t3.Leader != t1.Leader {
		quitList = append(quitList, nodeInfoList[t3.Leader].nsqdCoord)
	}
	for id, n := range nodeInfoList {
		if id == t0.Leader || id == t1.Leader || id == t3.Leader {
			continue
		}
		quitList = append(quitList, n.nsqdCoord)
	}
	// The quit list must contain as many entries as there are nodes.
	test.Equal(t, len(nodeInfoList), len(quitList))

	// Stop the nodes one at a time and re-check all three topic infos after
	// each stop.
	for _, nsqdCoord := range quitList {
		failedID := nsqdCoord.myNode.GetID()
		delete(nodeInfoList, failedID)
		nsqdCoord.Stop()
		// t0 and t1 still hold the state from before this stop here.
		if t0IsrNum > 1 {
			if FindSlice(t0.ISR, failedID) != -1 {
				t0IsrNum--
			}
		}
		if t1IsrNum > 1 {
			if FindSlice(t1.ISR, failedID) != -1 {
				t1IsrNum--
			}
		}

		waitClusterStable(lookupCoord1, time.Second*5)
		t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
		// we have no failed node in isr or we got the last failed node leaving in isr.
		t.Log(t0)
		test.Equal(t, FindSlice(t0.ISR, failedID) == -1 || (len(t0.ISR) == 1 && t0.ISR[0] == failedID), true)
		test.Equal(t, true, len(t0.ISR) >= t0IsrNum)
		t1, _ = lookupLeadership.GetTopicInfo(topic, 1)
		t.Log(t1)
		test.Equal(t, FindSlice(t1.ISR, failedID) == -1 || (len(t1.ISR) == 1 && t1.ISR[0] == failedID), true)
		test.Equal(t, true, len(t1.ISR) >= t1IsrNum)
		t3, _ = lookupLeadership.GetTopicInfo(topic3, 0)
		t.Log(t3)
		test.Equal(t, FindSlice(t3.ISR, failedID) == -1 || (len(t3.ISR) == 1 && t3.ISR[0] == failedID), true)
	}
}