예제 #1
0
func (p *program) Start() error {
	glog.InitWithFlag(flagSet)

	flagSet.Parse(os.Args[1:])

	if *showVersion {
		fmt.Println(version.String("nsqlookupd"))
		os.Exit(0)
	}

	var cfg map[string]interface{}
	if *config != "" {
		_, err := toml.DecodeFile(*config, &cfg)
		if err != nil {
			log.Fatalf("ERROR: failed to load config file %s - %s", *config, err.Error())
		}
	}

	opts := nsqlookupd.NewOptions()
	options.Resolve(opts, flagSet, cfg)
	if opts.LogDir != "" {
		glog.SetGLogDir(opts.LogDir)
	}
	nsqlookupd.SetLogger(opts)
	glog.StartWorker(time.Second * 2)
	daemon := nsqlookupd.New(opts)

	daemon.Main()
	p.nsqlookupd = daemon
	return nil
}
예제 #2
0
파일: main.go 프로젝트: absolute8511/nsq
func main() {
	glog.InitWithFlag(flagSet)
	flagSet.Parse(os.Args[1:])

	if *showVersion {
		fmt.Println(version.String("nsqadmin"))
		return
	}
	defer glog.Flush()

	if *templateDir != "" {
		log.Printf("WARNING: --template-dir is deprecated and will be removed in the next release (templates are now compiled into the binary)")
	}

	exitChan := make(chan int)
	signalChan := make(chan os.Signal, 1)
	go func() {
		<-signalChan
		exitChan <- 1
	}()
	signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)

	var cfg map[string]interface{}
	if *config != "" {
		_, err := toml.DecodeFile(*config, &cfg)
		if err != nil {
			log.Fatalf("ERROR: failed to load config file %s - %s", *config, err)
		}
	}

	opts := nsqadmin.NewOptions()
	options.Resolve(opts, flagSet, cfg)
	if opts.LogDir != "" {
		glog.SetGLogDir(opts.LogDir)
	}
	glog.StartWorker(time.Second * 2)

	nsqadmin := nsqadmin.New(opts)

	nsqadmin.Main()
	<-exitChan
	nsqadmin.Exit()
}
예제 #3
0
func mustStartNSQD(opts *nsqdNs.Options) (*net.TCPAddr, *net.TCPAddr, *nsqdNs.NSQD, *NsqdServer) {
	opts.TCPAddress = "127.0.0.1:0"
	opts.HTTPAddress = "127.0.0.1:0"
	opts.HTTPSAddress = "127.0.0.1:0"
	if opts.DataPath == "" {
		tmpDir, err := ioutil.TempDir("", fmt.Sprintf("nsq-test-%d", time.Now().UnixNano()))
		if err != nil {
			panic(err)
		}
		opts.DataPath = tmpDir
	}
	if opts.LogDir == "" {
		opts.LogDir = opts.DataPath
	}
	glog.SetGLogDir(opts.LogDir)
	glog.StartWorker(time.Second)
	_, nsqdServer := NewNsqdServer(opts)
	nsqdServer.Main()
	return nsqdServer.ctx.realTCPAddr(), nsqdServer.ctx.realHTTPAddr(), nsqdServer.ctx.nsqd, nsqdServer
}
func TestNsqLookupNsqdCreateTopic(t *testing.T) {
	// on 4 nodes, we should test follow cases
	// 1 partition 1 replica
	// 1 partition 3 replica
	// 3 partition 1 replica
	// 2 partition 2 replica
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}
	idList := []string{"id1", "id2", "id3", "id4"}
	lookupCoord1, nodeInfoList := prepareCluster(t, idList, false)
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}
	test.Equal(t, 4, len(nodeInfoList))

	topic_p1_r1 := "test-nsqlookup-topic-unit-testcreate-p1-r1"
	topic_p1_r3 := "test-nsqlookup-topic-unit-testcreate-p1-r3"
	topic_p3_r1 := "test-nsqlookup-topic-unit-testcreate-p3-r1"
	topic_p2_r2 := "test-nsqlookup-topic-unit-testcreate-p2-r2"

	lookupLeadership := lookupCoord1.leadership

	time.Sleep(time.Second)
	checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p1_r1, "**"))
	checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p1_r3, "**"))
	checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p3_r1, "**"))
	checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p2_r2, "**"))
	time.Sleep(time.Second * 3)
	defer func() {
		checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p1_r1, "**"))
		checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p1_r3, "**"))
		checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p3_r1, "**"))
		checkDeleteErr(t, lookupCoord1.DeleteTopic(topic_p2_r2, "**"))
		time.Sleep(time.Second * 3)

		lookupCoord1.Stop()
	}()

	// test new topic create
	err := lookupCoord1.CreateTopic(topic_p1_r1, TopicMetaInfo{1, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord1, time.Second*3)
	pmeta, _, err := lookupLeadership.GetTopicMetaInfo(topic_p1_r1)
	pn := pmeta.PartitionNum
	test.Nil(t, err)
	test.Equal(t, pn, 1)
	t0, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 1)

	t.Logf("t0 leader is: %v", t0.Leader)
	if nodeInfoList[t0.Leader] == nil {
		t.Fatalf("no leader: %v, %v", t0, nodeInfoList)
	}
	t0LeaderCoord := nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr := t0LeaderCoord.getTopicCoord(topic_p1_r1, 0)
	test.Nil(t, coordErr)
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	test.Equal(t, len(tc0.topicInfo.ISR), 1)

	err = lookupCoord1.CreateTopic(topic_p1_r3, TopicMetaInfo{1, 3, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord1, time.Second*5)
	lookupCoord1.triggerCheckTopics("", 0, 0)
	waitClusterStable(lookupCoord1, time.Second*5)
	pmeta, _, err = lookupLeadership.GetTopicMetaInfo(topic_p1_r3)
	pn = pmeta.PartitionNum
	test.Nil(t, err)
	test.Equal(t, pn, 1)
	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r3, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 3)

	t.Logf("t0 leader is: %v", t0.Leader)
	if nodeInfoList[t0.Leader] == nil {
		t.Fatalf("no leader: %v, %v", t0, nodeInfoList)
	}

	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic_p1_r3, 0)
	test.Nil(t, coordErr)
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	test.Equal(t, len(tc0.topicInfo.ISR), 3)

	err = lookupCoord1.CreateTopic(topic_p3_r1, TopicMetaInfo{3, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord1, time.Second*2)
	waitClusterStable(lookupCoord1, time.Second*5)
	pmeta, _, err = lookupLeadership.GetTopicMetaInfo(topic_p3_r1)
	pn = pmeta.PartitionNum
	test.Nil(t, err)
	test.Equal(t, pn, 3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p3_r1, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 1)

	t.Logf("t0 leader is: %v", t0.Leader)
	if nodeInfoList[t0.Leader] == nil {
		t.Fatalf("no leader: %v, %v", t0, nodeInfoList)
	}

	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic_p3_r1, 0)
	test.Nil(t, coordErr)
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	test.Equal(t, len(tc0.topicInfo.ISR), 1)

	t1, err := lookupLeadership.GetTopicInfo(topic_p3_r1, 1)
	t1LeaderCoord := nodeInfoList[t1.Leader].nsqdCoord
	test.NotNil(t, t1LeaderCoord)
	tc1, coordErr := t1LeaderCoord.getTopicCoord(topic_p3_r1, 1)
	test.Nil(t, coordErr)
	test.Equal(t, tc1.topicInfo.Leader, t1.Leader)
	test.Equal(t, len(tc1.topicInfo.ISR), 1)

	err = lookupCoord1.CreateTopic(topic_p2_r2, TopicMetaInfo{2, 2, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord1, time.Second*3)
	waitClusterStable(lookupCoord1, time.Second*5)
	pmeta, _, err = lookupLeadership.GetTopicMetaInfo(topic_p2_r2)
	pn = pmeta.PartitionNum
	test.Nil(t, err)
	test.Equal(t, pn, 2)
	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 2)

	t.Logf("t0 leader is: %v", t0.Leader)
	if nodeInfoList[t0.Leader] == nil {
		t.Fatalf("no leader: %v, %v", t0, nodeInfoList)
	}

	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic_p2_r2, 0)
	test.Nil(t, coordErr)
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	test.Equal(t, len(tc0.topicInfo.ISR), 2)

	t1, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 1)
	t1LeaderCoord = nodeInfoList[t1.Leader].nsqdCoord
	test.NotNil(t, t1LeaderCoord)
	tc1, coordErr = t1LeaderCoord.getTopicCoord(topic_p2_r2, 1)
	test.Nil(t, coordErr)
	test.Equal(t, tc1.topicInfo.Leader, t1.Leader)
	test.Equal(t, len(tc1.topicInfo.ISR), 2)

	// test create on exist topic, create on partial partition
	oldMeta, _, err := lookupCoord1.leadership.GetTopicMetaInfo(topic_p2_r2)
	test.Nil(t, err)
	err = lookupCoord1.CreateTopic(topic_p2_r2, TopicMetaInfo{2, 2, 0, 0, 1, 1})
	test.NotNil(t, err)
	waitClusterStable(lookupCoord1, time.Second)
	waitClusterStable(lookupCoord1, time.Second*5)
	newMeta, _, err := lookupCoord1.leadership.GetTopicMetaInfo(topic_p2_r2)
	test.Nil(t, err)
	test.Equal(t, oldMeta, newMeta)
}
func testNsqLookupNsqdNodesChange(t *testing.T, useFakeLeadership bool) {
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_INFO)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}
	idList := []string{"id1", "id2", "id3", "id4", "id5"}
	lookupCoord1, nodeInfoList := prepareCluster(t, idList, useFakeLeadership)
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}

	topic := "test-nsqlookup-topic-unit-test"
	lookupLeadership := lookupCoord1.leadership

	lookupCoord1.DeleteTopic(topic, "**")
	topic3 := topic + topic
	lookupCoord1.DeleteTopic(topic3, "**")
	time.Sleep(time.Second)
	defer func() {
		lookupCoord1.DeleteTopic(topic, "**")
		lookupCoord1.DeleteTopic(topic3, "**")
		time.Sleep(time.Second * 3)
		lookupCoord1.Stop()
	}()

	// test new topic create
	err := lookupCoord1.CreateTopic(topic, TopicMetaInfo{2, 2, 0, 0, 0, 0})
	test.Nil(t, err)

	waitClusterStable(lookupCoord1, time.Second*3)

	pmeta, _, err := lookupLeadership.GetTopicMetaInfo(topic)
	pn := pmeta.PartitionNum
	test.Nil(t, err)
	test.Equal(t, pn, 2)
	t0, err := lookupLeadership.GetTopicInfo(topic, 0)
	test.Nil(t, err)
	t1, err := lookupLeadership.GetTopicInfo(topic, 1)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 2)
	test.Equal(t, len(t1.ISR), 2)
	t.Log(t0)
	t.Log(t1)
	test.NotEqual(t, t0.Leader, t1.Leader)

	t0LeaderCoord := nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr := t0LeaderCoord.getTopicCoord(topic, 0)
	test.Nil(t, coordErr)
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	test.Equal(t, len(tc0.topicInfo.ISR), 2)

	t1LeaderCoord := nodeInfoList[t1.Leader].nsqdCoord
	test.NotNil(t, t1LeaderCoord)
	tc1, coordErr := t1LeaderCoord.getTopicCoord(topic, 1)
	test.Nil(t, coordErr)
	test.Equal(t, tc1.topicInfo.Leader, t1.Leader)
	test.Equal(t, len(tc1.topicInfo.ISR), 2)

	coordLog.Warningf("============= begin test isr node failed  ====")
	// test isr node lost
	lostNodeID := t0.ISR[1]
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic, 0)
	if len(t0.ISR) < t0.Replica {
		waitClusterStable(lookupCoord1, time.Second*3)
	}

	t0, err = lookupLeadership.GetTopicInfo(topic, 0)
	test.Nil(t, err)
	test.Equal(t, FindSlice(t0.ISR, lostNodeID) == -1, true)
	test.Equal(t, len(t0.ISR), t0.Replica)
	test.Equal(t, t0.Leader, t0.ISR[0])

	// clear topic info on failed node, test the reload for failed node
	nodeInfoList[lostNodeID].nsqdCoord.topicCoords = make(map[string]map[int]*TopicCoordinator)

	// test new catchup and new isr
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	if len(t0.ISR) < t0.Replica {
		waitClusterStable(lookupCoord1, time.Second*3)
	}

	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	test.Equal(t, len(t0.CatchupList), 0)
	test.Equal(t, len(t0.ISR) >= t0.Replica, true)
	test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR))
	test.Equal(t, t0.Leader, t0.ISR[0])
	lookupCoord1.triggerCheckTopics("", 0, time.Second)
	time.Sleep(time.Second)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	// should remove the unnecessary node
	test.Equal(t, len(t0.ISR), t0.Replica)

	coordLog.Warningf("============= begin test leader failed  ====")
	// test leader node lost
	lostNodeID = t0.Leader
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	if len(t0.ISR) < t0.Replica {
		waitClusterStable(lookupCoord1, time.Second*3)
	}

	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	t.Log(t0)
	test.Equal(t, t0.Replica, len(t0.ISR))
	test.Equal(t, t0.Leader, t0.ISR[0])
	test.NotEqual(t, t0.Leader, lostNodeID)
	//test.Equal(t, len(t0.CatchupList), 1)
	test.Equal(t, FindSlice(t0.ISR, lostNodeID) == -1, true)
	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0)
	test.Nil(t, coordErr)
	test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR))
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)

	// test lost leader node rejoin
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	if len(t0.ISR) < t0.Replica {
		waitClusterStable(lookupCoord1, time.Second*3)
	}

	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	t.Log(t0)

	test.Equal(t, len(t0.CatchupList), 0)
	test.Equal(t, len(t0.ISR) >= t0.Replica, true)
	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0)
	test.Nil(t, coordErr)
	test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR))
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	waitClusterStable(lookupCoord1, time.Second*3)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	// should remove the unnecessary node
	test.Equal(t, len(t0.ISR), t0.Replica)

	// test old leader failed and begin elect new and then new leader failed
	coordLog.Warningf("============= begin test old leader failed and then new leader failed ====")
	lostNodeID = t0.Leader
	lostISRID := t0.ISR[1]
	if lostISRID == lostNodeID {
		lostISRID = t0.ISR[0]
	}
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	time.Sleep(time.Millisecond)
	atomic.StoreInt32(&nodeInfoList[lostISRID].nsqdCoord.stopping, 1)
	nodeInfoList[lostISRID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostISRID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	atomic.StoreInt32(&nodeInfoList[lostISRID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	nodeInfoList[lostISRID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostISRID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*3)
	waitClusterStable(lookupCoord1, time.Second*5)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	test.Equal(t, true, len(t0.ISR) >= t0.Replica)
	test.Equal(t, t0.Leader == t0.ISR[0] || t0.Leader == t0.ISR[1], true)

	t0LeaderCoord = nodeInfoList[t0.Leader].nsqdCoord
	test.NotNil(t, t0LeaderCoord)
	tc0, coordErr = t0LeaderCoord.getTopicCoord(topic, 0)
	test.Nil(t, coordErr)
	test.Equal(t, len(tc0.topicInfo.ISR), len(t0.ISR))
	test.Equal(t, tc0.topicInfo.Leader, t0.Leader)
	waitClusterStable(lookupCoord1, time.Second*5)
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	// should remove the unnecessary node
	test.Equal(t, t0.Replica, len(t0.ISR))

	// test join isr timeout
	lostNodeID = t1.ISR[1]
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*5)
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*5)
	// with only 2 replica, the isr join fail should not change the isr list
	nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(true)
	waitClusterStable(lookupCoord1, time.Second*10)
	t1, _ = lookupLeadership.GetTopicInfo(topic, 1)
	test.Equal(t, true, len(t1.ISR)+len(t1.CatchupList) >= t1.Replica)
	test.Equal(t, t1.Leader == t1.ISR[0] || t1.Leader == t1.ISR[1], true)
	nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(false)
	waitClusterStable(lookupCoord1, time.Second*5)
	// test new topic create
	coordLog.Warningf("============= begin test 3 replicas ====")
	err = lookupCoord1.CreateTopic(topic3, TopicMetaInfo{1, 3, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord1, time.Second*5)
	// with 3 replica, the isr join timeout will change the isr list if the isr has the quorum nodes
	t3, err := lookupLeadership.GetTopicInfo(topic3, 0)
	test.Nil(t, err)
	test.Equal(t, len(t3.ISR), t3.Replica)
	lostNodeID = t3.ISR[1]
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 1)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.UnregisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*5)
	atomic.StoreInt32(&nodeInfoList[lostNodeID].nsqdCoord.stopping, 0)
	nodeInfoList[lostNodeID].nsqdCoord.leadership.RegisterNsqd(nodeInfoList[lostNodeID].nodeInfo)
	waitClusterStable(lookupCoord1, time.Second*5)
	nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(true)
	waitClusterStable(lookupCoord1, time.Second*5)
	t3, _ = lookupLeadership.GetTopicInfo(topic3, 0)
	test.Equal(t, true, len(t3.ISR) >= t3.Replica-1)
	test.Equal(t, true, len(t3.ISR) <= t3.Replica)
	test.Equal(t, t3.Leader == t3.ISR[0] || t3.Leader == t3.ISR[1], true)
	nodeInfoList[lostNodeID].nsqdCoord.rpcServer.toggleDisableRpcTest(false)
	waitClusterStable(lookupCoord1, time.Second*5)
	glog.Flush()
	t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
	test.Equal(t, true, len(t0.ISR) >= t0.Replica)
	t1, _ = lookupLeadership.GetTopicInfo(topic, 1)
	test.Equal(t, true, len(t1.ISR) >= t0.Replica)
	// before migrate really start, the isr should not reach the replica factor
	// however, catch up may start early while check leadership or enable topic write
	t3, _ = lookupLeadership.GetTopicInfo(topic3, 0)
	test.Equal(t, true, len(t3.ISR)+len(t3.CatchupList) >= t3.Replica)

	t0IsrNum := 2
	t1IsrNum := 2
	coordLog.Warningf("========== begin test quit ====")

	quitList := make([]*NsqdCoordinator, 0)
	quitList = append(quitList, nodeInfoList[t0.Leader].nsqdCoord)
	if t1.Leader != t0.Leader {
		quitList = append(quitList, nodeInfoList[t1.Leader].nsqdCoord)
	}
	if t3.Leader != t0.Leader && t3.Leader != t1.Leader {
		quitList = append(quitList, nodeInfoList[t3.Leader].nsqdCoord)
	}
	for id, n := range nodeInfoList {
		if id == t0.Leader || id == t1.Leader || id == t3.Leader {
			continue
		}
		quitList = append(quitList, n.nsqdCoord)
	}
	test.Equal(t, len(nodeInfoList), len(quitList))

	for _, nsqdCoord := range quitList {
		failedID := nsqdCoord.myNode.GetID()
		delete(nodeInfoList, failedID)
		nsqdCoord.Stop()
		if t0IsrNum > 1 {
			if FindSlice(t0.ISR, failedID) != -1 {
				t0IsrNum--
			}
		}
		if t1IsrNum > 1 {
			if FindSlice(t1.ISR, failedID) != -1 {
				t1IsrNum--
			}
		}

		waitClusterStable(lookupCoord1, time.Second*5)
		t0, _ = lookupLeadership.GetTopicInfo(topic, 0)
		// we have no failed node in isr or we got the last failed node leaving in isr.
		t.Log(t0)
		test.Equal(t, FindSlice(t0.ISR, failedID) == -1 || (len(t0.ISR) == 1 && t0.ISR[0] == failedID), true)
		test.Equal(t, true, len(t0.ISR) >= t0IsrNum)
		t1, _ = lookupLeadership.GetTopicInfo(topic, 1)
		t.Log(t1)
		test.Equal(t, FindSlice(t1.ISR, failedID) == -1 || (len(t1.ISR) == 1 && t1.ISR[0] == failedID), true)
		test.Equal(t, true, len(t1.ISR) >= t1IsrNum)
		t3, _ = lookupLeadership.GetTopicInfo(topic3, 0)
		t.Log(t3)
		test.Equal(t, FindSlice(t3.ISR, failedID) == -1 || (len(t3.ISR) == 1 && t3.ISR[0] == failedID), true)
	}
}
func TestNsqLookupMovePartition(t *testing.T) {
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}

	idList := []string{"id1", "id2", "id3", "id4", "id5"}
	lookupCoord, nodeInfoList := prepareCluster(t, idList, false)
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}

	topic_p1_r1 := "test-nsqlookup-topic-unit-test-move-p1-r1"
	topic_p2_r2 := "test-nsqlookup-topic-unit-test-move-p2-r2"
	lookupLeadership := lookupCoord.leadership

	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r2, "**"))
	time.Sleep(time.Second * 3)
	defer func() {
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r2, "**"))
		time.Sleep(time.Second * 3)
		lookupCoord.Stop()
	}()

	// test new topic create
	err := lookupCoord.CreateTopic(topic_p1_r1, TopicMetaInfo{1, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)

	err = lookupCoord.CreateTopic(topic_p2_r2, TopicMetaInfo{2, 2, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	lookupCoord.triggerCheckTopics("", 0, 0)
	waitClusterStable(lookupCoord, time.Second*3)
	// test move leader to other isr;
	// test move leader to other catchup;
	// test move non-leader to other node;
	t0, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 1)

	// move p1_r1 leader to other node
	toNode := ""
	for _, node := range nodeInfoList {
		if node.nodeInfo.GetID() == t0.Leader {
			continue
		}
		toNode = node.nodeInfo.GetID()
		break
	}
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second)
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p1_r1, 0, true, t0.Leader, toNode)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)

	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r1, 0)
	test.Nil(t, err)
	// it may be two nodes in isr if the moved leader rejoin as isr
	test.Equal(t, len(t0.ISR) >= 1, true)
	test.Equal(t, t0.Leader, toNode)

	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 2)

	toNode = ""
	for _, nid := range t0.ISR {
		if nid == t0.Leader {
			continue
		}
		toNode = nid
		break
	}
	waitClusterStable(lookupCoord, time.Second*3)
	// move leader to other isr node
	oldLeader := t0.Leader
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p2_r2, 0, true, t0.Leader, toNode)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR) >= 2, true)
	test.NotEqual(t, t0.Leader, oldLeader)
	test.Equal(t, t0.Leader, toNode)

	// move leader to other non-isr node
	toNode = ""
	for _, node := range nodeInfoList {
		if FindSlice(t0.ISR, node.nodeInfo.GetID()) != -1 {
			continue
		}
		// check other partition
		t1, err := lookupLeadership.GetTopicInfo(topic_p2_r2, 1)
		if err == nil {
			if FindSlice(t1.ISR, node.nodeInfo.GetID()) != -1 {
				continue
			}
		}
		toNode = node.nodeInfo.GetID()
		break
	}

	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second)
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p2_r2, 0, true, t0.Leader, toNode)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, t0.Leader, toNode)

	// move non-leader to other non-isr node
	toNode = ""
	toNodeInvalid := ""
	fromNode := ""
	for _, nid := range t0.ISR {
		if nid != t0.Leader {
			fromNode = nid
		}
	}
	for _, node := range nodeInfoList {
		if FindSlice(t0.ISR, node.nodeInfo.GetID()) != -1 {
			continue
		}
		// check other partition
		t1, err := lookupLeadership.GetTopicInfo(topic_p2_r2, 1)
		if err == nil {
			toNodeInvalid = t1.Leader
			if FindSlice(t1.ISR, node.nodeInfo.GetID()) != -1 {
				continue
			}
		}
		toNode = node.nodeInfo.GetID()
		break
	}
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second)
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p2_r2, 0, false, fromNode, toNodeInvalid)
	test.NotNil(t, err)
	test.Equal(t, ErrNodeIsExcludedForTopicData, err)

	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second)
	err = lookupCoord.MoveTopicPartitionDataByManual(topic_p2_r2, 0, false, fromNode, toNode)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p2_r2, 0)
	test.Nil(t, err)
	test.Equal(t, FindSlice(t0.ISR, toNode) != -1, true)
	test.Equal(t, -1, FindSlice(t0.ISR, fromNode))

}
func TestNsqLookupExpandPartition(t *testing.T) {
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}

	idList := []string{"id1", "id2", "id3", "id4", "id5", "id6"}
	lookupCoord, nodeInfoList := prepareCluster(t, idList, false)
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}

	topic_p1_r1 := "test-nsqlookup-topic-unit-test-expand-p1-r1"
	topic_p1_r2 := "test-nsqlookup-topic-unit-test-expand-p1-r2"
	topic_p1_r3 := "test-nsqlookup-topic-unit-test-expand-p1-r3"
	lookupLeadership := lookupCoord.leadership

	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r2, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r3, "**"))
	time.Sleep(time.Second * 3)
	defer func() {
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r2, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r3, "**"))
		time.Sleep(time.Second * 3)
		lookupCoord.Stop()
	}()

	err := lookupCoord.CreateTopic(topic_p1_r1, TopicMetaInfo{1, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)

	err = lookupCoord.CreateTopic(topic_p1_r2, TopicMetaInfo{1, 2, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)

	err = lookupCoord.CreateTopic(topic_p1_r3, TopicMetaInfo{1, 3, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)
	waitClusterStable(lookupCoord, time.Second)

	waitClusterStable(lookupCoord, time.Second*3)
	err = lookupCoord.ExpandTopicPartition(topic_p1_r1, 3)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), 1)
	t1, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 1)
	test.Nil(t, err)
	test.Equal(t, len(t1.ISR), 1)
	t2, err := lookupLeadership.GetTopicInfo(topic_p1_r1, 2)
	test.Nil(t, err)
	test.Equal(t, len(t2.ISR), 1)

	lookupCoord.triggerCheckTopics("", 0, 0)
	waitClusterStable(lookupCoord, time.Second*3)
	err = lookupCoord.ExpandTopicPartition(topic_p1_r2, 2)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), t0.Replica)
	t1, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 1)
	test.Nil(t, err)
	test.Equal(t, len(t1.ISR), t1.Replica)

	lookupCoord.triggerCheckTopics("", 0, 0)
	waitClusterStable(lookupCoord, time.Second*3)
	err = lookupCoord.ExpandTopicPartition(topic_p1_r2, 3)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), t0.Replica)
	t1, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 1)
	test.Nil(t, err)
	test.Equal(t, len(t1.ISR), t1.Replica)
	t2, err = lookupLeadership.GetTopicInfo(topic_p1_r2, 2)
	test.Nil(t, err)
	test.Equal(t, len(t2.ISR), t2.Replica)

	waitClusterStable(lookupCoord, time.Second*3)
	// should fail
	err = lookupCoord.ExpandTopicPartition(topic_p1_r2, 4)
	test.NotNil(t, err)

	err = lookupCoord.ExpandTopicPartition(topic_p1_r3, 2)
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*3)
	t0, err = lookupLeadership.GetTopicInfo(topic_p1_r3, 0)
	test.Nil(t, err)
	test.Equal(t, len(t0.ISR), t0.Replica)
	t1, err = lookupLeadership.GetTopicInfo(topic_p1_r3, 1)
	test.Nil(t, err)
	test.Equal(t, len(t1.ISR), t1.Replica)

	waitClusterStable(lookupCoord, time.Second*3)
	// should fail
	err = lookupCoord.ExpandTopicPartition(topic_p1_r3, 3)
	test.NotNil(t, err)
}
func TestNsqLookupMarkNodeRemove(t *testing.T) {
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}

	idList := []string{"id1", "id2", "id3", "id4", "id5"}
	lookupCoord, nodeInfoList := prepareCluster(t, idList, false)
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}

	topic_p4_r1 := "test-nsqlookup-topic-unit-test-removenode-p4-r1"
	topic_p2_r2 := "test-nsqlookup-topic-unit-test-removenode-p2-r2"
	topic_p1_r3 := "test-nsqlookup-topic-unit-test-removenode-p1-r3"
	lookupLeadership := lookupCoord.leadership

	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p4_r1, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r2, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r3, "**"))
	time.Sleep(time.Second * 3)
	defer func() {
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p4_r1, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r2, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r3, "**"))
		time.Sleep(time.Second * 3)
		lookupCoord.Stop()
	}()

	err := lookupCoord.CreateTopic(topic_p4_r1, TopicMetaInfo{4, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)

	err = lookupCoord.CreateTopic(topic_p2_r2, TopicMetaInfo{2, 2, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)

	err = lookupCoord.CreateTopic(topic_p1_r3, TopicMetaInfo{1, 3, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second)

	waitClusterStable(lookupCoord, time.Second*5)

	nid := ""
	for _, n := range nodeInfoList {
		nid = n.nodeInfo.GetID()
		break
	}
	err = lookupCoord.MarkNodeAsRemoving(nid)
	test.Nil(t, err)
	checkStart := time.Now()
	for time.Since(checkStart) < time.Minute*2 {
		time.Sleep(time.Second)
		isDone := true
		for i := 0; i < 4; i++ {
			info, err := lookupLeadership.GetTopicInfo(topic_p4_r1, i)
			test.Nil(t, err)
			if FindSlice(info.ISR, nid) != -1 {
				t.Logf("still waiting remove: %v", info)
				isDone = false
				break
			}
		}
		if !isDone {
			continue
		}
		time.Sleep(time.Second)
		for i := 0; i < 2; i++ {
			info, err := lookupLeadership.GetTopicInfo(topic_p2_r2, i)
			test.Nil(t, err)
			if FindSlice(info.ISR, nid) != -1 {
				t.Logf("still waiting remove: %v", info)
				isDone = false
				break
			}
		}
		if !isDone {
			continue
		}
		time.Sleep(time.Second)
		info, err := lookupLeadership.GetTopicInfo(topic_p1_r3, 0)
		test.Nil(t, err)
		if FindSlice(info.ISR, nid) != -1 {
			t.Logf("still waiting remove: %v from removing node", info)
			isDone = false
		}
		t.Logf("all done")

		if isDone {
			break
		}
	}
	for time.Since(checkStart) < time.Minute*2 {
		lookupCoord.nodesMutex.Lock()
		state := lookupCoord.removingNodes[nid]
		lookupCoord.nodesMutex.Unlock()
		if state == "data_transfered" || state == "done" {
			break
		} else {
			t.Logf("still waiting state: %v ", state)
		}
		time.Sleep(time.Second)
	}
	if time.Since(checkStart) >= time.Minute*2 {
		t.Error("remove node timeout")
	}
}
func TestNsqLookupUpdateTopicMeta(t *testing.T) {
	if testing.Verbose() {
		SetCoordLogger(&levellogger.GLogger{}, levellogger.LOG_WARN)
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	} else {
		SetCoordLogger(newTestLogger(t), levellogger.LOG_DEBUG)
	}

	idList := []string{"id1", "id2", "id3", "id4"}
	lookupCoord, nodeInfoList := prepareCluster(t, idList, false)
	for _, n := range nodeInfoList {
		defer os.RemoveAll(n.dataPath)
		defer n.localNsqd.Exit()
		defer n.nsqdCoord.Stop()
	}

	topic_p1_r1 := "test-nsqlookup-topic-unit-test-updatemeta-p1-r1"
	topic_p2_r1 := "test-nsqlookup-topic-unit-test-updatemeta-p2-r1"
	lookupLeadership := lookupCoord.leadership

	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
	checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r1, "**"))
	time.Sleep(time.Second * 3)
	defer func() {
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p1_r1, "**"))
		checkDeleteErr(t, lookupCoord.DeleteTopic(topic_p2_r1, "**"))
		time.Sleep(time.Second * 3)
		lookupCoord.Stop()
	}()

	err := lookupCoord.CreateTopic(topic_p1_r1, TopicMetaInfo{1, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	time.Sleep(time.Second)

	err = lookupCoord.CreateTopic(topic_p2_r1, TopicMetaInfo{2, 1, 0, 0, 0, 0})
	test.Nil(t, err)
	waitClusterStable(lookupCoord, time.Second*5)

	// test increase replicator and decrease the replicator
	err = lookupCoord.ChangeTopicMetaParam(topic_p1_r1, -1, -1, 3)
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second * 5)
	tmeta, _, _ := lookupLeadership.GetTopicMetaInfo(topic_p1_r1)
	test.Equal(t, 3, tmeta.Replica)
	for i := 0; i < tmeta.PartitionNum; i++ {
		info, err := lookupLeadership.GetTopicInfo(topic_p1_r1, i)
		test.Nil(t, err)
		test.Equal(t, tmeta.Replica, len(info.ISR))
	}

	err = lookupCoord.ChangeTopicMetaParam(topic_p1_r1, -1, -1, 2)
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second * 3)
	tmeta, _, _ = lookupLeadership.GetTopicMetaInfo(topic_p1_r1)
	test.Equal(t, 2, tmeta.Replica)
	for i := 0; i < tmeta.PartitionNum; i++ {
		info, err := lookupLeadership.GetTopicInfo(topic_p1_r1, i)
		test.Nil(t, err)
		test.Equal(t, tmeta.Replica, len(info.ISR))
	}

	err = lookupCoord.ChangeTopicMetaParam(topic_p2_r1, -1, -1, 2)
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second * 3)
	tmeta, _, _ = lookupLeadership.GetTopicMetaInfo(topic_p2_r1)
	test.Equal(t, 2, tmeta.Replica)
	for i := 0; i < tmeta.PartitionNum; i++ {
		info, err := lookupLeadership.GetTopicInfo(topic_p2_r1, i)
		test.Nil(t, err)
		test.Equal(t, tmeta.Replica, len(info.ISR))
	}

	// should fail
	err = lookupCoord.ChangeTopicMetaParam(topic_p2_r1, -1, -1, 3)
	test.NotNil(t, err)

	err = lookupCoord.ChangeTopicMetaParam(topic_p2_r1, -1, -1, 1)
	lookupCoord.triggerCheckTopics("", 0, 0)
	time.Sleep(time.Second * 3)
	tmeta, _, _ = lookupLeadership.GetTopicMetaInfo(topic_p2_r1)
	test.Equal(t, 1, tmeta.Replica)
	for i := 0; i < tmeta.PartitionNum; i++ {
		info, err := lookupLeadership.GetTopicInfo(topic_p2_r1, i)
		test.Nil(t, err)
		test.Equal(t, tmeta.Replica, len(info.ISR))
	}

	// test update the sync and retention , all partition and replica should be updated
	err = lookupCoord.ChangeTopicMetaParam(topic_p1_r1, 1234, 3, -1)
	time.Sleep(time.Second)
	tmeta, _, _ = lookupLeadership.GetTopicMetaInfo(topic_p1_r1)
	test.Equal(t, 1234, tmeta.SyncEvery)
	test.Equal(t, int32(3), tmeta.RetentionDay)
	for i := 0; i < tmeta.PartitionNum; i++ {
		info, err := lookupLeadership.GetTopicInfo(topic_p1_r1, i)
		test.Nil(t, err)
		for _, nid := range info.ISR {
			localNsqd := nodeInfoList[nid].localNsqd
			localTopic, err := localNsqd.GetExistingTopic(topic_p1_r1, i)
			test.Nil(t, err)
			dinfo := localTopic.GetDynamicInfo()
			test.Equal(t, int64(1234), dinfo.SyncEvery)
			test.Equal(t, int32(3), dinfo.RetentionDay)
		}
	}
}
예제 #10
0
func main() {
	glog.InitWithFlag(flagSet)
	flagSet.Parse(os.Args[1:])
	glog.StartWorker(time.Second)
	if *ordered {
		*trace = true
	}
	config = nsq.NewConfig()
	config.MsgTimeout = time.Second * time.Duration(10*(*channelNum))
	if config.MsgTimeout >= time.Second*200 {
		config.MsgTimeout = time.Second * 200
	}
	config.DefaultRequeueDelay = time.Second * 30
	config.MaxRequeueDelay = time.Second * 60
	config.MaxInFlight = 20
	config.EnableTrace = *trace
	config.EnableOrdered = *ordered

	log.SetPrefix("[bench_writer] ")
	dumpCheck = make(map[string]map[uint64]*nsq.Message, 5)
	pubRespCheck = make(map[string]map[uint64]pubResp, 5)
	orderCheck = make(map[string]pubResp)
	traceIDWaitingList = make(map[string]map[uint64]*nsq.Message, 5)
	pubTraceFailedList = make(map[string]map[uint64]int64)

	topicMutex = make(map[string]*sync.Mutex)
	if *topicListFile != "" {
		f, err := os.Open(*topicListFile)
		if err != nil {
			log.Printf("load topic list file error: %v", err)
		} else {
			scanner := bufio.NewScanner(f)
			for scanner.Scan() {
				line := scanner.Text()
				line = strings.TrimSpace(line)
				topics = append(topics, line)
			}
		}
	}
	log.Printf("testing topic list: %v", topics)

	msg := make([]byte, *size)
	batch := make([][]byte, *batchSize)
	for i := range batch {
		batch[i] = msg
	}

	if *benchCase == "simple" {
		startSimpleTest(msg, batch)
	} else if *benchCase == "benchpub" {
		startBenchPub(msg, batch)
	} else if *benchCase == "benchsub" {
		startBenchSub()
	} else if *benchCase == "checkdata" {
		startCheckData(msg, batch)
	} else if *benchCase == "benchlookup" {
		startBenchLookup()
	} else if *benchCase == "benchreg" {
		startBenchLookupRegUnreg()
	} else if *benchCase == "consumeoffset" {
		startCheckSetConsumerOffset()
	} else if *benchCase == "checkdata2" {
		startCheckData2()
	}
}
예제 #11
0
func TestTopicResetWithQueueStart(t *testing.T) {
	opts := NewOptions()
	opts.Logger = newTestLogger(t)
	if testing.Verbose() {
		opts.Logger = &levellogger.GLogger{}
		opts.LogLevel = 3
		glog.SetFlags(0, "", "", true, true, 1)
		glog.StartWorker(time.Second)
	}
	opts.MaxBytesPerFile = 1024 * 1024
	_, _, nsqd := mustStartNSQD(opts)
	defer os.RemoveAll(opts.DataPath)
	defer nsqd.Exit()

	topic := nsqd.GetTopic("test", 0)
	topic.dynamicConf.AutoCommit = 1
	topic.dynamicConf.SyncEvery = 10

	msgNum := 5000
	channel := topic.GetChannel("ch")
	test.NotNil(t, channel)
	msg := NewMessage(0, make([]byte, 1000))
	msg.Timestamp = time.Now().Add(-1 * time.Hour * time.Duration(24*4)).UnixNano()
	msgSize := int32(0)
	var dend BackendQueueEnd
	for i := 0; i <= msgNum; i++ {
		msg.ID = 0
		_, _, msgSize, dend, _ = topic.PutMessage(msg)
		msg.Timestamp = time.Now().Add(-1 * time.Hour * 24 * time.Duration(4-dend.(*diskQueueEndInfo).EndOffset.FileNum)).UnixNano()
	}
	topic.ForceFlush()

	fileNum := topic.backend.diskWriteEnd.EndOffset.FileNum
	test.Equal(t, int64(0), topic.backend.GetQueueReadStart().(*diskQueueEndInfo).EndOffset.FileNum)

	test.Equal(t, true, fileNum >= 4)
	nsqLog.Warningf("reading the topic %v backend ", topic.GetFullName())
	for i := 0; i < 100; i++ {
		msg := <-channel.clientMsgChan
		channel.ConfirmBackendQueue(msg)
	}

	topic.dynamicConf.RetentionDay = 2

	oldEnd := topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	// reset with new start
	resetStart := &diskQueueEndInfo{}
	resetStart.virtualEnd = topic.backend.GetQueueWriteEnd().Offset() + BackendOffset(msgSize*10)
	resetStart.totalMsgCnt = topic.backend.GetQueueWriteEnd().TotalMsgCnt() + 10
	err := topic.ResetBackendWithQueueStartNoLock(int64(resetStart.Offset()), resetStart.TotalMsgCnt())
	test.NotNil(t, err)
	topic.DisableForSlave()
	err = topic.ResetBackendWithQueueStartNoLock(int64(resetStart.Offset()), resetStart.TotalMsgCnt())
	test.Nil(t, err)
	topic.EnableForMaster()

	nsqLog.Warningf("reset the topic %v backend with queue start: %v", topic.GetFullName(), resetStart)
	test.Equal(t, resetStart.Offset(), BackendOffset(topic.GetQueueReadStart()))
	newEnd := topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	test.Equal(t, resetStart.Offset(), newEnd.Offset())
	test.Equal(t, resetStart.TotalMsgCnt(), newEnd.TotalMsgCnt())
	test.Equal(t, true, newEnd.EndOffset.GreatThan(&oldEnd.EndOffset))
	test.Equal(t, int64(0), newEnd.EndOffset.Pos)
	test.Equal(t, resetStart.Offset(), channel.GetConfirmed().Offset())
	test.Equal(t, resetStart.TotalMsgCnt(), channel.GetChannelEnd().TotalMsgCnt())

	for i := 0; i < msgNum; i++ {
		msg.ID = 0
		_, _, msgSize, _, _ = topic.PutMessage(msg)
	}
	topic.ForceFlush()
	newEnd = topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	test.Equal(t, resetStart.TotalMsgCnt()+int64(msgNum), newEnd.TotalMsgCnt())
	for i := 0; i < 100; i++ {
		msg := <-channel.clientMsgChan
		channel.ConfirmBackendQueue(msg)
		test.Equal(t, msg.offset+msg.rawMoveSize, channel.GetConfirmed().Offset())
	}

	// reset with old start
	topic.DisableForSlave()
	err = topic.ResetBackendWithQueueStartNoLock(int64(resetStart.Offset()), resetStart.TotalMsgCnt())
	test.Nil(t, err)

	topic.EnableForMaster()
	test.Equal(t, resetStart.Offset(), BackendOffset(topic.GetQueueReadStart()))
	newEnd = topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	test.Equal(t, resetStart.Offset(), newEnd.Offset())
	test.Equal(t, resetStart.TotalMsgCnt(), newEnd.TotalMsgCnt())
	test.Equal(t, true, newEnd.EndOffset.GreatThan(&oldEnd.EndOffset))
	test.Equal(t, int64(0), newEnd.EndOffset.Pos)
	test.Equal(t, resetStart.Offset(), channel.GetConfirmed().Offset())
	test.Equal(t, resetStart.TotalMsgCnt(), channel.GetChannelEnd().TotalMsgCnt())
	for i := 0; i < msgNum; i++ {
		msg.ID = 0
		_, _, msgSize, dend, _ = topic.PutMessage(msg)
		msg.Timestamp = time.Now().Add(-1 * time.Hour * 24 * time.Duration(4-dend.(*diskQueueEndInfo).EndOffset.FileNum)).UnixNano()
	}
	topic.ForceFlush()
	newEnd = topic.backend.GetQueueWriteEnd().(*diskQueueEndInfo)
	test.Equal(t, resetStart.TotalMsgCnt()+int64(msgNum), newEnd.TotalMsgCnt())
	for i := 0; i < 100; i++ {
		msg := <-channel.clientMsgChan
		channel.ConfirmBackendQueue(msg)
		test.Equal(t, msg.offset+msg.rawMoveSize, channel.GetConfirmed().Offset())
	}
}