Ejemplo n.º 1
0
// TestCoreConsumer exercises consumer creation against a live environment: it
// reads /etc/yascrapy/core.json, opens a RabbitMQ connection, lists the
// "http_request:" and "http_response:" queues, and then creates
// cfg.RabbitmqConsumers consumers, logging the randomly chosen queue pair and
// the alive-consumer counts for each one.
//
// Every failing step aborts the test with t.Fatal: the following steps use the
// values that step was supposed to produce (cfg, conn, reqQueue, ...), so
// carrying on after an error would only turn the real failure into a
// confusing secondary one.
func TestCoreConsumer(t *testing.T) {
	cfg, err := config.Get("/etc/yascrapy/core.json")
	if err != nil {
		t.Fatal(err.Error())
	}

	var conn *amqp.Connection
	conn, err = rabbitmq.GetConn(cfg)
	if err != nil {
		t.Fatal("rabbitmq GetConn fail", err.Error())
	}

	queues, err := core_queue.GetQueues("http_request:", cfg)
	if err != nil {
		t.Fatal("Get Queues fail", err.Error())
	}
	t.Log("queues cnt", len(queues))

	queues, err = core_queue.UpdateQueueCount(queues, conn)
	if err != nil {
		t.Fatal("UpdateQueueCount fail", err.Error())
	}
	queues = core_queue.RemoveEmptyQueues(queues)
	t.Log("queues cnt", len(queues))

	rand.Seed(time.Now().Unix())

	// consumers starts out nil; it is filtered once here and then grows by one
	// entry per loop iteration below.
	var consumers []*CoreConsumer
	consumers = AliveCoreConsumers(consumers)

	respQueues, err := core_queue.GetQueues("http_response:", cfg)
	if err != nil {
		t.Fatal("http response error", err.Error())
	}

	for i := 0; i < cfg.RabbitmqConsumers; i++ {
		reqQueue, err := core_queue.GetRandReqQueue(queues)
		if err != nil {
			t.Fatal(err.Error())
		}
		crawler := core_queue.GetCrawler(reqQueue, "http_request:")

		// Check the error before respQueueName is used for anything.
		respQueueName, err := core_queue.GetRandRespQueue(respQueues, crawler)
		if err != nil {
			t.Fatal(err.Error())
		}

		aliveReqConsumers := AliveReqConsumers(reqQueue.Name, consumers)
		aliveRespConsumers := AliveRespConsumers(respQueueName, consumers)
		t.Log("req queue", reqQueue.Name, "resp queue", respQueueName)
		t.Log("aliveReqConsumers ", len(aliveReqConsumers), "aliveRespConsumers", len(aliveRespConsumers))

		c := CreateCoreConsumer(i, crawler, reqQueue.Name, respQueueName)
		consumers = append(consumers, c)
	}

	// Deliberate failure so that `go test` prints the t.Log lines above even
	// without -v. NOTE(review): running with `go test -v` would make this
	// unnecessary and let the test pass.
	t.Error("to see output")
}
Ejemplo n.º 2
0
func run(cfgFile string, consumerNumber int, crawlerName string) {
	var conn *amqp.Connection
	var consumers []*core_consumer.CoreConsumer
	cfg, err := config.Get(cfgFile)
	if err != nil {
		utils.Error.Println("read config file fail ", err.Error(), cfgFile)
		return
	}
	proxyNode := config.SSDBNode{
		Host: cfg.ProxyRedisIp,
		Port: cfg.ProxyRedisPort,
	}
	ssdbNodes := cfg.SSDBNodes
	if len(ssdbNodes) == 0 {
		utils.Error.Println("ssdb nodes can not be empty")
		return
	}
	ssdbClients := ssdb.GetClients(ssdbNodes, cfg, ssdb.Params{MaxIdle: 5000, MaxActive: 5000})
	proxyClient := ssdb.GetProxyClient(proxyNode, ssdb.Params{MaxIdle: 5000, MaxActive: 5000})
	connPool := &rabbitmq.ConnPool{
		MaxIdle:   10,
		MaxActive: 50,
		Dial:      rabbitmq.CreateConn,
		Cfg:       cfg,
	}
	maxConn := 1000
	httpClients := CrawlerHttpClients(consumerNumber, maxConn)

	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, os.Interrupt)
	signal.Notify(sigs, syscall.SIGTERM)
	go func() {
		<-sigs
		cleanup(cfg, proxyClient)
		os.Exit(0)
	}()

	for {
		reqQueues, err := core_queue.GetQueues("http_request:", cfg)
		if err != nil {
			utils.Error.Printf("GetReqQueues error:%s\n", err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("GetReqQueues success")
		}

		respQueues, err := core_queue.GetQueues("http_response:", cfg)
		if err != nil {
			utils.Error.Printf("GetRespQueues error:%s\n", err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("GetRespQueues success")
		}

		conn = connPool.Get().(*amqp.Connection)
		reqQueues, err = core_queue.UpdateQueueCount(reqQueues, conn)
		if err != nil {
			utils.Error.Println(err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("UpdateQueueCount success")
		}
		connPool.Release(conn)

		reqQueues = core_queue.RemoveEmptyQueues(reqQueues)
		utils.Debug.Println(reqQueues)
		if len(reqQueues) == 0 {
			utils.Info.Println("req queues empty")
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("RemoveEmptyQueues success")
		}
		core_queue.DeclareResponseQueues(conn, reqQueues, 5)

		conn = connPool.Get().(*amqp.Connection)
		respQueues, err = core_queue.UpdateQueueCount(respQueues, conn)
		utils.Debug.Println(respQueues)
		if err != nil {
			conn = nil
			utils.Error.Println(err.Error())
			time.Sleep(1 * time.Second)
			continue
		}
		connPool.Release(conn)

		if len(respQueues) == 0 {
			utils.Info.Println("resp queues empty")
			time.Sleep(1 * time.Second)
			continue
		}

		consumers = core_consumer.AliveCrawlerConsumers(consumers, crawlerName)
		utils.Debug.Println("total consumers: ", len(consumers), ", need start consumers:", consumerNumber-len(consumers))
		newConsumers := consumerNumber - len(consumers)

		for i := 0; i < newConsumers; i++ {
			reqQueueName, err := core_queue.GetRandReqQueue(reqQueues, crawlerName)
			if err != nil {
				utils.Error.Printf(err.Error())
				continue
			}
			utils.Debug.Printf("GetRandReqQueue %s\n", reqQueueName)
			respQueueName, err := core_queue.GetRandRespQueue(respQueues, crawlerName)
			if err != nil {
				utils.Error.Printf(err.Error())
				continue
			}
			utils.Debug.Printf("GetRandRespQueue %s\n", respQueueName)
			index := i / maxConn
			c := core_consumer.CreateCoreConsumer(i, crawlerName, reqQueueName, respQueueName)
			consumers = append(consumers, c)
			utils.Debug.Println("add consumer", c)
			go consumer(c, connPool, ssdbClients, httpClients[index], proxyClient)
		}
		if len(consumers) < consumerNumber {
			utils.Warning.Printf("aliveConsumers now %d, lower than %d\n", len(consumers), consumerNumber)
		}
		utils.Debug.Println("total consumers ", len(consumers))
		time.Sleep(1 * time.Second)
	}
}