Example #1
func TestGet(t *testing.T) {
	cfg, err := config.Get("/etc/yascrapy/core.json")
	if err != nil {
		t.Fatal(err.Error()) // cfg is needed below, so abort instead of continuing
	}
	node := config.SSDBNode{
		Host: cfg.ProxyRedisIp,
		Port: cfg.ProxyRedisPort,
	}
	client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100})
	_, err = Get(client, "http_china")
	if err != nil {
		t.Error("TestGet fail ", err.Error())
	} else {
		t.Log("TestGet ok")
	}
}
Example #2
func TestDelete(t *testing.T) {
	pcfg, err := config.GetProxyConfig()
	if err != nil {
		t.Fatal(err.Error())
	}
	node := config.SSDBNode{
		Host: pcfg.ProxyRedisIp,
		Port: pcfg.ProxyRedisPort,
	}
	client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100})
	err = Delete(client, "http_china")
	if err != nil {
		t.Error("TestDelete fail: ", err.Error())
	} else {
		t.Log("TestDelete success")
	}
}
Example #3
func TestGetProxyNames(t *testing.T) {
	cfg, err := config.GetProxyConfig()
	if err != nil {
		t.Fatal(err.Error())
	}
	node := config.SSDBNode{
		Host: cfg.ProxyRedisIp,
		Port: cfg.ProxyRedisPort,
	}
	client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100})
	names, err := GetProxyNames(client)
	if err != nil {
		t.Error("TestGetProxyNames fail: ", err.Error())
	} else {
		t.Log(names)
	}
}
Example #4
func TestAdd(t *testing.T) {
	cfg, err := config.GetProxyConfig()
	if err != nil {
		t.Fatal(err.Error())
	}
	node := config.SSDBNode{
		Host: cfg.ProxyRedisIp,
		Port: cfg.ProxyRedisPort,
	}
	client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100})
	cfgs := cfg.Configs
	for _, cfg := range cfgs {
		err = Add(client, cfg)
		if err != nil {
			t.Error("TestAdd fail: ", err.Error())
		}
	}
}
Example #5
func BenchmarkGetCount(b *testing.B) {
	cfg, err := config.Get("/etc/yascrapy/core.json")
	if err != nil {
		b.Fatal(err.Error())
	}
	node := config.SSDBNode{
		Host: cfg.ProxyRedisIp,
		Port: cfg.ProxyRedisPort,
	}
	client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100})
	for i := 0; i < b.N; i++ {
		cnt, err := GetCount(client, "http_china")
		if err != nil {
			b.Error("TestGetCount fail ", err.Error())
		} else {
			b.Log(cnt)
		}
	}
}
Example #6
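// run loads the config, builds the SSDB, proxy, RabbitMQ and HTTP clients, installs a
// signal handler for cleanup, and then loops forever keeping consumerNumber crawler
// consumers alive for the given crawlerName.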
func run(cfgFile string, consumerNumber int, crawlerName string) {
	var conn *amqp.Connection
	var consumers []*core_consumer.CoreConsumer
	cfg, err := config.Get(cfgFile)
	if err != nil {
		utils.Error.Println("read config file fail ", err.Error(), cfgFile)
		return
	}
	proxyNode := config.SSDBNode{
		Host: cfg.ProxyRedisIp,
		Port: cfg.ProxyRedisPort,
	}
	ssdbNodes := cfg.SSDBNodes
	if len(ssdbNodes) == 0 {
		utils.Error.Println("ssdb nodes can not be empty")
		return
	}
	ssdbClients := ssdb.GetClients(ssdbNodes, cfg, ssdb.Params{MaxIdle: 5000, MaxActive: 5000})
	proxyClient := ssdb.GetProxyClient(proxyNode, ssdb.Params{MaxIdle: 5000, MaxActive: 5000})
	connPool := &rabbitmq.ConnPool{
		MaxIdle:   10,
		MaxActive: 50,
		Dial:      rabbitmq.CreateConn,
		Cfg:       cfg,
	}
	maxConn := 1000
	httpClients := CrawlerHttpClients(consumerNumber, maxConn)

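	// Trap SIGINT/SIGTERM so cleanup runs before the process exits.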
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, os.Interrupt)
	signal.Notify(sigs, syscall.SIGTERM)
	go func() {
		<-sigs
		cleanup(cfg, proxyClient)
		os.Exit(0)
	}()

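	// Main loop: refresh request/response queue state once per second and keep
	// consumerNumber consumers running.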
	for {
		reqQueues, err := core_queue.GetQueues("http_request:", cfg)
		if err != nil {
			utils.Error.Printf("GetReqQueues error:%s\n", err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("GetReqQueues success")
		}

		respQueues, err := core_queue.GetQueues("http_response:", cfg)
		if err != nil {
			utils.Error.Printf("GetRespQueues error:%s\n", err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("GetRespQueues success")
		}

		conn = connPool.Get().(*amqp.Connection)
		reqQueues, err = core_queue.UpdateQueueCount(reqQueues, conn)
		if err != nil {
			utils.Error.Println(err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("UpdateQueueCount success")
		}
		connPool.Release(conn)

		reqQueues = core_queue.RemoveEmptyQueues(reqQueues)
		utils.Debug.Println(reqQueues)
		if len(reqQueues) == 0 {
			utils.Info.Println("req queues empty")
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("RemoveEmptyQueues success")
		}
		core_queue.DeclareResponseQueues(conn, reqQueues, 5)

		conn = connPool.Get().(*amqp.Connection)
		respQueues, err = core_queue.UpdateQueueCount(respQueues, conn)
		utils.Debug.Println(respQueues)
		if err != nil {
			conn = nil
			utils.Error.Println(err.Error())
			time.Sleep(1 * time.Second)
			continue
		}
		connPool.Release(conn)

		if len(respQueues) == 0 {
			utils.Info.Println("resp queues empty")
			time.Sleep(1 * time.Second)
			continue
		}

		consumers = core_consumer.AliveCrawlerConsumers(consumers, crawlerName)
		utils.Debug.Println("total consumers: ", len(consumers), ", need start consumers:", consumerNumber-len(consumers))
		newConsumers := consumerNumber - len(consumers)

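		// Spawn replacement consumers, each bound to a randomly picked request/response queue pair.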
		for i := 0; i < newConsumers; i++ {
			reqQueueName, err := core_queue.GetRandReqQueue(reqQueues, crawlerName)
			if err != nil {
				utils.Error.Println(err.Error())
				continue
			}
			utils.Debug.Printf("GetRandReqQueue %s\n", reqQueueName)
			respQueueName, err := core_queue.GetRandRespQueue(respQueues, crawlerName)
			if err != nil {
				utils.Error.Println(err.Error())
				continue
			}
			utils.Debug.Printf("GetRandRespQueue %s\n", respQueueName)
			index := i / maxConn
			c := core_consumer.CreateCoreConsumer(i, crawlerName, reqQueueName, respQueueName)
			consumers = append(consumers, c)
			utils.Debug.Println("add consumer", c)
			go consumer(c, connPool, ssdbClients, httpClients[index], proxyClient)
		}
		if len(consumers) < consumerNumber {
			utils.Warning.Printf("aliveConsumers now %d, lower than %d\n", len(consumers), consumerNumber)
		}
		utils.Debug.Println("total consumers ", len(consumers))
		time.Sleep(1 * time.Second)
	}
}
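The run loop above would normally be started from a small main that parses command-line options and hands them to run. The sketch below is hypothetical wiring, not taken from the project: the flag names and defaults are assumptions, and only the standard library flag package is used.

package main

import "flag"

// Hypothetical entry point; flag names and defaults are illustrative only.
// run is assumed to be the function shown above, in the same package.
func main() {
	cfgFile := flag.String("config", "/etc/yascrapy/core.json", "path to the core config file")
	consumers := flag.Int("consumers", 100, "number of crawler consumers to keep alive")
	crawler := flag.String("crawler", "", "crawler name used to select request/response queues")
	flag.Parse()
	run(*cfgFile, *consumers, *crawler)
}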