func TestGet(t *testing.T) { cfg, err := config.Get("/etc/yascrapy/core.json") if err != nil { t.Error(err.Error()) } node := config.SSDBNode{ Host: cfg.ProxyRedisIp, Port: cfg.ProxyRedisPort, } client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100}) _, err = Get(client, "http_china") if err != nil { t.Error("TestGet fail ", err.Error()) } else { t.Log("TestGet ok") } }
func TestDelete(t *testing.T) { pcfg, err := config.GetProxyConfig() if err != nil { t.Error(err.Error()) } node := config.SSDBNode{ Host: pcfg.ProxyRedisIp, Port: pcfg.ProxyRedisPort, } client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100}) err = Delete(client, "http_china") if err != nil { t.Error("TestDelete fail: ", err.Error()) } else { t.Log("TestDelete success") } }
func TestGetProxyNames(t *testing.T) { cfg, err := config.GetProxyConfig() if err != nil { t.Error(err.Error()) } node := config.SSDBNode{ Host: cfg.ProxyRedisIp, Port: cfg.ProxyRedisPort, } client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100}) names, err := GetProxyNames(client) if err != nil { t.Error("TestGetProxyNames fail: ", err.Error()) } else { t.Log(names) } }
func TestAdd(t *testing.T) { cfg, err := config.GetProxyConfig() if err != nil { t.Error(err.Error()) } node := config.SSDBNode{ Host: cfg.ProxyRedisIp, Port: cfg.ProxyRedisPort, } client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100}) cfgs := cfg.Configs for _, cfg := range cfgs { err = Add(client, cfg) if err != nil { t.Error("TestAdd fail: ", err.Error()) } } }
func BenchmarkGetCount(b *testing.B) { cfg, err := config.Get("/etc/yascrapy/core.json") if err != nil { b.Error(err.Error()) } node := config.SSDBNode{ Host: cfg.ProxyRedisIp, Port: cfg.ProxyRedisPort, } client := ssdb.GetProxyClient(node, ssdb.Params{MaxActive: 100, MaxIdle: 100}) for i := 0; i < b.N; i++ { cnt, err := GetCount(client, "http_china") if err != nil { b.Error("TestGetCount fail ", err.Error()) } else { b.Log(cnt) } } }
// run is the crawler's main supervision loop. It loads the config file,
// builds SSDB and RabbitMQ connection pools, installs a SIGINT/SIGTERM
// handler that cleans up and exits, and then loops forever: it discovers
// request/response queues, refreshes their message counts, and tops up the
// pool of consumers to consumerNumber, assigning each new consumer a random
// request and response queue for crawlerName.
func run(cfgFile string, consumerNumber int, crawlerName string) {
	var conn *amqp.Connection
	var consumers []*core_consumer.CoreConsumer
	cfg, err := config.Get(cfgFile)
	if err != nil {
		utils.Error.Println("read config file fail ", err.Error(), cfgFile)
		return
	}
	proxyNode := config.SSDBNode{
		Host: cfg.ProxyRedisIp,
		Port: cfg.ProxyRedisPort,
	}
	ssdbNodes := cfg.SSDBNodes
	if len(ssdbNodes) == 0 {
		utils.Error.Println("ssdb nodes can not be empty")
		return
	}
	ssdbClients := ssdb.GetClients(ssdbNodes, cfg, ssdb.Params{MaxIdle: 5000, MaxActive: 5000})
	proxyClient := ssdb.GetProxyClient(proxyNode, ssdb.Params{MaxIdle: 5000, MaxActive: 5000})
	connPool := &rabbitmq.ConnPool{
		MaxIdle:   10,
		MaxActive: 50,
		Dial:      rabbitmq.CreateConn,
		Cfg:       cfg,
	}
	// maxConn bounds how many consumers share one HTTP client (see the
	// index computation in the consumer-start loop below).
	maxConn := 1000
	httpClients := CrawlerHttpClients(consumerNumber, maxConn)
	// Shut down cleanly on Ctrl-C or SIGTERM: release proxy state, then exit.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, os.Interrupt)
	signal.Notify(sigs, syscall.SIGTERM)
	go func() {
		<-sigs
		cleanup(cfg, proxyClient)
		os.Exit(0)
	}()
	for {
		// Discover request queues; on any error, back off one second and retry.
		reqQueues, err := core_queue.GetQueues("http_request:", cfg)
		if err != nil {
			utils.Error.Printf("GetReqQueues error:%s\n", err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("GetReqQueues success")
		}
		respQueues, err := core_queue.GetQueues("http_response:", cfg)
		if err != nil {
			utils.Error.Printf("GetRespQueues error:%s\n", err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("GetRespQueues success")
		}
		conn = connPool.Get().(*amqp.Connection)
		reqQueues, err = core_queue.UpdateQueueCount(reqQueues, conn)
		if err != nil {
			// NOTE(review): conn is not released on this path — the pooled
			// connection appears to leak here. Verify ConnPool semantics.
			utils.Error.Println(err.Error())
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("UpdateQueueCount success")
		}
		connPool.Release(conn)
		reqQueues = core_queue.RemoveEmptyQueues(reqQueues)
		utils.Debug.Println(reqQueues)
		if len(reqQueues) == 0 {
			utils.Info.Println("req queues empty")
			time.Sleep(1 * time.Second)
			continue
		} else {
			utils.Debug.Println("RemoveEmptyQueues success")
		}
		// NOTE(review): conn was released above but is still used here, and
		// the function name says "response" while reqQueues is passed —
		// confirm both are intentional before touching this call.
		core_queue.DeclareResponseQueues(conn, reqQueues, 5)
		conn = connPool.Get().(*amqp.Connection)
		respQueues, err = core_queue.UpdateQueueCount(respQueues, conn)
		utils.Debug.Println(respQueues)
		if err != nil {
			// Drop the reference on failure; this connection is not released
			// back to the pool — TODO confirm that is the intended behavior.
			conn = nil
			utils.Error.Println(err.Error())
			time.Sleep(1 * time.Second)
			continue
		}
		connPool.Release(conn)
		if len(respQueues) == 0 {
			utils.Info.Println("resp queues empty")
			time.Sleep(1 * time.Second)
			continue
		}
		// Prune dead consumers, then start enough new ones to reach
		// consumerNumber.
		consumers = core_consumer.AliveCrawlerConsumers(consumers, crawlerName)
		utils.Debug.Println("total consumers: ", len(consumers), ", need start consumers:", consumerNumber-len(consumers))
		newConsumers := consumerNumber - len(consumers)
		for i := 0; i < newConsumers; i++ {
			reqQueueName, err := core_queue.GetRandReqQueue(reqQueues, crawlerName)
			if err != nil {
				utils.Error.Printf(err.Error())
				continue
			}
			utils.Debug.Printf("GetRandReqQueue %s\n", reqQueueName)
			respQueueName, err := core_queue.GetRandRespQueue(respQueues, crawlerName)
			if err != nil {
				utils.Error.Printf(err.Error())
				continue
			}
			utils.Debug.Printf("GetRandRespQueue %s\n", respQueueName)
			// Consumers are bucketed onto shared HTTP clients, maxConn
			// consumers per client.
			index := i / maxConn
			c := core_consumer.CreateCoreConsumer(i, crawlerName, reqQueueName, respQueueName)
			consumers = append(consumers, c)
			utils.Debug.Println("add consumer", c)
			go consumer(c, connPool, ssdbClients, httpClients[index], proxyClient)
		}
		if len(consumers) < consumerNumber {
			utils.Warning.Printf("aliveConsumers now %d, lower than %d\n", len(consumers), consumerNumber)
		}
		utils.Debug.Println("total consumers ", len(consumers))
		time.Sleep(1 * time.Second)
	}
}