Example #1
0
// Run is the spider's main loop. It repeatedly polls the scheduler for the
// next request and hands each one to its own goroutine, with this.mc used to
// cap how many goroutines run at once (threadnum, defaulting to 1 when unset).
//
// When the scheduler has nothing to offer, Run either sleeps briefly and
// retries or, if exitWhenComplete is set and no worker was in flight, calls
// the page processor's Finish callback and leaves the loop. this.close() is
// always invoked before Run returns.
func (this *Spider) Run() {
	// Fall back to a single worker when no thread count was configured.
	if this.threadnum == 0 {
		this.threadnum = 1
	}
	this.mc = resource_manage.NewResourceManageChan(this.threadnum)

	//init db  by sorawa

	for {
		// Sample the in-flight count BEFORE polling the scheduler. The two
		// reads are not atomic: with the opposite order, a worker that was
		// still running when Poll returned nil could finish (FreeOne) before
		// Has was read, making the spider look idle and exit even though
		// that worker may have queued follow-up requests in the meantime.
		// NOTE(review): assumes Has() reports the number of slots currently
		// taken via GetOne, and that workers are the ones feeding the
		// scheduler — confirm against resource_manage and pageProcess.
		idle := this.mc.Has() == 0
		req := this.pScheduler.Poll()

		if req == nil {
			if idle && this.exitWhenComplete {
				mlog.StraceInst().Println("** executed callback **")
				this.pPageProcesser.Finish()
				mlog.StraceInst().Println("** end spider **")
				break
			}
			// Nothing to do right now; back off before polling again.
			time.Sleep(500 * time.Millisecond)
			//mlog.StraceInst().Println("scheduler is empty")
			continue
		}

		// Take a worker slot before spawning; the goroutine gives it back.
		this.mc.GetOne()

		// Asynchronous fetching
		go func(req *request.Request) {
			defer this.mc.FreeOne()
			//time.Sleep( time.Duration(rand.Intn(5)) * time.Second)
			mlog.StraceInst().Println("start crawl : " + req.GetUrl())
			this.pageProcess(req)
		}(req)
	}
	this.close()
}
// TestResourceManage is a smoke test for ResourceManageChan: it builds a
// manager with argument 1, then performs GetOne, FreeOne, GetOne in sequence,
// printing a marker after each call. It makes no assertions; it only checks
// that the sequence runs to completion.
// NOTE(review): presumably the second GetOne would block forever if FreeOne
// did not release the single slot — confirm against resource_manage.
func TestResourceManage(t *testing.T) {
	mc := resource_manage.NewResourceManageChan(1)

	// Acquire.
	mc.GetOne()
	println("incr")

	// Release.
	mc.FreeOne()
	println("decr")

	// Acquire again after the release.
	mc.GetOne()
	println("incr")
}