Ejemplo n.º 1
0
//输出JsonResult到http response
func OutputJsonResult(w http.ResponseWriter, result types.JsonResult) {
	resultJson, err := json.Marshal(result)
	if err != nil {
		log.Debugln("encode json result error: ", err)
	}
	w.Write(resultJson)
}
Ejemplo n.º 2
0
//hget hostname domain
func (this *PoliteVisitor) GetLastVisitTime(domain string, hostname string) int64 {
	client, err := this.pool.Get()
	defer this.pool.Put(client)
	var ret int64 = -1
	if err != nil {
		log.Errorln("get redis client error: ", err)
	} else {
		host := this.canonicalHostname(hostname)
		dm := this.canonicalDomain(domain)
		key := this.makeRedisKey(host)
		resp := client.Cmd("hget", key, dm)
		if !resp.IsType(redis.Nil) {
			ts, err := resp.Int64()
			if err != nil {
				log.Debugln("convert redis response to int64 error: ", err, " resp:", resp)
			} else {
				ret = ts
			}
		} else {
			log.Debugln("hget ", key, "->", dm, " return nil")
		}
	}
	return ret
}
Ejemplo n.º 3
0
func (this *Fetcher) fetchPage(pageStore PageStore) {
	defer this.wg.Done()

	httpClient := lib.HttpClient{}
loop:
	for {
		select {
		case taskPack := <-this.taskQueue:
			destUrl := taskPack.Domain + taskPack.Urlpath
			html, err := httpClient.Get(destUrl)
			log.Debugln("goto fetch ", destUrl)
			done := 0
			if err == nil {
				//report success to scheduler, make a log, save html
				html, err = httpClient.IconvHtml(html, "utf-8")
				done = 1
				log.Infoln("fetch '" + destUrl + "' done.")
				err = pageStore.Save(taskPack.Domain, taskPack.Urlpath, string(html))
				if err != nil {
					log.Errorln("fetcher save ", taskPack.Domain, taskPack.Urlpath, " error:", err)
				}
			} else {
				//report fail to scheduler
				log.Errorln("fetch '"+destUrl+"' failed!", err)
			}
			//向scheduler报告任务完成情况
			reportUrl := fmt.Sprintf("http://%s%s?task_id=%d&done=%d", this.scheduler_addr, this.scheduler_api["report"], taskPack.TaskId, done)
			res, err := httpClient.Get(reportUrl)
			if err != nil {
				log.Errorln("report ", reportUrl, " failed!")
			} else {
				result := types.JsonResult{}
				err = json.Unmarshal(res, &result)
				if err != nil || result.Err != 0 {
					log.Errorln("report ", reportUrl, ", get error response: ", string(res))
				}
			}
		case <-this.quitChan:
			//this.quitChan should be closed somewhere
			log.Infoln("quit fetch page...")
			break loop
		}
	}
}
Ejemplo n.º 4
0
func (this *LocalPageStore) Save(domain string, urlpath string, page string) error {
	md5Bytes := md5.Sum([]byte(urlpath))
	domain = this.canonicalDomain(domain)
	destDir := filepath.Join(this.dir, domain, fmt.Sprintf("%x", md5Bytes))
	log.Debugln("destDir: ", destDir)
	err := os.MkdirAll(destDir, os.ModePerm)
	if err != nil {
		log.Errorln("mkdir for '"+domain+"/"+urlpath+"' error: ", err)
	} else {
		now := time.Now().Unix()
		fname := fmt.Sprintf("%s/%d", destDir, now)
		err = ioutil.WriteFile(fname, []byte(page), 0666)
		if err != nil {
			log.Errorln("save page: "+domain+"/"+urlpath+" error:", err)
		}
	}

	return err
}
Ejemplo n.º 5
0
//hset hostname domain ts
func (this *PoliteVisitor) SetLastVisitTime(domain string, hostname string, ts int64) error {
	client, err := this.pool.Get()
	defer this.pool.Put(client)
	if err != nil {
		log.Errorln("get redis client error: ", err)
	} else {
		host := this.canonicalHostname(hostname)
		dm := this.canonicalDomain(domain)
		key := this.makeRedisKey(host)
		var n int64
		resp := client.Cmd("hset", key, dm, ts)
		n, err = resp.Int64()
		if err != nil {
			log.Errorln("hset ", key, " ", dm, " ", ts, " error: ", err)
		} else {
			log.Debugln("hset ", key, " ", dm, " ", ts, " updated: ", n)
		}
	}
	return err
}
Ejemplo n.º 6
0
func (this *Fetcher) pushTasksHandler(w http.ResponseWriter, req *http.Request) {
	log.Debugln("get request: ", req.RemoteAddr, req.URL)
	req.ParseForm()
	tasksJson := req.Form.Get("tasks")
	taskPacks := []types.TaskPack{}
	var err error = nil
	var result = types.JsonResult{}
	if tasksJson != "" {
		err = json.Unmarshal([]byte(tasksJson), &taskPacks)
		if err != nil {
			msg := "Unmarshal task packs error: " + err.Error()
			log.Errorln(msg)
			result.Err = ErrDataError
			result.Msg = msg
		} else {
			//添加任务到队列
			//最多只允许执行1秒钟
			timerChan := time.After(1 * time.Second)
			cnt := 0
			for _, pack := range taskPacks {
				select {
				case this.taskQueue <- pack:
					cnt++
				case <-timerChan:
					break
				}
			}
			result.Err = ErrOk
			result.Data = taskPacks[:cnt] //将成功进入队列的任务返回
		}
	} else {
		msg := "missing `tasks` key or has no content in the POST request."
		log.Warnln(msg)
		result.Err = ErrInputError
		result.Msg = msg
	}
	utils.OutputJsonResult(w, result)
}