//输出JsonResult到http response func OutputJsonResult(w http.ResponseWriter, result types.JsonResult) { resultJson, err := json.Marshal(result) if err != nil { log.Debugln("encode json result error: ", err) } w.Write(resultJson) }
//hget hostname domain func (this *PoliteVisitor) GetLastVisitTime(domain string, hostname string) int64 { client, err := this.pool.Get() defer this.pool.Put(client) var ret int64 = -1 if err != nil { log.Errorln("get redis client error: ", err) } else { host := this.canonicalHostname(hostname) dm := this.canonicalDomain(domain) key := this.makeRedisKey(host) resp := client.Cmd("hget", key, dm) if !resp.IsType(redis.Nil) { ts, err := resp.Int64() if err != nil { log.Debugln("convert redis response to int64 error: ", err, " resp:", resp) } else { ret = ts } } else { log.Debugln("hget ", key, "->", dm, " return nil") } } return ret }
func (this *Fetcher) fetchPage(pageStore PageStore) { defer this.wg.Done() httpClient := lib.HttpClient{} loop: for { select { case taskPack := <-this.taskQueue: destUrl := taskPack.Domain + taskPack.Urlpath html, err := httpClient.Get(destUrl) log.Debugln("goto fetch ", destUrl) done := 0 if err == nil { //report success to scheduler, make a log, save html html, err = httpClient.IconvHtml(html, "utf-8") done = 1 log.Infoln("fetch '" + destUrl + "' done.") err = pageStore.Save(taskPack.Domain, taskPack.Urlpath, string(html)) if err != nil { log.Errorln("fetcher save ", taskPack.Domain, taskPack.Urlpath, " error:", err) } } else { //report fail to scheduler log.Errorln("fetch '"+destUrl+"' failed!", err) } //向scheduler报告任务完成情况 reportUrl := fmt.Sprintf("http://%s%s?task_id=%d&done=%d", this.scheduler_addr, this.scheduler_api["report"], taskPack.TaskId, done) res, err := httpClient.Get(reportUrl) if err != nil { log.Errorln("report ", reportUrl, " failed!") } else { result := types.JsonResult{} err = json.Unmarshal(res, &result) if err != nil || result.Err != 0 { log.Errorln("report ", reportUrl, ", get error response: ", string(res)) } } case <-this.quitChan: //this.quitChan should be closed somewhere log.Infoln("quit fetch page...") break loop } } }
func (this *LocalPageStore) Save(domain string, urlpath string, page string) error { md5Bytes := md5.Sum([]byte(urlpath)) domain = this.canonicalDomain(domain) destDir := filepath.Join(this.dir, domain, fmt.Sprintf("%x", md5Bytes)) log.Debugln("destDir: ", destDir) err := os.MkdirAll(destDir, os.ModePerm) if err != nil { log.Errorln("mkdir for '"+domain+"/"+urlpath+"' error: ", err) } else { now := time.Now().Unix() fname := fmt.Sprintf("%s/%d", destDir, now) err = ioutil.WriteFile(fname, []byte(page), 0666) if err != nil { log.Errorln("save page: "+domain+"/"+urlpath+" error:", err) } } return err }
//hset hostname domain ts func (this *PoliteVisitor) SetLastVisitTime(domain string, hostname string, ts int64) error { client, err := this.pool.Get() defer this.pool.Put(client) if err != nil { log.Errorln("get redis client error: ", err) } else { host := this.canonicalHostname(hostname) dm := this.canonicalDomain(domain) key := this.makeRedisKey(host) var n int64 resp := client.Cmd("hset", key, dm, ts) n, err = resp.Int64() if err != nil { log.Errorln("hset ", key, " ", dm, " ", ts, " error: ", err) } else { log.Debugln("hset ", key, " ", dm, " ", ts, " updated: ", n) } } return err }
func (this *Fetcher) pushTasksHandler(w http.ResponseWriter, req *http.Request) { log.Debugln("get request: ", req.RemoteAddr, req.URL) req.ParseForm() tasksJson := req.Form.Get("tasks") taskPacks := []types.TaskPack{} var err error = nil var result = types.JsonResult{} if tasksJson != "" { err = json.Unmarshal([]byte(tasksJson), &taskPacks) if err != nil { msg := "Unmarshal task packs error: " + err.Error() log.Errorln(msg) result.Err = ErrDataError result.Msg = msg } else { //添加任务到队列 //最多只允许执行1秒钟 timerChan := time.After(1 * time.Second) cnt := 0 for _, pack := range taskPacks { select { case this.taskQueue <- pack: cnt++ case <-timerChan: break } } result.Err = ErrOk result.Data = taskPacks[:cnt] //将成功进入队列的任务返回 } } else { msg := "missing `tasks` key or has no content in the POST request." log.Warnln(msg) result.Err = ErrInputError result.Msg = msg } utils.OutputJsonResult(w, result) }