func record(level byte, content string) { if content == "" { return } switch level { case 0: log.Info(content) case 1: log.Warnf(content) case 2: log.Error(content) } }
// 把请求存放到请求缓存。 func (sched *myScheduler) saveReqToCache(req base.Request, code string) bool { httpReq := req.HttpReq() if httpReq == nil { log.Warnf("Ignore the request! It's HTTP request is invalid!") return false } reqUrl := httpReq.URL if reqUrl == nil { log.Warnf("Ignore the request! It's url is is invalid!") return false } if strings.ToLower(reqUrl.Scheme) != "http" { log.Warnf("Ignore the request! It's url scheme '%s', but should be 'http'!\n", reqUrl.Scheme) return false } if _, ok := sched.urlMap[reqUrl.String()]; ok { log.Warnf("Ignore the request! It's url is repeated. (requestUrl=%s)\n", reqUrl) return false } if pd, _ := getPrimaryDomain(httpReq.Host); pd != sched.primaryDomain { log.Warnf("Ignore the request! It's host '%s' not in primary domain '%s'. (requestUrl=%s)\n", httpReq.Host, sched.primaryDomain, reqUrl) return false } if req.Depth() > sched.crawlDepth { log.Warnf("Ignore the request! It's depth %d greater than %d. (requestUrl=%s)\n", req.Depth(), sched.crawlDepth, reqUrl) return false } if sched.stopSign.Signed() { sched.stopSign.Deal(code) return false } sched.reqCache.put(&req) sched.urlMap[reqUrl.String()] = true return true }