Golang Request.GetUrlの例

プログラミング言語: Golang

名前空間/パッケージ名: github.com/henrylee2cn/pholcus/app/downloader/context

クラス/型: Request

メソッド/関数: GetUrl

hotexamples.comのコード掲載数: 10

Golang Request.GetUrl - 10件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたGolangのgithub.com/henrylee2cn/pholcus/app/downloader/context.Request.GetUrlの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

GetUrl(10)

GetMethod(8)

GetPriority(3)

GetReferer(3)

GetCookies(2)

GetSpiderName(2)

SetSpiderName(2)

Serialize(2)

GetSpiderId(2)

GetDownloaderID(2)

GetPostData(2)

GetHeader(2)

GetDuplicatable(2)

SetUrl(2)

GetUsePhantomJS(1)

IsReloadable(1)

SetReferer(1)

コード例 #1

ファイルを表示

ファイル: scheduler.go プロジェクト: jununfly/pholcus

// 添加请求到队列
func (self *scheduler) Push(req *context.Request) {
	// 初始化该蜘蛛的队列
	spiderId, ok := req.GetSpiderId()
	if !ok {
		return
	}

	self.RLock()
	defer self.RUnlock()

	if self.status == status.STOP {
		return
	}

	// 当req不可重复时，有重复则返回
	if !req.GetDuplicatable() && self.Deduplicate(req.GetUrl()+req.GetMethod()) {
		return
	}

	// 初始化该蜘蛛下该优先级队列
	priority := req.GetPriority()
	if !self.foundPriority(spiderId, priority) {
		self.addPriority(spiderId, priority)
	}

	defer func() {
		recover()
	}()

	// 添加请求到队列
	self.queue[spiderId][priority] = append(self.queue[spiderId][priority], req)
}

コード例 #2

ファイルを表示

ファイル: downloader_surfer.go プロジェクト: BobbWu/pholcus

func (self *Surfer) Download(cReq *context.Request) *context.Response {
	cResp := context.NewResponse(nil)

	var resp *http.Response
	var err error

	switch cReq.GetDownloaderID() {
	case SURF_ID:
		resp, err = self.surf.Download(cReq)
	case PHANTOM_ID:
		resp, err = self.phantom.Download(cReq)
	}

	if resp != nil {
		// 确保Response与Request中的Url字符串相等
		resp.Request.URL, _ = url.Parse(cReq.GetUrl())
	}

	cResp.SetRequest(cReq)

	cResp.SetResponse(resp)

	cResp.SetError(err)

	return cResp
}

コード例 #3

ファイルを表示

ファイル: downloader_surfer.go プロジェクト: no2key/pholcus-1

func (self *Surfer) Download(cReq *context.Request) *context.Response {
	cResp := context.NewResponse(nil)

	resp, err := self.download.Download(cReq.GetMethod(), cReq.GetUrl(), cReq.GetReferer(), cReq.GetPostData(), cReq.GetHeader(), cReq.GetCookies())

	cResp.SetRequest(cReq)

	cResp.SetResponse(resp)

	cResp.SetError(err)

	return cResp
}

コード例 #4

ファイルを表示

ファイル: scheduler.go プロジェクト: TonyFStark/pholcus

func (self *Matrix) SetFailure(req *context.Request) bool {
	self.Lock()
	defer self.Unlock()
	unique := makeUnique(req)
	if _, ok := self.failures[unique]; !ok {
		// 首次失败时，在任务队列末尾重新执行一次
		self.failures[unique] = req
		logs.Log.Informational(" *     + 失败请求: [%v]\n", req.GetUrl())
		return true
	}
	// 失败两次后，加入历史失败记录
	UpsertFailure(req)
	return false
}

コード例 #5

ファイルを表示

ファイル: scheduler.go プロジェクト: JanChou/pholcus

// 添加请求到队列
func (self *scheduler) Push(req *context.Request) {
	self.pushMutex.Lock()
	defer self.pushMutex.Unlock()

	if self.status == status.STOP {
		return
	}

	// 当req不可重复时，有重复则返回
	if !req.GetDuplicatable() && self.Deduplicate(req.GetUrl()+req.GetMethod()) {
		return
	}

	self.SrcManage.Push(req)
}

コード例 #6

ファイルを表示

ファイル: crawl.go プロジェクト: WangCrystal/pholcus

// core processer
func (self *crawler) Process(req *context.Request) {

	defer func() {
		if err := recover(); err != nil { // do not affect other
			logs.Log.Error(" *     Process panic: %v", err)
		}
	}()
	// logs.Log.Debug("**************断点 1 ***********")
	// download page
	resp := self.Downloader.Download(req)

	// logs.Log.Debug("**************断点 2 ***********")
	// if fail do not need process
	if resp.GetError() != nil {
		// 取消该请求的去重样本
		scheduler.Sdl.DelDeduplication(req.GetUrl() + req.GetMethod())
		logs.Log.Error(" *     %v", resp.GetError())
		// 统计下载失败的页数
		cache.PageFailCount()
		return
	}

	// logs.Log.Debug("**************断点 3 ***********")
	// 过程处理，提炼数据
	self.Spider.ExecParse(resp)
	// logs.Log.Debug("**************断点 5 ***********")
	// 该条请求文本结果存入pipeline
	for _, data := range resp.GetItems() {
		self.Pipeline.CollectData(
			resp.GetRuleName(), //DataCell.RuleName
			data,               //DataCell.Data
			resp.GetUrl(),      //DataCell.Url
			resp.GetReferer(),  //DataCell.ParentUrl
			time.Now().Format("2006-01-02 15:04:05"),
		)
	}

	// 该条请求文件结果存入pipeline
	for _, img := range resp.GetFiles() {
		self.Pipeline.CollectFile(
			resp.GetRuleName(),
			img["Name"].(string),
			img["Body"].(io.ReadCloser),
		)
	}

	// logs.Log.Debug("**************断点 end ***********")
}

コード例 #7

ファイルを表示

ファイル: downloader_surfer.go プロジェクト: treejames/pholcus-1

func (self *Surfer) Download(cReq *context.Request) *context.Response {
	cResp := context.NewResponse(nil)

	resp, err := self.download.Download(cReq.GetMethod(), cReq.GetUrl(), cReq.GetReferer(), cReq.GetPostData(), cReq.GetHeader(), cReq.GetCookies())

	cResp.SetRequest(cReq)

	cResp.SetResponse(resp)

	if err != nil {
		logs.Log.Error(" *     %v", err)
		// cResp.SetStatus(false, err.Error())
		// return cResp
	}

	cResp.SetStatus(true, "")
	return cResp
}

コード例 #8

ファイルを表示

ファイル: scheduler.go プロジェクト: no2key/pholcus-1

// 添加请求到队列
func (self *scheduler) Push(req *context.Request) {
	pushMutex.Lock()
	defer func() {
		pushMutex.Unlock()
	}()

	if self.status == status.STOP {
		return
	}

	// 有重复则返回
	if self.Compare(req.GetUrl() + req.GetMethod()) {
		return
	}

	// 留作未来分发请求用
	// if pholcus.Self.GetRunMode() == config.SERVER || req.CanOutsource() {
	// 	return
	// }

	self.SrcManage.Push(req)
}

コード例 #9

ファイルを表示

ファイル: crawl.go プロジェクト: BobbWu/pholcus

// core processer
func (self *crawler) Process(req *context.Request) {
	defer func() {
		if err := recover(); err != nil {
			// do not affect other
			scheduler.Sdl.DelDeduplication(req.GetUrl() + req.GetMethod())
			// 统计失败数
			cache.PageFailCount()
			// 提示错误
			logs.Log.Error(" *     Fail [process panic]: %v", err)
		}
	}()
	// download page
	resp := self.Downloader.Download(req)

	// if fail do not need process
	if resp.GetError() != nil {
		// 删除该请求的去重样本
		scheduler.Sdl.DelDeduplication(req.GetUrl() + req.GetMethod())
		// 统计失败数
		cache.PageFailCount()
		// 提示错误
		logs.Log.Error(" *     Fail [download]: %v", resp.GetError())
		return
	}

	// 过程处理，提炼数据
	spider.NewContext(self.Spider, resp).Parse(resp.GetRuleName())

	// 统计成功页数
	cache.PageSuccCount()
	// 提示抓取成功
	logs.Log.Informational(" *     Success: %v", req.GetUrl())

	// 该条请求文本结果存入pipeline
	for _, data := range resp.GetItems() {
		self.Pipeline.CollectData(
			resp.GetRuleName(), //DataCell.RuleName
			data,               //DataCell.Data
			resp.GetUrl(),      //DataCell.Url
			resp.GetReferer(),  //DataCell.ParentUrl
			time.Now().Format("2006-01-02 15:04:05"),
		)
	}

	// 该条请求文件结果存入pipeline
	for _, img := range resp.GetFiles() {
		self.Pipeline.CollectFile(
			resp.GetRuleName(),
			img["Name"].(string),
			img["Body"].(io.ReadCloser),
		)
	}
}

コード例 #10

ファイルを表示

ファイル: scheduler.go プロジェクト: TonyFStark/pholcus

func makeUnique(req *context.Request) string {
	return util.MakeUnique(req.GetUrl() + req.GetMethod())
}