Примеры использования Request.GetUrl в Golang

Язык программирования: Golang

Пространство имен/Пакет: github.com/henrylee2cn/pholcus/app/downloader/context

Класс/Тип: Request

Метод/Функция: GetUrl

Примеров на hotexamples.com: 10

Golang Request.GetUrl — найдено 10 примеров. Это лучшие примеры кода на Golang для github.com/henrylee2cn/pholcus/app/downloader/context.Request.GetUrl, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

GetUrl(10)

GetMethod(8)

GetPriority(3)

GetReferer(3)

GetCookies(2)

GetSpiderName(2)

SetSpiderName(2)

Serialize(2)

GetSpiderId(2)

GetDownloaderID(2)

GetPostData(2)

GetHeader(2)

GetDuplicatable(2)

SetUrl(2)

GetUsePhantomJS(1)

IsReloadable(1)

SetReferer(1)

Пример #1

Показать файл

Файл: scheduler.go Проект: jununfly/pholcus

// Push enqueues req on the queue kept for its spider and priority.
//
// The request is dropped when it carries no spider ID, when the scheduler
// status is status.STOP, or when it is not duplicatable and Deduplicate
// returns true for its URL+method key.
func (self *scheduler) Push(req *context.Request) {
	// A request without a spider ID cannot be routed to any queue.
	id, hasID := req.GetSpiderId()
	if !hasID {
		return
	}

	self.RLock()
	defer self.RUnlock()

	if self.status == status.STOP {
		return
	}

	// Only non-duplicatable requests go through the deduplication check;
	// a duplicate is discarded.
	if !req.GetDuplicatable() {
		if self.Deduplicate(req.GetUrl() + req.GetMethod()) {
			return
		}
	}

	// Make sure this spider has a queue for the request's priority.
	prio := req.GetPriority()
	if !self.foundPriority(id, prio) {
		self.addPriority(id, prio)
	}

	// A panic raised by the enqueue below is swallowed here.
	defer func() {
		recover()
	}()

	// Append the request to the queue.
	self.queue[id][prio] = append(self.queue[id][prio], req)
}

Пример #2

Показать файл

Файл: downloader_surfer.go Проект: BobbWu/pholcus

// Download fetches cReq with the downloader selected by
// cReq.GetDownloaderID() (SURF_ID or PHANTOM_ID) and wraps the outcome in a
// new context.Response.
//
// For any other downloader ID no download is attempted, so the returned
// response carries a nil *http.Response and a nil error.
func (self *Surfer) Download(cReq *context.Request) *context.Response {
	cResp := context.NewResponse(nil)

	var resp *http.Response
	var err error

	switch cReq.GetDownloaderID() {
	case SURF_ID:
		resp, err = self.surf.Download(cReq)
	case PHANTOM_ID:
		resp, err = self.phantom.Download(cReq)
	}

	// Keep the response's URL equal to the URL string stored in the request.
	// The URL is replaced only when the string parses: assigning the result
	// of a failed url.Parse would overwrite the existing URL with nil.
	// resp.Request is checked first because dereferencing a nil Request
	// would panic.
	if resp != nil && resp.Request != nil {
		if parsed, parseErr := url.Parse(cReq.GetUrl()); parseErr == nil {
			resp.Request.URL = parsed
		}
	}

	cResp.SetRequest(cReq)

	cResp.SetResponse(resp)

	cResp.SetError(err)

	return cResp
}

Пример #3

Показать файл

Файл: downloader_surfer.go Проект: no2key/pholcus-1

// Download performs the request described by cReq through self.download and
// returns a new context.Response holding the request, the raw response and
// the download error.
func (self *Surfer) Download(cReq *context.Request) *context.Response {
	cResp := context.NewResponse(nil)

	// Collect the request parameters in the order the downloader takes them.
	method := cReq.GetMethod()
	target := cReq.GetUrl()
	referer := cReq.GetReferer()
	postData := cReq.GetPostData()
	header := cReq.GetHeader()
	cookies := cReq.GetCookies()

	resp, err := self.download.Download(method, target, referer, postData, header, cookies)

	cResp.SetRequest(cReq)
	cResp.SetResponse(resp)
	cResp.SetError(err)

	return cResp
}

Пример #4

Показать файл

Файл: scheduler.go Проект: TonyFStark/pholcus

// SetFailure records a failed request under the key produced by makeUnique.
//
// It returns true the first time a key is seen, after storing req in
// self.failures. For a key that is already stored it passes req to
// UpsertFailure and returns false.
func (self *Matrix) SetFailure(req *context.Request) bool {
	self.Lock()
	defer self.Unlock()

	key := makeUnique(req)

	// Already failed once before: add it to the historical failure records.
	if _, seen := self.failures[key]; seen {
		UpsertFailure(req)
		return false
	}

	// First failure: remember the request so it can be run once more at the
	// end of the task queue.
	self.failures[key] = req
	logs.Log.Informational(" *     + 失败请求: [%v]\n", req.GetUrl())
	return true
}

Пример #5

Показать файл

Файл: scheduler.go Проект: JanChou/pholcus

// Push hands req to self.SrcManage while holding self.pushMutex.
//
// Nothing is pushed when the scheduler status is status.STOP, or when req is
// not duplicatable and Deduplicate returns true for its URL+method key.
func (self *scheduler) Push(req *context.Request) {
	self.pushMutex.Lock()
	defer self.pushMutex.Unlock()

	switch {
	case self.status == status.STOP:
		return
	case !req.GetDuplicatable() && self.Deduplicate(req.GetUrl()+req.GetMethod()):
		// Non-duplicatable request that duplicates an earlier one.
		return
	}

	self.SrcManage.Push(req)
}

Пример #6

Показать файл

Файл: crawl.go Проект: WangCrystal/pholcus

// Process is the core processor: it downloads req, lets the spider parse the
// response, and forwards the resulting items and files to the pipeline.
//
// A panic anywhere in the function is recovered and logged.
func (self *crawler) Process(req *context.Request) {
	const timeLayout = "2006-01-02 15:04:05"

	defer func() {
		// Recover so that a panic here does not affect other requests.
		if r := recover(); r != nil {
			logs.Log.Error(" *     Process panic: %v", r)
		}
	}()
	// logs.Log.Debug("************** checkpoint 1 ***********")

	// Download the page.
	resp := self.Downloader.Download(req)

	// logs.Log.Debug("************** checkpoint 2 ***********")

	// A failed download needs no further processing.
	if resp.GetError() != nil {
		// Remove the deduplication sample of this request.
		scheduler.Sdl.DelDeduplication(req.GetUrl() + req.GetMethod())
		logs.Log.Error(" *     %v", resp.GetError())
		// Count the page as a failed download.
		cache.PageFailCount()
		return
	}

	// logs.Log.Debug("************** checkpoint 3 ***********")

	// Run the parsing step that extracts the data.
	self.Spider.ExecParse(resp)

	// logs.Log.Debug("************** checkpoint 5 ***********")

	// Store the text results of this request in the pipeline.
	for _, item := range resp.GetItems() {
		ruleName := resp.GetRuleName()  // DataCell.RuleName
		pageURL := resp.GetUrl()        // DataCell.Url
		parentURL := resp.GetReferer()  // DataCell.ParentUrl
		stamp := time.Now().Format(timeLayout)
		self.Pipeline.CollectData(ruleName, item, pageURL, parentURL, stamp)
	}

	// Store the file results of this request in the pipeline.
	for _, file := range resp.GetFiles() {
		ruleName := resp.GetRuleName()
		name := file["Name"].(string)
		body := file["Body"].(io.ReadCloser)
		self.Pipeline.CollectFile(ruleName, name, body)
	}

	// logs.Log.Debug("************** checkpoint end ***********")
}

Пример #7

Показать файл

Файл: downloader_surfer.go Проект: treejames/pholcus-1

// Download performs the request described by cReq through self.download and
// returns a new context.Response holding the request and the raw response.
//
// A download error is only logged; the response status is set to true with
// an empty message in every case.
func (self *Surfer) Download(cReq *context.Request) *context.Response {
	cResp := context.NewResponse(nil)

	// Collect the request parameters in the order the downloader takes them.
	method := cReq.GetMethod()
	target := cReq.GetUrl()
	referer := cReq.GetReferer()
	postData := cReq.GetPostData()
	header := cReq.GetHeader()
	cookies := cReq.GetCookies()

	resp, err := self.download.Download(method, target, referer, postData, header, cookies)

	cResp.SetRequest(cReq)
	cResp.SetResponse(resp)

	if err != nil {
		logs.Log.Error(" *     %v", err)
		// Disabled: report the failure through the status instead.
		// cResp.SetStatus(false, err.Error())
		// return cResp
	}

	cResp.SetStatus(true, "")
	return cResp
}

Пример #8

Показать файл

Файл: scheduler.go Проект: no2key/pholcus-1

// Push hands req to self.SrcManage while holding the package-level pushMutex.
//
// Nothing is pushed when the scheduler status is status.STOP, or when
// Compare returns true for the request's URL+method key.
func (self *scheduler) Push(req *context.Request) {
	pushMutex.Lock()
	defer pushMutex.Unlock()

	if self.status == status.STOP {
		return
	}

	// Return if the request is a duplicate.
	if key := req.GetUrl() + req.GetMethod(); self.Compare(key) {
		return
	}

	// Reserved for distributing requests in the future.
	// if pholcus.Self.GetRunMode() == config.SERVER || req.CanOutsource() {
	// 	return
	// }

	self.SrcManage.Push(req)
}

Пример #9

Показать файл

Файл: crawl.go Проект: BobbWu/pholcus

// Process is the core processor: it downloads req, parses the response with
// a spider context, and forwards the resulting items and files to the
// pipeline.
//
// A panic anywhere in the function is recovered; the request's
// deduplication sample is then removed, the failure is counted and the panic
// is logged.
func (self *crawler) Process(req *context.Request) {
	const timeLayout = "2006-01-02 15:04:05"

	defer func() {
		r := recover()
		if r == nil {
			return
		}
		// Recover so that a panic here does not affect other requests.
		scheduler.Sdl.DelDeduplication(req.GetUrl() + req.GetMethod())
		// Count the failure.
		cache.PageFailCount()
		// Report the error.
		logs.Log.Error(" *     Fail [process panic]: %v", r)
	}()

	// Download the page.
	resp := self.Downloader.Download(req)

	// A failed download needs no further processing.
	if resp.GetError() != nil {
		// Remove the deduplication sample of this request.
		scheduler.Sdl.DelDeduplication(req.GetUrl() + req.GetMethod())
		// Count the failure.
		cache.PageFailCount()
		// Report the error.
		logs.Log.Error(" *     Fail [download]: %v", resp.GetError())
		return
	}

	// Run the parsing step that extracts the data.
	spider.NewContext(self.Spider, resp).Parse(resp.GetRuleName())

	// Count the page as a success.
	cache.PageSuccCount()
	// Report the successful fetch.
	logs.Log.Informational(" *     Success: %v", req.GetUrl())

	// Store the text results of this request in the pipeline.
	for _, item := range resp.GetItems() {
		ruleName := resp.GetRuleName()  // DataCell.RuleName
		pageURL := resp.GetUrl()        // DataCell.Url
		parentURL := resp.GetReferer()  // DataCell.ParentUrl
		stamp := time.Now().Format(timeLayout)
		self.Pipeline.CollectData(ruleName, item, pageURL, parentURL, stamp)
	}

	// Store the file results of this request in the pipeline.
	for _, file := range resp.GetFiles() {
		ruleName := resp.GetRuleName()
		name := file["Name"].(string)
		body := file["Body"].(io.ReadCloser)
		self.Pipeline.CollectFile(ruleName, name, body)
	}
}

Пример #10

Показать файл

Файл: scheduler.go Проект: TonyFStark/pholcus

// makeUnique returns util.MakeUnique applied to the request's URL
// concatenated with its method.
func makeUnique(req *context.Request) string {
	key := req.GetUrl() + req.GetMethod()
	return util.MakeUnique(key)
}