Example #1
0
// SetStartUrl wraps url in a new request and pushes it onto the engine's
// scheduler. The BeforeSchedulerType hook is invoked with the request before
// the push, and the AfterSchedulerType hook is invoked after it.
//
// The receiver is returned so that calls can be chained.
func (this *Engine) SetStartUrl(url string) *Engine {
	seed := common.NewRequest(url)

	this.hook(plugin.BeforeSchedulerType, seed)
	this.scheduler.Push(seed)
	this.hook(plugin.AfterSchedulerType)

	return this
}
Example #2
0
// SetStartUrls pushes one new request per entry of urls onto the engine's
// scheduler, in slice order. For every entry the BeforeSchedulerType hook is
// invoked with the request before the push and the AfterSchedulerType hook is
// invoked after it.
//
// The receiver is returned so that calls can be chained.
func (this *Engine) SetStartUrls(urls []string) *Engine {
	for i := range urls {
		seed := common.NewRequest(urls[i])

		this.hook(plugin.BeforeSchedulerType, seed)
		this.scheduler.Push(seed)
		this.hook(plugin.AfterSchedulerType)
	}

	return this
}
// processRequests extracts link values from resp as described by
// rule.RequestRule and queues one new request on y for every value found.
//
// The extractor is configured with the rule's scope rule, key/value rules and
// trim function. A value that already starts with "http://" or "https://" is
// requested as-is; any other value is treated as relative and has
// rule.BaseUrl prepended by plain string concatenation.
func (this *QuickEngineProcesser) processRequests(resp *common.Response, y *common.Yield, rule _Rule) {
	// An unrecognised trim function name leaves trimFunc at its zero value,
	// which is handed to the extractor unchanged.
	var trimFunc extractor.TrimFunc
	switch rule.RequestRule.TrimFunc {
	case "trim_html_tags":
		trimFunc = extractor.TrimHtmlTags
	case "trim_blank":
		trimFunc = extractor.TrimBlank
	}

	items := extractor.NewExtractor().
		SetScopeRule(rule.RequestRule.ScopeRule).
		SetRules(rule.RequestRule.KVRule).
		SetTrimFunc(trimFunc).
		Extract(resp)
	for _, item := range items {
		for _, link := range item.GetAll() {
			target := link
			// Both schemes count as absolute; checking only "http://" would
			// glue BaseUrl in front of an https link and yield a bogus URL.
			if !strings.HasPrefix(link, "http://") && !strings.HasPrefix(link, "https://") {
				target = rule.BaseUrl + link
			}
			y.AddRequest(common.NewRequest(target))
		}
	}
}
Example #4
0
// genRequests builds one request for every (url, proxy) pair, where the
// proxies come from util.GetLastProxies(tableName, db). Each request targets
// the url and has its ProxyUrl set to the proxy. Before returning, rand is
// reseeded from the current Unix time and the slice is passed to this.Shuffle.
//
// The returned slice is non-nil even when no request was generated. The level
// parameter is not used by this method.
func (this *Validator) genRequests(urls []string, tableName string, level int, db *sql.DB) []*common.Request {
	// NOTE(review): the lookup error is discarded; presumably proxies is
	// empty on failure so no requests are produced — confirm against
	// util.GetLastProxies.
	proxies, _ := util.GetLastProxies(tableName, db)

	out := []*common.Request{}
	for i := range urls {
		for j := range proxies {
			r := common.NewRequest(urls[i])
			r.ProxyUrl = proxies[j]
			out = append(out, r)
		}
	}

	rand.Seed(time.Now().Unix())
	this.Shuffle(out)

	return out
}
Example #5
0
// GetCookieFunc returns the cookie jar associated with req.ProxyUrl, running
// the bgp.he.net authentication request sequence through that proxy the first
// time the proxy is seen.
//
// Jars are stored in gAuth.Jar and completed authentications are recorded in
// gAuth.IsAuthed, both keyed by req.ProxyUrl. When the proxy already has an
// entry in gAuth.IsAuthed the stored jar is returned without any network
// traffic. Otherwise a fresh jar is created and stored, and four requests are
// issued in order with a client that uses that jar:
//
//  1. GET  /i          - the ETag response header (quotes stripped) becomes "i".
//  2. GET  /dns/qq.com - the "path" cookie is URL-unescaped and MD5-hashed
//     (hex) to become "p"; a missing cookie hashes the empty string.
//  3. GET  /cc
//  4. POST /jc         - form-encoded body carrying "p" and "i".
//
// On any failure the error is logged and returned together with a nil jar,
// and the proxy is not recorded in gAuth.IsAuthed.
//
// NOTE(review): gAuth's maps are read and written here without locking;
// confirm that callers serialise access.
// NOTE(review): response bodies are not closed in this function; confirm that
// the Do call on common.NewCurl's result drains and closes them.
func GetCookieFunc(req *common.Request) (*cookiejar.Jar, error) {
	proxyKey := req.ProxyUrl

	// Only the presence of the key matters, not the stored boolean.
	if _, ok := gAuth.IsAuthed[proxyKey]; ok {
		jar := gAuth.Jar[proxyKey]
		log.Printf("have authed %+v\n", jar)
		return jar, nil
	}

	baseUrl := "http://bgp.he.net"
	transport := &http.Transport{
		Proxy: http.ProxyURL(&url.URL{Host: proxyKey}),
		Dial: func(network, addr string) (net.Conn, error) {
			return net.DialTimeout(network, addr, gConfig.GetConnectionTimeout())
		},
		ResponseHeaderTimeout: gConfig.GetDownloadTimeout(),
		MaxIdleConnsPerHost:   gConfig.GetMaxIdleConnsPerHost(),
	}

	jar, err := cookiejar.New(nil)
	if err != nil {
		log.Printf("auth failed(create cookie jar) %s\n", err)
		return nil, err
	}
	// The jar is stored before authentication completes; gAuth.IsAuthed is
	// what marks it as usable.
	gAuth.Jar[proxyKey] = jar

	client := &http.Client{
		Jar:       jar,
		Timeout:   2 * gConfig.GetDownloadTimeout(),
		Transport: transport,
	}

	var p string
	var i string
	{
		u := baseUrl + "/i"
		resp, err := common.NewCurl(client, common.NewRequest(u)).Do()
		if err != nil {
			log.Printf("1. auth failed(%s) %s\n", u, err)
			return nil, err
		}
		i = strings.Trim(resp.Response.Header.Get("ETag"), "\"")
	}
	{
		u := baseUrl + "/dns/qq.com"
		_, err := common.NewCurl(client, common.NewRequest(u)).Do()
		if err != nil {
			log.Printf("2. auth failed(%s) %s\n", u, err)
			return nil, err
		}
		// Cookies are looked up for the caller's request URL, not for u.
		cookiePath := ""
		for _, c := range jar.Cookies(req.Request.URL) {
			if c.Name == "path" {
				cookiePath = c.Value
				break
			}
		}
		decodedPath, err := url.QueryUnescape(cookiePath)
		if err != nil {
			// Hashing a half-decoded value would send a wrong "p" and still
			// mark the proxy as authenticated, so fail instead.
			log.Printf("2. auth failed(%s) %s\n", u, err)
			return nil, err
		}
		p = fmt.Sprintf("%x", md5.Sum([]byte(decodedPath)))
	}
	{
		u := baseUrl + "/cc"
		_, err := common.NewCurl(client, common.NewRequest(u)).Do()
		if err != nil {
			log.Printf("3. auth failed(%s) %s\n", u, err)
			return nil, err
		}
	}
	{
		u := baseUrl + "/jc"
		form := url.Values{}
		form.Add("p", p)
		form.Add("i", i)
		post, err := http.NewRequest("POST", u, strings.NewReader(form.Encode()))
		if err != nil {
			log.Printf("4.auth failed(%s) %s\n", u, err)
			return nil, err
		}
		// http.NewRequest does not set a content type for the body, so the
		// form encoding has to be declared explicitly.
		post.Header.Set("Content-Type", "application/x-www-form-urlencoded")
		r := common.NewRequest(u)
		r.Request = post
		if _, err := common.NewCurl(client, r).Do(); err != nil {
			log.Printf("4.auth failed(%s) %s\n", u, err)
			return nil, err
		}
	}

	gAuth.IsAuthed[proxyKey] = true
	log.Printf("auth succeed %+v\n", jar)
	return jar, nil
}
// nextPageLinkRe captures the href value of the first anchor that follows a
// <span class="next"> element. The (?s) flag lets "." match newlines, so the
// span and the anchor may be on different lines. It is compiled once at
// package initialisation rather than on every processNext call.
var nextPageLinkRe = regexp.MustCompile(`(?s)<span class="next">.*?<a href="(.*?)"`)

// processNext searches resp.Body for a "next" link and, when one is found,
// queues a request on y for this.baseUrl concatenated with the captured href.
// Nothing is queued when the body contains no match.
func (this *MyProcesser) processNext(resp *common.Response, y *common.Yield) {
	m := nextPageLinkRe.FindStringSubmatch(resp.Body)
	if len(m) == 0 {
		return
	}
	// m[0] is the whole match; m[1] is the captured href.
	y.AddRequest(common.NewRequest(this.baseUrl + m[1]))
}