func (this *Engine) SetStartUrl(url string) *Engine { r := common.NewRequest(url) this.hook(plugin.BeforeSchedulerType, r) this.scheduler.Push(r) this.hook(plugin.AfterSchedulerType) return this }
func (this *Engine) SetStartUrls(urls []string) *Engine { for _, url := range urls { r := common.NewRequest(url) this.hook(plugin.BeforeSchedulerType, r) this.scheduler.Push(r) this.hook(plugin.AfterSchedulerType) } return this }
func (this *QuickEngineProcesser) processRequests(resp *common.Response, y *common.Yield, rule _Rule) { var TrimFunc extractor.TrimFunc switch rule.RequestRule.TrimFunc { case "trim_html_tags": TrimFunc = extractor.TrimHtmlTags case "trim_blank": TrimFunc = extractor.TrimBlank } items := extractor.NewExtractor(). SetScopeRule(rule.RequestRule.ScopeRule). SetRules(rule.RequestRule.KVRule). SetTrimFunc(TrimFunc). Extract(resp) for _, item := range items { for _, url := range item.GetAll() { if strings.HasPrefix(url, "http://") { y.AddRequest(common.NewRequest(url)) } else { y.AddRequest(common.NewRequest(rule.BaseUrl + url)) } } } }
func (this *Validator) genRequests(urls []string, tableName string, level int, db *sql.DB) []*common.Request { proxies, _ := util.GetLastProxies(tableName, db) reqs := []*common.Request{} for _, url := range urls { for _, proxy := range proxies { req := common.NewRequest(url) req.ProxyUrl = proxy reqs = append(reqs, req) } } rand.Seed(time.Now().Unix()) this.Shuffle(reqs) return reqs }
func GetCookieFunc(req *common.Request) (*cookiejar.Jar, error) { if _, ok := gAuth.IsAuthed[req.ProxyUrl]; ok { log.Printf("have authed %+v\n", gAuth.Jar[req.ProxyUrl]) return gAuth.Jar[req.ProxyUrl], nil } baseUrl := "http://bgp.he.net" transport := &http.Transport{ Proxy: http.ProxyURL(&url.URL{Host: req.ProxyUrl}), Dial: func(netw, addr string) (net.Conn, error) { c, err := net.DialTimeout(netw, addr, gConfig.GetConnectionTimeout()) if err != nil { return nil, err } return c, nil }, ResponseHeaderTimeout: gConfig.GetDownloadTimeout(), MaxIdleConnsPerHost: gConfig.GetMaxIdleConnsPerHost(), } gAuth.Jar[req.ProxyUrl], _ = cookiejar.New(nil) client := &http.Client{ Jar: gAuth.Jar[req.ProxyUrl], Timeout: 2 * gConfig.GetDownloadTimeout(), Transport: transport, } var p string var i string { u := baseUrl + "/i" resp, err := common.NewCurl(client, common.NewRequest(u)).Do() if err != nil { log.Printf("1. auth failed(%s) %s\n", u, err) return nil, err } i = strings.Trim(resp.Response.Header.Get("ETag"), "\"") } { u := baseUrl + "/dns/qq.com" _, err := common.NewCurl(client, common.NewRequest(u)).Do() if err != nil { log.Printf("2. auth failed(%s) %s\n", u, err) return nil, err } path := "" for _, c := range gAuth.Jar[req.ProxyUrl].Cookies(req.Request.URL) { if c.Name == "path" { path = c.Value break } } decodedPath, _ := url.QueryUnescape(path) p = fmt.Sprintf("%x", md5.Sum([]byte(decodedPath))) } { u := baseUrl + "/cc" _, err := common.NewCurl(client, common.NewRequest(u)).Do() if err != nil { log.Printf("3. auth failed(%s) %s\n", u, err) return nil, err } } { u := baseUrl + "/jc" form := url.Values{} form.Add("p", p) form.Add("i", i) r := common.NewRequest(u) r.Request, _ = http.NewRequest("POST", u, strings.NewReader(form.Encode())) _, err := common.NewCurl(client, r).Do() if err != nil { log.Printf("4.auth failed(%s) %s\n", u, err) return nil, err } } gAuth.IsAuthed[req.ProxyUrl] = true log.Printf("auth succeed %+v\n", gAuth.Jar[req.ProxyUrl]) return gAuth.Jar[req.ProxyUrl], nil }
func (this *MyProcesser) processNext(resp *common.Response, y *common.Yield) { m := regexp.MustCompile(`(?s)<span class="next">.*?<a href="(.*?)"`).FindStringSubmatch(resp.Body) if len(m) > 0 { y.AddRequest(common.NewRequest(this.baseUrl + m[1])) } }