Ejemplo n.º 1
0
// MakeDoubanSpider builds the spider named "douban_img_spider".
//
// The spider starts at http://movie.douban.com/ and registers a single parse
// callback under spiders.BASE_PARSE_NAME. The callback looks up the elements
// matching "#page .n", takes the href of the last match and returns one GET
// request for it, routed back to the same callback.
//
// NOTE(review): the start URL is on movie.douban.com while followed links are
// prefixed with http://www.baidu.com — confirm this mismatch is intentional.
func MakeDoubanSpider() *spiders.Spider {
	spider := &spiders.Spider{}
	spider.Name = "douban_img_spider"
	spider.StartUrls = []string{"http://movie.douban.com/"}
	spider.ParseMap = make(map[string]func(response *http.Response) ([]*http.Request, error))
	spider.ParseMap[spiders.BASE_PARSE_NAME] = func(response *http.Response) ([]*http.Request, error) {
		// Stop following pagination once the request depth exceeds 10.
		if response.Request.Depth > 10 {
			return nil, nil
		}
		doc, err := goquery.NewDocumentFromReader(strings.NewReader(response.Body))
		if err != nil {
			return nil, err
		}
		nodes := doc.Find("#page .n").Nodes
		if len(nodes) == 0 {
			// No pagination element: nothing to follow, and no error either.
			return nil, nil
		}

		// The last matching node is treated as the "next page" link.
		var nextPageLink string
		for _, attr := range nodes[len(nodes)-1].Attr {
			if attr.Key == "href" {
				nextPageLink = attr.Val
				break
			}
		}
		if nextPageLink == "" {
			// Without an href the only URL we could build is the bare host,
			// which is not a next page; stop here instead of requesting it.
			return nil, nil
		}

		request, err := http.NewRequest("GET", "http://www.baidu.com"+nextPageLink, spider.Name, spiders.BASE_PARSE_NAME, nil, 0)
		if err != nil {
			// Never hand a request that failed to build back to the caller.
			return nil, err
		}
		return []*http.Request{request}, nil
	}
	return spider
}
Ejemplo n.º 2
0
// MakeMuiltiplySpiders builds the spider named "muiltiply_spider".
//
// The spider starts at a Baidu search result page and registers a single parse
// callback under spiders.BASE_PARSE_NAME. The callback emits one GET request
// for every "#page a" element that carries an href attribute; each request is
// routed back to the same callback.
func MakeMuiltiplySpiders() *spiders.Spider {
	spider := spiders.Spider{}
	spider.Name = "muiltiply_spider"
	spider.StartUrls = []string{"http://www.baidu.com/s?wd=1"}
	spider.ParseMap = make(map[string]func(response *http.Response) ([]*http.Request, error))
	spider.ParseMap[spiders.BASE_PARSE_NAME] = func(response *http.Response) ([]*http.Request, error) {
		doc, err := goquery.NewDocumentFromReader(strings.NewReader(response.Body))
		if err != nil {
			return nil, err
		}
		requestList := make([]*http.Request, 0, 10)
		doc.Find("#page a").Each(func(index int, hrefNode *goquery.Selection) {
			href, isExist := hrefNode.Attr("href")
			if !isExist {
				return
			}
			// hrefs are joined onto the Baidu host to form the request URL.
			nextPage := "http://www.baidu.com" + href
			request, err := http.NewRequest("GET", nextPage, spider.Name, spiders.BASE_PARSE_NAME, nil, 0)
			if err != nil {
				// Log and skip this link; a request that failed to build must
				// not be appended to the result.
				log.Println(err)
				return
			}
			requestList = append(requestList, request)
		})
		return requestList, nil
	}
	return &spider
}
Ejemplo n.º 3
0
// Base is a parse callback that turns a page containing the login box into
// login requests.
//
// It reads the value of the "_xsrf" input inside ".zu-side-login-box" and, for
// every entry of the built-in user list, builds a form-encoded POST to
// http://www.zhihu.com/login that is routed to the "Index" callback. If the
// token is absent an empty list is returned with no error. A request that
// fails to build is logged and skipped.
func Base(response *http.Response) ([]*http.Request, error) {
	requestList := make([]*http.Request, 0)
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(response.Body))
	if err != nil {
		return requestList, err
	}
	xsrf, exist := doc.Find(".zu-side-login-box input[name=_xsrf]").Attr("value")
	if !exist {
		return requestList, nil
	}
	// Each entry is {email, password}.
	userList := [][]string{{"*****@*****.**", ""}, {"*****@*****.**", ""}}
	for index, user := range userList {
		value := make(url.Values)
		value.Set("_xsrf", xsrf)
		value.Set("email", user[0])
		value.Set("password", user[1])
		value.Set("rememberme", "y")
		// NOTE(review): the last argument (index+2) presumably selects a
		// distinct cookie jar per account — confirm against http.NewRequest.
		request, requestErr := http.NewRequest("POST", "http://www.zhihu.com/login", zhihuSpider.Name, "Index", strings.NewReader(value.Encode()), index+2)
		if requestErr != nil {
			// Log the error that actually occurred; err from the document
			// parse above is always nil at this point.
			log.Println(requestErr)
			continue
		}
		requestList = append(requestList, request)
	}
	return requestList, nil
}
Ejemplo n.º 4
0
// MakeStockSpider builds the spider named "stock_spider".
//
// BeforeMethod opens the MySQL connection registered under the spider's name
// and AfterMethod closes it. The parse callback registered under
// spiders.BASE_PARSE_NAME:
//
//  1. strips the "FDC_DC.theTableData(...)" JSONP wrapper from the body and
//     decodes the payload into []Stock;
//  2. for every item of the first result, inserts (code, name) into db_stock
//     unless a row with that code already exists;
//  3. when the response belongs to the start URL (page 0), returns one request
//     for each page 1..totalPage, where totalPage = ceil(Count / 40).
//
// Malformed bodies and empty results are logged and yield no requests instead
// of panicking.
func MakeStockSpider() *spiders.Spider {
	spider := spiders.Spider{}
	spider.Name = "stock_spider"
	spider.BeforeMethod = func() {
		db.DefaultMysqlConnectMap.InitConnection(spider.Name, "root:@/wxstock?charset=utf8")
	}
	spider.AfterMethod = func() {
		db.DefaultMysqlConnectMap.CloseContection(spider.Name)
	}
	urlPrefix := "http://money.finance.sina.com.cn/d/api/openapi_proxy.php/?__s=[[%22hq%22,%22hs_a%22,%22%22,0,"
	urlSuffix := ",40]]&callback=FDC_DC.theTableData"
	pageSize := 40
	spider.StartUrls = []string{urlPrefix + "0" + urlSuffix}
	spider.ParseMap = make(map[string]func(response *http.Response) ([]*http.Request, error))
	spider.ParseMap[spiders.BASE_PARSE_NAME] = func(response *http.Response) ([]*http.Request, error) {
		body := response.Body

		// The payload sits between "FDC_DC.theTableData(" and the final two
		// characters of the body. Validate both bounds before slicing so a
		// missing marker or a truncated body cannot cause a panic.
		startIndex := strings.Index(body, "FDC_DC.theTableData")
		jsonStart := startIndex + len("FDC_DC.theTableData(")
		jsonEnd := len(body) - 2
		if startIndex < 0 || jsonStart > jsonEnd {
			log.Println("stock_spider: JSONP callback not found in response body")
			return nil, nil
		}
		jsonString := body[jsonStart:jsonEnd]

		var jsonResult []Stock
		err := json.Unmarshal([]byte(jsonString), &jsonResult)
		if err != nil {
			// Decoding problems are logged; whatever was decoded is still
			// processed below, as before.
			log.Println(err)
		}
		if len(jsonResult) == 0 {
			// Nothing decoded: there is no first result to read items from.
			return nil, err
		}

		for _, item := range jsonResult[0].Items {
			code := item[1]
			name := item[2]
			rows, err := db.DefaultMysqlConnectMap.Query(spider.Name, "select id from db_stock where code= ? ", code)
			if err != nil {
				log.Println(err)
				continue
			}
			// Release the result set before issuing the insert.
			found := rows.Next()
			rows.Close()
			if !found {
				db.DefaultMysqlConnectMap.Exec(spider.Name, "insert into db_stock(gmt_create,code,name)values(now(), ?, ? )", code, name)
			}
		}

		// Only the response for page 0 (the start URL) fans out to the
		// remaining pages; every other page just stores its rows.
		if !strings.Contains(response.GoResponse.Request.URL.RawQuery, ",0,0,40]]") {
			return nil, nil
		}
		totalPage := int(math.Ceil(float64(jsonResult[0].Count) / float64(pageSize)))
		requestList := make([]*http.Request, 0, totalPage)
		for pageNo := 1; pageNo <= totalPage; pageNo++ {
			request, err := http.NewRequest("GET", urlPrefix+strconv.Itoa(pageNo)+urlSuffix, spider.Name, spiders.BASE_PARSE_NAME, nil, 0)
			if err != nil {
				// Skip pages whose request could not be built rather than
				// leaving a nil entry in the returned list.
				log.Println(err)
				continue
			}
			requestList = append(requestList, request)
		}
		return requestList, nil
	}
	return &spider
}
Ejemplo n.º 5
0
// MakeStartRequests returns one GET request per entry of StartUrls, each
// routed to the BASE_PARSE_NAME callback.
//
// If InitStartUrls is set it is invoked first, before StartUrls is read. URLs
// whose request cannot be built are logged and left out of the result, so the
// returned slice never contains nil entries and may be shorter than StartUrls.
func (this *Spider) MakeStartRequests() []*http.Request {
	if this.InitStartUrls != nil {
		this.InitStartUrls()
	}
	startRequestSlice := make([]*http.Request, 0, len(this.StartUrls))
	for _, startURL := range this.StartUrls {
		request, err := http.NewRequest("GET", startURL, this.Name, BASE_PARSE_NAME, nil, 0)
		if err != nil {
			log.Println(err)
			continue
		}
		// Append instead of assigning by index so a skipped URL does not
		// leave a nil hole in the slice.
		startRequestSlice = append(startRequestSlice, request)
	}
	return startRequestSlice
}
Ejemplo n.º 6
0
// Index is a parse callback that follows up a response with a single GET to
// http://www.zhihu.com, routed to the "GetId" callback.
//
// The new request is created with the CookieJar value of the request that
// produced this response. If the request cannot be built, the error is
// returned and no request is emitted.
func Index(response *http.Response) ([]*http.Request, error) {
	request, err := http.NewRequest("GET", "http://www.zhihu.com", zhihuSpider.Name, "GetId", nil, response.Request.CookieJar)
	if err != nil {
		return nil, err
	}
	return []*http.Request{request}, nil
}