Пример #1
0
// Visit prints the normalized URL of the page being visited and then inserts
// one row into the mz table for every ".photo_wrap" element in doc.
//
// Only photo_href, photo_thumb_src and people_href are filled in from the
// page; photo_large_src and photo_public_src are always inserted as empty
// strings. If preparing the statement or executing an insert fails, Visit
// panics with the error's text. It always returns (nil, true).
func (self *CustomExtender) Visit(ctx *gocrawl.URLContext, res *http.Response, doc *goquery.Document) (interface{}, bool) {
	fmt.Println(ctx.NormalizedURL().String())

	// Each "?" is a positional placeholder bound by Exec below.
	const insertSQL = "INSERT INTO mz(photo_href, photo_thumb_src, photo_large_src, photo_public_src, people_href) VALUES( ?, ?, ?, ?, ? )"

	// The connection returned by GetConn is not closed here; only the
	// prepared statement is released when Visit returns.
	stmt, prepErr := GetConn().Prepare(insertSQL)
	if prepErr != nil {
		panic(prepErr.Error())
	}
	defer stmt.Close()

	doc.Find(".photo_wrap").Each(func(_ int, wrap *goquery.Selection) {
		// The first anchor supplies both the photo link and, via its title
		// attribute, the value stored in people_href.
		// NOTE(review): first is defined elsewhere; presumably it returns the
		// first of its arguments (the attribute value) — confirm.
		anchor := wrap.Find("a").First()
		photoHref := first(anchor.Attr("href")).(string)
		thumbSrc := first(wrap.Find("img").First().Attr("src")).(string)
		peopleHref := first(anchor.Attr("title")).(string)

		// These two columns are never populated from the page.
		var largeSrc, publicSrc string

		// Run the insert for this element.
		if _, execErr := stmt.Exec(photoHref, thumbSrc, largeSrc, publicSrc, peopleHref); execErr != nil {
			panic(execErr.Error())
		}
	})

	return nil, true
}
Пример #2
0
// Filter reports whether the URL in ctx should be accepted: it returns true
// only when the URL has not been visited yet and its normalized form matches
// rxOk.
func (x *ExampleExtender) Filter(ctx *gocrawl.URLContext, isVisited bool) bool {
	// Already-visited URLs are rejected before the pattern is consulted.
	if isVisited {
		return false
	}
	normalized := ctx.NormalizedURL().String()
	return rxOk.MatchString(normalized)
}
Пример #3
0
// Filter returns true when the URL in ctx has not been visited and its
// normalized string form matches rxOk; otherwise it returns false.
func (self *CustomExtender) Filter(ctx *gocrawl.URLContext, isVisited bool) bool {
	accept := false
	if !isVisited {
		// The pattern is only evaluated for URLs not seen before.
		accept = rxOk.MatchString(ctx.NormalizedURL().String())
	}
	return accept
}