Beispiel #1
0
// itemURL builds the absolute URL of the item page that belongs to s.
//
// It moves from s to its closest enclosing <tr>, steps to the row that
// follows it, and inspects every anchor in that row. Anchors whose href
// starts with "item?id=" are turned into a full URL by prefixing hnURL; if
// several anchors qualify, the last one wins. The result is the empty string
// when no anchor qualifies.
func itemURL(s *goquery.Selection) (url string) {
	anchors := s.Closest("tr").Next().Find("a")
	anchors.Each(func(_ int, a *goquery.Selection) {
		// A missing href yields ok == false (and an empty string), which can
		// never carry the prefix, so it is skipped either way.
		if href, ok := a.Attr("href"); ok && strings.HasPrefix(href, "item?id=") {
			url = hnURL + href
		}
	})
	return url
}
Beispiel #2
0
// guessImgEnvironment picks the element that most likely forms the
// "environment" of an image, such as its <figure> wrapper.
//
// For every known wrapper selector it looks up the closest match, starting at
// s itself and walking up through its ancestors. When at least one selector
// matches, the candidates' inner HTML is handed to longestElement and the
// candidate at the returned index is used (presumably the one with the
// longest markup, i.e. the outermost wrapper; confirm against
// longestElement). When nothing matches, s is returned unchanged.
func guessImgEnvironment(s *goquery.Selection) *goquery.Selection {
	possibleEnvs := []string{
		"figure",
		".inline-image",                 // bbc
		".inline-media__unlinked-image", // bloomberg
		"header .inline-media",          // bloomberg
		".bigimgWrap",                   // bloomberg (class selector; without the dot it was a tag selector that never matched)
		".story-img",                    // technology review
		".img-left",                     // technology review
		".article-img",                  // technology review
		".img-wrap",                     // the intercept
		".overlap-img",                  // nautilus
		".inline-content",               // washington post
		".image",                        // npr.org
		".centered-figure-container",    // npr.org
		".slate-image",                  // slate.com
		".wp-caption",                   // mcntyr.com and quantamagazine.com
		".article__image",               // aeon
	}

	// The two slices are filled in lockstep so that an index into one is
	// always valid for the other.
	possibleParents := make([]*goquery.Selection, 0, len(possibleEnvs))
	htmlPossibleParents := make([]string, 0, len(possibleEnvs))
	for _, env := range possibleEnvs {
		parent := s.Closest(env)
		// Length is used instead of comparing Nodes to nil so that an empty
		// but non-nil node slice is treated as "no match" as well.
		if parent.Length() == 0 {
			continue
		}
		Debug("Found img env: %v", env)

		// The error is deliberately ignored: a candidate whose markup cannot
		// be rendered contributes an empty string and is kept in the list.
		h, _ := parent.Html()
		possibleParents = append(possibleParents, parent)
		htmlPossibleParents = append(htmlPossibleParents, h)
	}
	if len(possibleParents) == 0 {
		Debug("No parents found")
		return s
	}

	indexLongestElement, _ := longestElement(htmlPossibleParents)

	Debug("longest Env: %v", htmlPossibleParents[indexLongestElement])

	return possibleParents[indexLongestElement]
}
Beispiel #3
0
// parentAttr returns the value of attribute attr taken from the nearest
// element that carries it, starting at s itself and then walking up through
// its ancestors. It returns the empty string when no such element exists.
func parentAttr(s *goquery.Selection, attr string) string {
	holder := s.Closest("[" + attr + "]")
	if value, found := holder.Attr(attr); found {
		return value
	}
	return ""
}