func itemURL(s *goquery.Selection) (url string) { s.Closest("tr").Next().Find("a").Each(func(_ int, s *goquery.Selection) { href, _ := s.Attr("href") if strings.HasPrefix(href, "item?id=") { url = hnURL + href } }) return }
func guessImgEnvironment(s *goquery.Selection) *goquery.Selection { possibleEnvs := []string{ "figure", ".inline-image", // bbc ".inline-media__unlinked-image", // bloomberg "header .inline-media", // bloomberg "bigimgWrap", // bloomberg ".story-img", // technology review ".img-left", // technology review ".article-img", // technology review ".img-wrap", // the intercept ".overlap-img", // nautilus ".inline-content", // washington post ".image", // npr.org ".centered-figure-container", // npr.org ".slate-image", // slate.com ".wp-caption", // mcntyr.com and quantamagazine.com ".article__image", // aeon } var possibleParents []*goquery.Selection for _, env := range possibleEnvs { parent := s.Closest(env) if parent.Nodes != nil { Debug("Found img env: %v", env) possibleParents = append(possibleParents, parent) } } if len(possibleParents) == 0 { Debug("No parents found") return s } var htmlPossibleParents []string for _, p := range possibleParents { h, _ := p.Html() htmlPossibleParents = append(htmlPossibleParents, h) } indexLongestElement, _ := longestElement(htmlPossibleParents) Debug("longest Env: %v", htmlPossibleParents[indexLongestElement]) return possibleParents[indexLongestElement] }
func parentAttr(s *goquery.Selection, attr string) string { sel := "[" + attr + "]" res, _ := s.Closest(sel).Attr(attr) return res }