func main() { x, _ := goquery.Parse(example) x.Find("a div").Val("lol") fmt.Println(x.Html()) fmt.Println(x.Find("div").HasClass("yo")) fmt.Println(x.Find("").Attrs("href")) }
func getMostSimilarAltImage(bodyStr string, i *ItemObject) { nodes, err := goquery.Parse(bodyStr) if err != nil { fmt.Println(err) return } nodes = nodes.Find("img") maxScore := 2 image := "" for _, node := range nodes { src, alt, width, _ := Attr(node) if !strings.HasPrefix(src, "http") { continue } score := GetSimilarityScore(i.Title, alt) if score > maxScore { image = src maxScore = width } } if image != "" { i.ParsedImage = image } }
func getWidestImage(bodyStr string, i *ItemObject) { nodes, err := goquery.Parse(bodyStr) if err != nil { fmt.Println(err) return } nodes = nodes.Find("img") maxWidth := 200 maxAlt := 100 image := "" for _, node := range nodes { src, alt, width, _ := Attr(node) if !strings.HasPrefix(src, "http") { continue } if width > maxWidth && isImageGood(src) { image = src maxWidth = width } if image == "" && len(alt) > maxAlt && width != 1 && isImageGood(src) { image = src maxAlt = len(alt) } } if image != "" { i.ParsedImage = image } }
func getImageFromClass(body, class string, i *ItemObject) { nodes, err := goquery.Parse(body) if err != nil { fmt.Println(err) return } nodes = nodes.Find(class) getWidestImage(nodes.Html(), i) }
func getSvg(svgBytes []byte) string { svg := bytes.NewReader(svgBytes) doc, err := goquery.Parse(svg) check(err) icon := doc.Find("symbol").OuterHtml() if icon == "" { logAndExit("I haven't found any <symbol> in the svg") } return icon }
func getLinksFromBody(body io.Reader, host string) []string { tree, _ := goquery.Parse(body) links := tree.Find("a").Attrs("href") normals := make([]string, 0, len(links)) for _, link := range links { link, ok := normilizeLink(link, host) if ok { normals = append(normals, link) } } return normals }
func getOGImage(body string, i *ItemObject) { nodes, err := goquery.Parse(body) if err != nil { fmt.Println(err) return } nodes = nodes.Find("meta") for _, node := range nodes { property, content := MetaAttr(node) if property == "og:image" && isImageGood(content) { i.ParsedImage = content return } } }
func (ps *Parser) parseHtml() []string { q, _ := goquery.Parse(strings.NewReader(ps.FileContent.Content)) ls := []string{} var nodes goquery.Nodes // 链接 nodes = q.Find("a") ls = append(ls, ps.getAttr(nodes, "href", nil)...) //css nodes = q.Find("link") ls = append(ls, ps.getAttr(nodes, "href", map[string]string{"type": "text/css"})...) //js nodes = q.Find("script") ls = append(ls, ps.getAttr(nodes, "src", nil)...) //图片 nodes = q.Find("img") ls = append(ls, ps.getAttr(nodes, "src", nil)...) return ls }
func main() { x, _ := goquery.Parse(example) x.Find("a div").Print() fmt.Println("---") x.Find("a div.cow").Print() }