Example #1
0
// main parses the package-level example markup, calls Val("lol") on the
// nodes matched by "a div", and prints the document's HTML followed by the
// results of a HasClass query and an Attrs query.
func main() {
	// The parse error is intentionally discarded, as in the original example.
	doc, _ := goquery.Parse(example)

	doc.Find("a div").Val("lol")
	fmt.Println(doc.Html())

	// Report whether the "div" selection carries the class "yo".
	divs := doc.Find("div")
	fmt.Println(divs.HasClass("yo"))

	// Collect the href attributes of whatever the empty selector matches.
	matched := doc.Find("")
	fmt.Println(matched.Attrs("href"))
}
Example #2
0
// getMostSimilarAltImage parses bodyStr and stores in i.ParsedImage the src
// of the "img" node whose alt text scores highest against i.Title according
// to GetSimilarityScore.
//
// Only images whose src starts with "http" are considered, and an image is
// selected only if its score is strictly greater than the initial threshold
// of 2. If parsing fails the error is printed and i is left untouched; if no
// image qualifies, i.ParsedImage is not modified.
func getMostSimilarAltImage(bodyStr string, i *ItemObject) {
	nodes, err := goquery.Parse(bodyStr)
	if err != nil {
		fmt.Println(err)
		return
	}

	nodes = nodes.Find("img")

	maxScore := 2
	image := ""
	for _, node := range nodes {
		// Width is irrelevant for similarity ranking, so it is discarded.
		src, alt, _, _ := Attr(node)
		if !strings.HasPrefix(src, "http") {
			continue
		}

		score := GetSimilarityScore(i.Title, alt)
		if score > maxScore {
			image = src
			// Track the best score seen so far. The previous code stored the
			// image width here, which made later comparisons meaningless.
			maxScore = score
		}
	}

	if image != "" {
		i.ParsedImage = image
	}
}
Example #3
0
// getWidestImage parses bodyStr and picks an "img" src to store in
// i.ParsedImage.
//
// Images whose src does not start with "http" are skipped. An image replaces
// the current choice when its width exceeds the widest seen so far (starting
// at 200) and isImageGood accepts it. While nothing has been chosen yet, an
// image whose alt text is longer than the longest seen so far (starting at
// 100) and whose width is not 1 is accepted as a fallback, again subject to
// isImageGood. A parse error is printed and leaves i untouched; if no image
// qualifies, i.ParsedImage is not modified.
func getWidestImage(bodyStr string, i *ItemObject) {
	doc, err := goquery.Parse(bodyStr)
	if err != nil {
		fmt.Println(err)
		return
	}

	var best string
	widest, longestAlt := 200, 100

	for _, img := range doc.Find("img") {
		src, alt, width, _ := Attr(img)
		if !strings.HasPrefix(src, "http") {
			continue
		}

		switch {
		case width > widest && isImageGood(src):
			// A wider, acceptable image always wins.
			best, widest = src, width
		case best == "" && len(alt) > longestAlt && width != 1 && isImageGood(src):
			// Fallback used only until some image has been selected.
			best, longestAlt = src, len(alt)
		}
	}

	if best == "" {
		return
	}
	i.ParsedImage = best
}
Example #4
0
// getImageFromClass parses body, narrows it to the nodes matched by the
// selector given in class, and hands the HTML of that selection to
// getWidestImage so it can fill in i. A parse error is printed and ends the
// call without touching i.
func getImageFromClass(body, class string, i *ItemObject) {
	doc, err := goquery.Parse(body)
	if err != nil {
		fmt.Println(err)
		return
	}

	getWidestImage(doc.Find(class).Html(), i)
}
Example #5
0
// getSvg parses svgBytes and returns the outer HTML of the "symbol"
// selection. The parse error is passed to check; when the selection yields
// an empty string, logAndExit is called with an explanatory message.
func getSvg(svgBytes []byte) string {
	doc, err := goquery.Parse(bytes.NewReader(svgBytes))
	check(err)

	symbol := doc.Find("symbol").OuterHtml()
	if symbol != "" {
		return symbol
	}

	logAndExit("I haven't found any <symbol> in the svg")
	return symbol
}
Example #6
0
// getLinksFromBody parses body, collects the href attribute of the "a"
// selection, and returns the links that normilizeLink accepts for host, in
// document order. The parse error is discarded.
func getLinksFromBody(body io.Reader, host string) []string {
	doc, _ := goquery.Parse(body)
	hrefs := doc.Find("a").Attrs("href")

	// Capacity is an upper bound: rejected links leave it partly unused.
	result := make([]string, 0, len(hrefs))
	for _, href := range hrefs {
		normalized, ok := normilizeLink(href, host)
		if !ok {
			continue
		}
		result = append(result, normalized)
	}

	return result
}
Example #7
0
// getOGImage parses body and scans its "meta" nodes for the first one whose
// property is "og:image" and whose content passes isImageGood; that content
// is stored in i.ParsedImage. A parse error is printed and leaves i
// untouched, as does a document with no acceptable og:image entry.
func getOGImage(body string, i *ItemObject) {
	doc, err := goquery.Parse(body)
	if err != nil {
		fmt.Println(err)
		return
	}

	for _, meta := range doc.Find("meta") {
		property, content := MetaAttr(meta)
		if property != "og:image" {
			continue
		}
		if !isImageGood(content) {
			continue
		}

		// First acceptable match wins.
		i.ParsedImage = content
		return
	}
}
Example #8
0
// parseHtml parses ps.FileContent.Content and gathers, via ps.getAttr, the
// attribute values of four selections in this order: href of "a", href of
// "link" (called with a type=text/css map), src of "script", and src of
// "img". The parse error is discarded. The result is never nil.
func (ps *Parser) parseHtml() []string {
	doc, _ := goquery.Parse(strings.NewReader(ps.FileContent.Content))

	// Passed to getAttr for <link> elements only.
	cssOnly := map[string]string{"type": "text/css"}

	urls := []string{}

	// Links.
	urls = append(urls, ps.getAttr(doc.Find("a"), "href", nil)...)

	// CSS.
	urls = append(urls, ps.getAttr(doc.Find("link"), "href", cssOnly)...)

	// JS.
	urls = append(urls, ps.getAttr(doc.Find("script"), "src", nil)...)

	// Images.
	urls = append(urls, ps.getAttr(doc.Find("img"), "src", nil)...)

	return urls
}
Example #9
0
// main parses the package-level example markup and calls Print on the
// "a div" selection and then on the "a div.cow" selection, writing a "---"
// line between the two.
func main() {
	// The parse error is intentionally discarded, as in the original example.
	doc, _ := goquery.Parse(example)

	for idx, selector := range []string{"a div", "a div.cow"} {
		if idx > 0 {
			fmt.Println("---")
		}
		doc.Find(selector).Print()
	}
}