Пример #1
0
// Links downloads the webpage located at u and collects the target of every
// element matching the "a[href]" selector. Each href is passed through
// resolveURL together with the parsed page URL, so the returned references
// are absolute.
//
// The first error encountered (URL parsing, the HTTP request, HTML parsing,
// selector compilation or resolving a single href) aborts the call and is
// returned with a nil slice. A page without matching elements yields a nil
// slice and a nil error.
func Links(u string) ([]string, error) {
	selection := newSelection("a[href]", u)

	base, err := url.Parse(selection.URL)
	if err != nil {
		return nil, err
	}

	resp, err := http.Get(base.String())
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	root, err := html.Parse(resp.Body)
	if err != nil {
		return nil, err
	}

	matcher, err := cascadia.Compile(selection.Selector)
	if err != nil {
		return nil, err
	}

	var links []string
	for _, node := range matcher.MatchAll(root) {
		absolute, err := resolveURL(hrefString(node), base)
		if err != nil {
			return nil, err
		}
		links = append(links, absolute)
	}
	return links, nil
}
Пример #2
0
// matchSelector fetches the page at s.URL using client, parses the response
// as HTML and returns nodeString applied to every node that matches the CSS
// selector s.Selector, in the order reported by MatchAll.
//
// Every failure (invalid URL, request error, HTML parse error or selector
// compile error) is reported through log.Fatal, which terminates the
// process; the function therefore only returns on success. The result is nil
// when nothing matches.
func matchSelector(s *Selection, client *http.Client) []string {
	link, err := url.Parse(s.URL)
	if err != nil {
		log.Fatal("Incorrect url")
	}
	r, err := client.Get(link.String())
	if err != nil {
		log.Fatal(err)
	}
	// The body must be closed once parsing is done, otherwise the underlying
	// connection is leaked and cannot be reused by the client's transport.
	defer r.Body.Close()

	doc, err := html.Parse(r.Body)
	if err != nil {
		log.Fatal(err)
	}
	sel, err := cascadia.Compile(s.Selector)
	if err != nil {
		log.Fatal(err)
	}
	matches := sel.MatchAll(doc)
	var result []string
	for _, m := range matches {
		result = append(result, nodeString(m))
	}
	return result
}