// defaultHTML fills i from generic metadata found in doc.
//
// The text of the first <title> element is used as a baseline caption. Every
// <meta> element whose property attribute contains "og" is then inspected:
// og:title overwrites i.Caption, og:description sets i.Description, and
// og:image / og:url set i.ImageURL / i.URL when govalidator.IsRequestURL
// accepts the content value. Rejected URLs are logged and the corresponding
// field is left untouched.
//
// sourceURL is only used to identify the page in log messages.
func defaultHTML(i *data.Item, sourceURL string, doc *goquery.Document) {
	fmt.Println("Running OG extract.")

	// An empty <title></title> has no child node, so FirstChild must be
	// checked before reading its text.
	if titles := doc.Find("title"); len(titles.Nodes) != 0 {
		if text := titles.Nodes[0].FirstChild; text != nil {
			i.Caption = text.Data
		}
	}

	for _, e := range doc.Find("meta[property*='og']").Nodes {
		m := htmlAttributeToMap(e.Attr)
		content := m["content"]

		switch m["property"] {
		case "og:title":
			i.Caption = content
		case "og:image":
			if !govalidator.IsRequestURL(content) {
				log.Println("Invalid url in og:image. " + sourceURL)
				continue
			}
			i.ImageURL = content
		case "og:url":
			if !govalidator.IsRequestURL(content) {
				log.Println("Invalid url in og:url. " + sourceURL)
				continue
			}
			i.URL = content
		case "og:description":
			i.Description = content
		}
	}
}
// Example #2
// 0
// amazon refines i for Amazon product pages. It returns immediately unless
// sourceURL contains "www.amazon.".
//
// For matching pages it:
//   - sets i.ImageURL from the data-old-hires attribute of the #landingImage
//     <img>, when govalidator.IsRequestURL accepts that value;
//   - rewrites i.URL so that it carries exactly one affiliate "tag" query
//     parameter, replacing any tag that was already present;
//   - sets i.Caption from the first child node of the #productTitle <span>.
//
// Missing or unexpected elements are reported on stdout and otherwise ignored.
func amazon(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "www.amazon.") {
		return
	}

	fmt.Println("Running Amazon plugin.")

	// find picture
	{
		selection := doc.Find("#landingImage")
		if len(selection.Nodes) == 0 {
			fmt.Println("Amazon plugin found no #landingImage. " + sourceURL)
		} else if len(selection.Nodes) > 1 {
			fmt.Println("Amazon plugin found >1 #landingImage. " + sourceURL)
		}
		for _, e := range selection.Nodes {
			if e.Type != html.ElementNode || e.Data != "img" {
				continue
			}
			hires := htmlAttributeToMap(e.Attr)["data-old-hires"]
			if govalidator.IsRequestURL(hires) {
				i.ImageURL = hires
			} else {
				fmt.Println("Amazon plugin imgURL invalid. " + hires)
			}
		}
	}

	// update url to contain tag
	// This is our tag. We should make it configurable
	i.URL = amazonTaggedURL(i.URL, "gschaftshuonl-21")

	// update title
	{
		selection := doc.Find("#productTitle")
		if len(selection.Nodes) == 0 {
			fmt.Println("Amazon plugin found no #productTitle. " + sourceURL)
		} else if len(selection.Nodes) > 1 {
			fmt.Println("Amazon plugin found >1 #productTitle. " + sourceURL)
		}
		for _, e := range selection.Nodes {
			// An empty <span> has no child node; skip it instead of
			// dereferencing a nil FirstChild.
			if e.Type == html.ElementNode && e.Data == "span" && e.FirstChild != nil {
				i.Caption = e.FirstChild.Data
			}
		}
	}
}

// amazonTaggedURL returns rawURL with every existing "tag" query parameter
// removed and a single "tag=<tag>" parameter appended to the query string.
//
// Only parameters whose key is exactly "tag" are dropped (so "hashtag=..." is
// kept), a tag in last position is removed as well, and no empty "&&" or "?&"
// segments are left behind. A "#fragment", if present, stays at the end of the
// result. Other parameters are kept verbatim and in their original order.
func amazonTaggedURL(rawURL, tag string) string {
	// Split off the fragment first so the tag ends up in the query string.
	fragment := ""
	if pos := strings.Index(rawURL, "#"); pos != -1 {
		rawURL, fragment = rawURL[:pos], rawURL[pos:]
	}

	base, query := rawURL, ""
	if pos := strings.Index(rawURL, "?"); pos != -1 {
		base, query = rawURL[:pos], rawURL[pos+1:]
	}

	var params []string
	for _, p := range strings.Split(query, "&") {
		if p == "" || strings.HasPrefix(p, "tag=") {
			continue
		}
		params = append(params, p)
	}
	params = append(params, "tag="+tag)

	return base + "?" + strings.Join(params, "&") + fragment
}