// ogtags extracts the og:title, og:image, ... tags from a webpage func defaultHTML(i *data.Item, sourceURL string, doc *goquery.Document) { fmt.Println("Running OG extract.") selection := doc.Find("title") if len(selection.Nodes) != 0 { i.Caption = selection.Nodes[0].FirstChild.Data } selection = doc.Find("meta[property*='og']") for _, e := range selection.Nodes { m := htmlAttributeToMap(e.Attr) if m["property"] == "og:title" { i.Caption = m["content"] } if m["property"] == "og:image" { if !govalidator.IsRequestURL(m["content"]) { log.Println("Invalid url in og:image. " + sourceURL) continue } i.ImageURL = m["content"] } if m["property"] == "og:url" { if !govalidator.IsRequestURL(m["content"]) { log.Println("Invalid url in og:url. " + sourceURL) continue } i.URL = m["content"] } if m["property"] == "og:description" { i.Description = m["content"] } } }
func amazon(i *data.Item, sourceURL string, doc *goquery.Document) { if !strings.Contains(sourceURL, "www.amazon.") { return } fmt.Println("Running Amazon plugin.") // find picture { selection := doc.Find("#landingImage") if len(selection.Nodes) == 0 { fmt.Println("Amazon plugin found no #landingImage. " + sourceURL) } else { if len(selection.Nodes) > 1 { fmt.Println("Amazon plugin found >1 #landingImage. " + sourceURL) } for _, e := range selection.Nodes { if e.Type == html.ElementNode && e.Data == "img" { m := htmlAttributeToMap(e.Attr) if govalidator.IsRequestURL(m["data-old-hires"]) { i.ImageURL = m["data-old-hires"] } else { fmt.Println("Amazon plugin imgURL invalid. " + m["data-old-hires"]) } } } } } // update url to contain tag { // This is our tag. We should make it configurable urlExtension := "tag=" + "gschaftshuonl-21" start := strings.Index(i.URL, "tag=") if start != -1 { end := strings.Index(i.URL[start+1:], "&") + start + 1 i.URL = i.URL[:start] + i.URL[end:] } if strings.Index(i.URL, "?") == -1 { i.URL += "?" + urlExtension } else { i.URL += "&" + urlExtension } } // update title { selection := doc.Find("#productTitle") if len(selection.Nodes) == 0 { fmt.Println("Amazon plugin found no #productTitle. " + sourceURL) } else { if len(selection.Nodes) > 1 { fmt.Println("Amazon plugin found >1 #productTitle. " + sourceURL) } for _, e := range selection.Nodes { if e.Type == html.ElementNode && e.Data == "span" { i.Caption = e.FirstChild.Data } } } } }