// ogtags extracts the og:title, og:image, ... tags from a webpage func defaultHTML(i *data.Item, sourceURL string, doc *goquery.Document) { fmt.Println("Running OG extract.") selection := doc.Find("title") if len(selection.Nodes) != 0 { i.Caption = selection.Nodes[0].FirstChild.Data } selection = doc.Find("meta[property*='og']") for _, e := range selection.Nodes { m := htmlAttributeToMap(e.Attr) if m["property"] == "og:title" { i.Caption = m["content"] } if m["property"] == "og:image" { if !govalidator.IsRequestURL(m["content"]) { log.Println("Invalid url in og:image. " + sourceURL) continue } i.ImageURL = m["content"] } if m["property"] == "og:url" { if !govalidator.IsRequestURL(m["content"]) { log.Println("Invalid url in og:url. " + sourceURL) continue } i.URL = m["content"] } if m["property"] == "og:description" { i.Description = m["content"] } } }
func garfield(i *data.Item, sourceURL string, doc *goquery.Document) { if !strings.Contains(sourceURL, "www.gocomics.com/garfield") { return } fmt.Println("Running Garfield plugin.") // update title selection := doc.Find(".strip") if len(selection.Nodes) == 0 { fmt.Println("Garfield plugin found no .strip. " + sourceURL) } else { if len(selection.Nodes) > 1 { fmt.Println("Garfield plugin found >1 .strip. " + sourceURL) } m := htmlAttributeToMap(selection.Nodes[0].Attr) if govalidator.IsRequestURL(m["src"]) { i.Description = "<img src =\"" i.Description += m["src"] i.Description += "\" />" } else { fmt.Println("Amazon plugin invalid url. " + m["src"]) } i.ImageURL = "" } }
func vimeo(i *data.Item, sourceURL string, doc *goquery.Document) { if !strings.Contains(sourceURL, "vimeo.com") { return } fmt.Println("Running Vimeo plugin.") // remove trailing '/' of the url, if any if string(sourceURL[len(sourceURL)-1]) == "/" { sourceURL = sourceURL[:len(sourceURL)-1] } videoIDstart := strings.LastIndex(sourceURL, "/") if videoIDstart == -1 { fmt.Println("Vimeo plugin found no '/' ??? " + sourceURL) return } videoIDstart++ videoID := sourceURL[videoIDstart:] i.Description += "<br/><br/><br/><iframe src=\"http://player.vimeo.com/video/" i.Description += videoID i.Description += "?title=0&byline=0&portrait=0\" width=\"400\" height=\"225\" frameborder=\"0\" webkitAllowFullScreen mozallowfullscreen allowFullScreen></iframe>" i.ImageURL = "" }
func image(i *data.Item, sourceURL string, contentType string) { if !(strings.Index(contentType, "image/") == 0) { return } fmt.Println("Running Image plugin.") i.ImageURL = "" i.Caption = sourceURL[strings.LastIndex(sourceURL, "/")+1:] i.Description = "<img src=\"" + sourceURL + "\">" }
func dilbert(i *data.Item, sourceURL string, doc *goquery.Document) { if !(strings.Contains(sourceURL, "feed.dilbert.com/") || strings.Contains(sourceURL, "dilbert.com/strips/")) { return } fmt.Println("Running Dilbert plugin.") selection := doc.Find(".STR_Image").Find("img") if len(selection.Nodes) == 0 { fmt.Println("Dilbert plugin found no .STR_Image. " + sourceURL) return } if len(selection.Nodes) > 1 { fmt.Println("Dilbert plugin found >1 .STR_Image. " + sourceURL) } e := selection.Nodes[0] if e.Type == html.ElementNode && e.Data == "img" { m := htmlAttributeToMap(e.Attr) u := "" if !strings.Contains(m["src"], "://dilbert.com") { u += "https://dilbert.com" } u += m["src"] if govalidator.IsRequestURL(u) { i.Description = "<img src=\"" i.Description += u i.Description += "\" />" } else { fmt.Println("Dilbert plugin invalid url. " + u) } } else { fmt.Println("Dilbert plugin no image tag where we expect one.") fmt.Println(e) } i.ImageURL = "" i.Caption = "Dilbert" }
func gfycat(i *data.Item, sourceURL string, doc *goquery.Document) { if !strings.Contains(sourceURL, "gfycat.com/") { return } fmt.Println("Running Gfycat plugin.") i.ImageURL = "" selection := doc.Find(".gfyVid") if len(selection.Nodes) == 0 { fmt.Println("Gfycat plugin found no .gfyVid. " + sourceURL) return } if len(selection.Nodes) > 1 { fmt.Println("Gfycat plugin found >1 .gfyVid. " + sourceURL) } buf := new(bytes.Buffer) err := html.Render(buf, selection.Nodes[0]) if err != nil { fmt.Println("Gfycat plugin error while rendering. " + sourceURL + "- " + err.Error()) return } i.Description = buf.String() selection = doc.Find(".gfyTitle") if len(selection.Nodes) == 0 { fmt.Println("Gfycat plugin found no .gfyTitle. " + sourceURL) return } if len(selection.Nodes) > 1 { fmt.Println("Gfycat plugin found >1 .gfyTitle. " + sourceURL) } if len(selection.Nodes) != 0 && selection.Nodes[0].FirstChild != nil { i.Caption = selection.Nodes[0].FirstChild.Data } else { i.Caption = "Gfycat" } }
func youtube(i *data.Item, sourceURL string, doc *goquery.Document) { if !strings.Contains(sourceURL, "www.youtube.com") { return } fmt.Println("Running Youtube plugin.") // update title videoIDstart := strings.Index(i.URL, "v=") if videoIDstart == -1 { fmt.Println("Youtube plugin found no video ID. " + sourceURL) return } videoIDstart += 2 // ID is after 'v=' videoID := i.URL[videoIDstart:] i.Description += "<br/><br/><br/><iframe width=\"560\" height=\"315\" src=\"http://www.youtube.com/embed/" i.Description += videoID i.Description += "\" frameborder=\"0\" allowfullscreen></iframe>" i.ImageURL = "" }
func fefe(i *data.Item, sourceURL string, doc *goquery.Document) { if !strings.Contains(sourceURL, "blog.fefe.de/?ts") { return } fmt.Println("Running Fefes Blog plugin.") selection := doc.Find("li") if len(selection.Nodes) == 0 { fmt.Println("Fefes Blog plugin found no li. " + sourceURL) return } if len(selection.Nodes) > 1 { fmt.Println("Fefes Blog plugin found >1 li. " + sourceURL) } buf := new(bytes.Buffer) err := html.Render(buf, selection.Nodes[0]) if err != nil { fmt.Println("Fefes Blog plugin error while rendering. " + sourceURL + "- " + err.Error()) return } i.Description = buf.String() start := strings.Index(i.Description, "</a>") + 4 end := strings.Index(i.Description, "</li>") i.Description = i.Description[start:end] words := strings.Fields(sanitize.HTML(i.Description)) i.Caption = "" for a := 0; len(i.Caption) < 20 && a < len(words); a++ { i.Caption += words[a] + " " } i.Caption = "Fefes Blog - " + strings.TrimSpace(i.Caption) + "..." i.ImageURL = "" }
func amazon(i *data.Item, sourceURL string, doc *goquery.Document) { if !strings.Contains(sourceURL, "www.amazon.") { return } fmt.Println("Running Amazon plugin.") // find picture { selection := doc.Find("#landingImage") if len(selection.Nodes) == 0 { fmt.Println("Amazon plugin found no #landingImage. " + sourceURL) } else { if len(selection.Nodes) > 1 { fmt.Println("Amazon plugin found >1 #landingImage. " + sourceURL) } for _, e := range selection.Nodes { if e.Type == html.ElementNode && e.Data == "img" { m := htmlAttributeToMap(e.Attr) if govalidator.IsRequestURL(m["data-old-hires"]) { i.ImageURL = m["data-old-hires"] } else { fmt.Println("Amazon plugin imgURL invalid. " + m["data-old-hires"]) } } } } } // update url to contain tag { // This is our tag. We should make it configurable urlExtension := "tag=" + "gschaftshuonl-21" start := strings.Index(i.URL, "tag=") if start != -1 { end := strings.Index(i.URL[start+1:], "&") + start + 1 i.URL = i.URL[:start] + i.URL[end:] } if strings.Index(i.URL, "?") == -1 { i.URL += "?" + urlExtension } else { i.URL += "&" + urlExtension } } // update title { selection := doc.Find("#productTitle") if len(selection.Nodes) == 0 { fmt.Println("Amazon plugin found no #productTitle. " + sourceURL) } else { if len(selection.Nodes) > 1 { fmt.Println("Amazon plugin found >1 #productTitle. " + sourceURL) } for _, e := range selection.Nodes { if e.Type == html.ElementNode && e.Data == "span" { i.Caption = e.FirstChild.Data } } } } }