Пример #1
0
// defaultHTML fills i with generic metadata found in doc. The caption is first
// taken from the <title> element and then, where present, overridden by the
// Open Graph <meta> tags og:title, og:image, og:url and og:description.
//
// og:image and og:url values are only stored when govalidator.IsRequestURL
// accepts them; otherwise a message containing sourceURL is logged and the
// corresponding field is left untouched.
func defaultHTML(i *data.Item, sourceURL string, doc *goquery.Document) {
	fmt.Println("Running OG extract.")

	// An empty <title></title> has no child node, so FirstChild has to be
	// checked before it is dereferenced.
	if titles := doc.Find("title"); len(titles.Nodes) != 0 && titles.Nodes[0].FirstChild != nil {
		i.Caption = titles.Nodes[0].FirstChild.Data
	}

	for _, e := range doc.Find("meta[property*='og']").Nodes {
		m := htmlAttributeToMap(e.Attr)

		switch m["property"] {
		case "og:title":
			i.Caption = m["content"]
		case "og:image":
			if !govalidator.IsRequestURL(m["content"]) {
				log.Println("Invalid url in og:image. " + sourceURL)
				continue
			}
			i.ImageURL = m["content"]
		case "og:url":
			if !govalidator.IsRequestURL(m["content"]) {
				log.Println("Invalid url in og:url. " + sourceURL)
				continue
			}
			i.URL = m["content"]
		case "og:description":
			i.Description = m["content"]
		}
	}
}
Пример #2
0
// garfield handles pages whose sourceURL contains "www.gocomics.com/garfield".
//
// It looks up the elements with class "strip" and uses the first one: when its
// src attribute passes govalidator.IsRequestURL, i.Description is replaced by
// an <img> tag pointing at it. Whenever at least one .strip element exists,
// i.ImageURL is cleared. If none exists, the item is left unchanged.
func garfield(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "www.gocomics.com/garfield") {
		return
	}

	fmt.Println("Running Garfield plugin.")

	// replace the description with the comic strip image
	selection := doc.Find(".strip")
	if len(selection.Nodes) == 0 {
		fmt.Println("Garfield plugin found no .strip. " + sourceURL)
		return
	}
	if len(selection.Nodes) > 1 {
		fmt.Println("Garfield plugin found >1 .strip. " + sourceURL)
	}

	m := htmlAttributeToMap(selection.Nodes[0].Attr)
	if govalidator.IsRequestURL(m["src"]) {
		i.Description = "<img src =\"" + m["src"] + "\" />"
	} else {
		// This message used to say "Amazon plugin", a copy-paste leftover
		// that pointed at the wrong plugin.
		fmt.Println("Garfield plugin invalid url. " + m["src"])
	}
	i.ImageURL = ""
}
Пример #3
0
// vimeo handles pages whose sourceURL contains "vimeo.com".
//
// The text after the last '/' of sourceURL (ignoring one trailing slash) is
// used as the video ID. An <iframe> embedding the Vimeo player for that ID is
// appended to i.Description and i.ImageURL is cleared. doc is not used.
func vimeo(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "vimeo.com") {
		return
	}

	fmt.Println("Running Vimeo plugin.")

	// A single trailing '/' would otherwise produce an empty video ID.
	trimmed := strings.TrimSuffix(sourceURL, "/")

	lastSlash := strings.LastIndex(trimmed, "/")
	if lastSlash == -1 {
		fmt.Println("Vimeo plugin found no '/' ??? " + trimmed)
		return
	}

	id := trimmed[lastSlash+1:]
	i.Description += "<br/><br/><br/><iframe src=\"http://player.vimeo.com/video/" +
		id +
		"?title=0&amp;byline=0&amp;portrait=0\" width=\"400\" height=\"225\" frameborder=\"0\" webkitAllowFullScreen mozallowfullscreen allowFullScreen></iframe>"

	i.ImageURL = ""
}
Пример #4
0
// image handles resources whose contentType starts with "image/".
//
// The caption becomes the part of sourceURL after its last '/' (the whole URL
// when it contains no '/'), the description becomes an <img> tag pointing at
// sourceURL, and i.ImageURL is cleared.
func image(i *data.Item, sourceURL string, contentType string) {
	if !strings.HasPrefix(contentType, "image/") {
		return
	}

	fmt.Println("Running Image plugin.")

	// LastIndex returns -1 when there is no '/', so the slice then starts at 0.
	fileName := sourceURL[strings.LastIndex(sourceURL, "/")+1:]

	i.ImageURL = ""
	i.Caption = fileName
	i.Description = "<img src=\"" + sourceURL + "\">"
}
Пример #5
0
// dilbert handles pages whose sourceURL contains "feed.dilbert.com/" or
// "dilbert.com/strips/".
//
// It takes the first <img> found below an element with class "STR_Image".
// If the src attribute does not contain "://dilbert.com", it is prefixed with
// "https://dilbert.com". When the resulting URL passes
// govalidator.IsRequestURL, i.Description is replaced by an <img> tag for it.
// Unless no image was found at all, i.ImageURL is cleared and i.Caption is set
// to "Dilbert".
func dilbert(i *data.Item, sourceURL string, doc *goquery.Document) {
	onFeed := strings.Contains(sourceURL, "feed.dilbert.com/")
	onStrips := strings.Contains(sourceURL, "dilbert.com/strips/")
	if !onFeed && !onStrips {
		return
	}

	fmt.Println("Running Dilbert plugin.")

	images := doc.Find(".STR_Image").Find("img")

	switch count := len(images.Nodes); {
	case count == 0:
		fmt.Println("Dilbert plugin found no .STR_Image. " + sourceURL)
		return
	case count > 1:
		fmt.Println("Dilbert plugin found >1 .STR_Image. " + sourceURL)
	}

	node := images.Nodes[0]
	if node.Type != html.ElementNode || node.Data != "img" {
		fmt.Println("Dilbert plugin no image tag where we expect one.")
		fmt.Println(node)
	} else {
		attrs := htmlAttributeToMap(node.Attr)

		// src values without the dilbert.com host are prefixed with it.
		imageURL := ""
		if !strings.Contains(attrs["src"], "://dilbert.com") {
			imageURL = "https://dilbert.com"
		}
		imageURL += attrs["src"]

		if !govalidator.IsRequestURL(imageURL) {
			fmt.Println("Dilbert plugin invalid url. " + imageURL)
		} else {
			i.Description = "<img src=\"" + imageURL + "\" />"
		}
	}

	i.ImageURL = ""
	i.Caption = "Dilbert"
}
Пример #6
0
// gfycat handles pages whose sourceURL contains "gfycat.com/".
//
// i.ImageURL is always cleared. The first element with class "gfyVid" is
// rendered to HTML and stored in i.Description. The caption is taken from the
// first child of the first element with class "gfyTitle", falling back to
// "Gfycat" when that element has no children. If either element is missing, or
// rendering fails, the function logs a message and returns early.
func gfycat(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "gfycat.com/") {
		return
	}

	fmt.Println("Running Gfycat plugin.")

	i.ImageURL = ""

	videos := doc.Find(".gfyVid")
	switch {
	case len(videos.Nodes) == 0:
		fmt.Println("Gfycat plugin found no .gfyVid. " + sourceURL)
		return
	case len(videos.Nodes) > 1:
		fmt.Println("Gfycat plugin found >1 .gfyVid. " + sourceURL)
	}

	var rendered bytes.Buffer
	if err := html.Render(&rendered, videos.Nodes[0]); err != nil {
		fmt.Println("Gfycat plugin error while rendering. " + sourceURL + "- " + err.Error())
		return
	}
	i.Description = rendered.String()

	titles := doc.Find(".gfyTitle")
	switch {
	case len(titles.Nodes) == 0:
		fmt.Println("Gfycat plugin found no .gfyTitle. " + sourceURL)
		return
	case len(titles.Nodes) > 1:
		fmt.Println("Gfycat plugin found >1 .gfyTitle. " + sourceURL)
	}

	// At least one title node exists here; only its child may be missing.
	i.Caption = "Gfycat"
	if text := titles.Nodes[0].FirstChild; text != nil {
		i.Caption = text.Data
	}
}
Пример #7
0
// youtube handles pages whose sourceURL contains "www.youtube.com".
//
// The video ID is read from the "v=" parameter of i.URL (not sourceURL). An
// <iframe> embedding that video is appended to i.Description and i.ImageURL
// is cleared. If i.URL contains no "v=", a message is logged and the item is
// left unchanged. doc is not used.
func youtube(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "www.youtube.com") {
		return
	}

	fmt.Println("Running Youtube plugin.")

	// append the embedded player to the description
	videoIDstart := strings.Index(i.URL, "v=")
	if videoIDstart == -1 {
		fmt.Println("Youtube plugin found no video ID. " + sourceURL)
		return
	}
	videoIDstart += 2 // ID is after 'v='
	videoID := i.URL[videoIDstart:]

	// The ID ends at the next query parameter or fragment. Without this cut,
	// a URL such as ".../watch?v=ID&t=10s" would put "&t=10s" into the embed
	// path.
	if cut := strings.IndexAny(videoID, "&#"); cut != -1 {
		videoID = videoID[:cut]
	}

	i.Description += "<br/><br/><br/><iframe width=\"560\" height=\"315\" src=\"http://www.youtube.com/embed/"
	i.Description += videoID
	i.Description += "\" frameborder=\"0\" allowfullscreen></iframe>"

	i.ImageURL = ""
}
Пример #8
0
// fefe handles pages whose sourceURL contains "blog.fefe.de/?ts".
//
// The first <li> of doc is rendered to HTML. The text between the first
// "</a>" (presumably the entry's permalink, to be confirmed against the blog's
// markup) and the closing "</li>" of that element becomes i.Description. The
// caption is "Fefes Blog - " followed by the leading words of the sanitized
// description (words are added until the caption is at least 20 bytes long)
// and "...". i.ImageURL is cleared.
//
// If no <li> exists, rendering fails, or the rendered markup has no usable
// closing tag, a message is logged and the item is left unchanged.
func fefe(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "blog.fefe.de/?ts") {
		return
	}
	fmt.Println("Running Fefes Blog plugin.")

	selection := doc.Find("li")

	if len(selection.Nodes) == 0 {
		fmt.Println("Fefes Blog plugin found no li. " + sourceURL)
		return
	}

	if len(selection.Nodes) > 1 {
		fmt.Println("Fefes Blog plugin found >1 li. " + sourceURL)
	}

	var buf bytes.Buffer
	if err := html.Render(&buf, selection.Nodes[0]); err != nil {
		fmt.Println("Fefes Blog plugin error while rendering. " + sourceURL + "- " + err.Error())
		return
	}
	rendered := buf.String()

	// Start right after the first link. Without a link, start after the
	// opening <li ...> tag instead of slicing at a meaningless offset.
	start := strings.Index(rendered, "</a>")
	if start != -1 {
		start += len("</a>")
	} else {
		start = strings.Index(rendered, ">") + 1
	}

	// Use the last "</li>" so that nested list items inside the entry do not
	// truncate it, and never slice with end < start (which would panic).
	end := strings.LastIndex(rendered, "</li>")
	if end < start {
		fmt.Println("Fefes Blog plugin found unexpected markup. " + sourceURL)
		return
	}
	i.Description = rendered[start:end]

	words := strings.Fields(sanitize.HTML(i.Description))
	caption := ""
	for a := 0; len(caption) < 20 && a < len(words); a++ {
		caption += words[a] + " "
	}
	i.Caption = "Fefes Blog - " + strings.TrimSpace(caption) + "..."
	i.ImageURL = ""
}
Пример #9
0
// amazon handles pages whose sourceURL contains "www.amazon.".
//
// It performs three independent steps:
//   - picture: for each <img> element with id "landingImage", its
//     data-old-hires attribute is stored in i.ImageURL when it passes
//     govalidator.IsRequestURL;
//   - url: an existing "tag" query parameter is removed from i.URL and our own
//     tag is appended;
//   - title: the first child of each non-empty <span> with id "productTitle"
//     is stored in i.Caption.
//
// Missing elements are reported on stdout and otherwise ignored.
func amazon(i *data.Item, sourceURL string, doc *goquery.Document) {
	if !strings.Contains(sourceURL, "www.amazon.") {
		return
	}

	fmt.Println("Running Amazon plugin.")

	// find picture
	{
		selection := doc.Find("#landingImage")
		if len(selection.Nodes) == 0 {
			fmt.Println("Amazon plugin found no #landingImage. " + sourceURL)
		} else {
			if len(selection.Nodes) > 1 {
				fmt.Println("Amazon plugin found >1 #landingImage. " + sourceURL)
			}
			for _, e := range selection.Nodes {
				if e.Type == html.ElementNode && e.Data == "img" {
					m := htmlAttributeToMap(e.Attr)
					if govalidator.IsRequestURL(m["data-old-hires"]) {
						i.ImageURL = m["data-old-hires"]
					} else {
						fmt.Println("Amazon plugin imgURL invalid. " + m["data-old-hires"])
					}
				}
			}
		}
	}

	// update url to contain tag
	{
		// This is our tag. We should make it configurable
		urlExtension := "tag=" + "gschaftshuonl-21"

		// Only match a real "tag" parameter, i.e. one directly preceded by a
		// query separator, not e.g. "hashtag=".
		start := strings.Index(i.URL, "?tag=")
		if start == -1 {
			start = strings.Index(i.URL, "&tag=")
		}
		if start != -1 {
			start++ // skip the separator, point at "tag="
			rest := i.URL[start:]
			if amp := strings.Index(rest, "&"); amp != -1 {
				// Drop "tag=value&" and keep the parameters that follow.
				i.URL = i.URL[:start] + rest[amp+1:]
			} else {
				// The tag is the last parameter: drop it entirely. The
				// separator in front of it is handled below.
				i.URL = i.URL[:start]
			}
		}

		switch {
		case !strings.Contains(i.URL, "?"):
			i.URL += "?" + urlExtension
		case strings.HasSuffix(i.URL, "?") || strings.HasSuffix(i.URL, "&"):
			// A separator is already in place; adding another would yield
			// "?&" or "&&".
			i.URL += urlExtension
		default:
			i.URL += "&" + urlExtension
		}
	}

	// update title
	{
		selection := doc.Find("#productTitle")
		if len(selection.Nodes) == 0 {
			fmt.Println("Amazon plugin found no #productTitle. " + sourceURL)
		} else {
			if len(selection.Nodes) > 1 {
				fmt.Println("Amazon plugin found >1 #productTitle. " + sourceURL)
			}
			for _, e := range selection.Nodes {
				// An empty <span> has no child; skip it instead of panicking.
				if e.Type == html.ElementNode && e.Data == "span" && e.FirstChild != nil {
					i.Caption = e.FirstChild.Data
				}
			}
		}
	}
}