Beispiel #1
0
// Extract runs goose's ExtractFromURL on link and renders the article's
// cleaned text through the "goose-format-result.tmpl" template.
//
// Every non-blank line of the cleaned text is passed to the template as one
// entry of "paragraphs", alongside the article's top image as "topImage".
// On success the result carries the rendered content plus the article's
// title, top image and meta language.
//
// If extraction fails, the extractor's error is returned unchanged and data
// is left at its zero value. A panic raised while extracting or rendering is
// recovered and reported through err instead of propagating to the caller.
func (e Goose) Extract(link string) (data data.ArticleExtract, err error) {
	defer func() {
		if r := recover(); r != nil {
			// Convert the panic into an ordinary error for the caller.
			err = errors.New(fmt.Sprintf("%v", r))
		}
	}()

	g := goose.New()
	/* TODO: preserve links */
	formatted, err := g.ExtractFromURL(link)
	if err != nil {
		// Bail out before touching formatted: dereferencing it here would
		// panic and the recover above would mask the real failure.
		return data, err
	}
	if formatted == nil {
		return data, errors.New("goose: extraction returned no article")
	}

	// Only borrow a pooled buffer once there is something to render.
	buf := util.BufferPool.GetBuffer()
	defer util.BufferPool.Put(buf)

	// NOTE(review): template.HTML marks the text as trusted markup. The
	// cleaned text comes from a remote page, so if the renderer relies on
	// html/template auto-escaping this cast bypasses it — confirm intended.
	var html []template.HTML
	for _, p := range strings.Split(formatted.CleanedText, "\n") {
		if strings.TrimSpace(p) != "" {
			html = append(html, template.HTML(p))
		}
	}

	// NOTE(review): whatever Render returns (if anything) is not inspected
	// here — confirm whether a rendering failure should surface via err.
	e.renderer.Render(buf,
		renderer.RenderData{"paragraphs": html, "topImage": formatted.TopImage},
		nil, "goose-format-result.tmpl")

	data.Content = buf.String()
	data.Title = formatted.Title
	data.TopImage = formatted.TopImage
	data.Language = formatted.MetaLang
	return data, nil
}
Beispiel #2
0
// Analyze runs goose's ExtractFromURL on url with a freshly created goose
// instance and returns the resulting article.
//
// The method has no error return: if extraction reports an error, Analyze
// panics with that error.
func (c *Crawler) Analyze(url string) *goose.Article {
	extractor := goose.New()

	result, extractErr := extractor.ExtractFromURL(url)
	if extractErr == nil {
		return result
	}

	// TODO Probably want to handle this error...
	panic(extractErr)
}
Beispiel #3
0
// Analyze creates a fresh goose instance, runs its ExtractFromUrl on url and
// returns whatever article that call yields.
func (c *Crawler) Analyze(url string) *goose.Article {
	extractor := goose.New()
	return extractor.ExtractFromUrl(url)
}