func (e Goose) Extract(link string) (data data.ArticleExtract, err error) {
	// Convert any panic raised during extraction into an error return.
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("%v", r)
		}
	}()

	g := goose.New()
	/* TODO: preserve links */
	formatted, err := g.ExtractFromURL(link)
	if err != nil {
		// Return the extraction error directly instead of dereferencing a
		// nil article and relying on the recover above.
		return
	}

	content := formatted.CleanedText

	buf := util.BufferPool.GetBuffer()
	defer util.BufferPool.Put(buf)

	// Keep only non-empty lines of the cleaned text as HTML paragraphs.
	paragraphs := strings.Split(content, "\n")
	var html []template.HTML
	for _, p := range paragraphs {
		if strings.TrimSpace(p) != "" {
			html = append(html, template.HTML(p))
		}
	}

	e.renderer.Render(buf,
		renderer.RenderData{"paragraphs": html, "topImage": formatted.TopImage},
		nil, "goose-format-result.tmpl")

	data.Content = buf.String()
	data.Title = formatted.Title
	data.TopImage = formatted.TopImage
	data.Language = formatted.MetaLang

	return
}
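// For illustration only: a minimal, self-contained sketch of the rendering
// step above using the standard html/template package. The project's own
// renderer and its goose-format-result.tmpl are not shown in this section,
// so the template body and data layout here are assumptions, not the real
// implementation.
package main

import (
	"html/template"
	"os"
)

// Hypothetical stand-in for goose-format-result.tmpl: emit the top image,
// then wrap each cleaned-text paragraph in <p> tags.
var formatResult = template.Must(template.New("goose-format-result").Parse(
	`{{if .topImage}}<img src="{{.topImage}}" alt="">{{end}}
{{range .paragraphs}}<p>{{.}}</p>
{{end}}`))

func main() {
	// Paragraphs are passed as template.HTML, matching Extract above, so the
	// already-cleaned markup is written out without re-escaping.
	data := map[string]interface{}{
		"topImage":   "https://example.com/lead.jpg",
		"paragraphs": []template.HTML{"First paragraph.", "Second paragraph."},
	}
	if err := formatResult.Execute(os.Stdout, data); err != nil {
		panic(err)
	}
}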
func (this *Crawler) Analyze(url string) *goose.Article {
	g := goose.New()
	article, err := g.ExtractFromURL(url)
	if err != nil {
		// TODO Probably want to handle this error...
		panic(err)
	}
	return article
}
func (this *Crawler) Analyze(url string) *goose.Article {
	g := goose.New()
	// ExtractFromURL returns (article, error); swallow the error here and
	// return nil so the caller can check for a missing article.
	article, err := g.ExtractFromURL(url)
	if err != nil {
		return nil
	}
	return article
}
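// A minimal, self-contained usage sketch of the goose API the crawlers above
// rely on. The import path is an assumption (github.com/advancedlogic/GoOse);
// the ExtractFromURL call and the Article fields (Title, CleanedText,
// TopImage, MetaLang) match those used in this section.
package main

import (
	"fmt"
	"log"

	goose "github.com/advancedlogic/GoOse"
)

func main() {
	g := goose.New()
	// Returning the error to the caller (instead of panicking) keeps the
	// caller in control of how extraction failures are handled.
	article, err := g.ExtractFromURL("https://example.com/some-article")
	if err != nil {
		log.Fatalf("extraction failed: %v", err)
	}
	fmt.Println("title:", article.Title)
	fmt.Println("top image:", article.TopImage)
	fmt.Println("language:", article.MetaLang)
	fmt.Println(article.CleanedText)
}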