Esempio n. 1
0
// main is a small command-line driver: it fetches the page named by the
// uri flag through the curl package, parses the locally stored UTF-8 copy
// as HTML, runs it through the cleaner extractor and prints the resulting
// article with print_html_doc.
//
// When the uri flag is empty the flag defaults are printed and the program
// returns without doing anything else. Any failure along the way aborts
// the program with a panic carrying the underlying error.
func main() {
	// abortOn panics with err when a step has failed; it is a no-op otherwise.
	abortOn := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	flag.Parse()
	if *uri == "" {
		flag.PrintDefaults()
		return
	}

	// Fetch the page; the result names the local file holding the UTF-8 copy.
	fetcher := curl.NewCurl("e:/")
	page, err := fetcher.GetUtf8(*uri)
	abortOn(err)

	file, err := os.Open(page.LocalUtf8)
	abortOn(err)
	defer file.Close()

	root, err := html.Parse(file)
	abortOn(err)

	// Only the cleaned article is needed here; the second result of
	// MakeHtmlReadable is intentionally discarded.
	extractor := cleaner.NewExtractor("e:/")
	article, _, err := extractor.MakeHtmlReadable(root, *uri)
	abortOn(err)

	print_html_doc(article)
}
Esempio n. 2
0
// feedentry_fulldoc downloads the document at uri and turns it into a
// feed.FeedContent.
//
// Steps, in order:
//  1. fetch uri with a curl client created from the configured
//     DocumentFolder and ruler, obtaining a cached UTF-8 copy;
//  2. reject anything whose MIME type does not map to the "html" extension;
//  3. parse the cached file and run it through the cleaner extractor;
//  4. write the cleaned article with html_write_file and record its
//     location in v.Local;
//  5. collect images and media from the extractor summary, copy the word
//     and link counts, and build the flow document.
//
// On any failure the error is returned together with v as populated so far
// (v.Uri and v.Local may already be set); callers should not rely on the
// remaining fields in that case.
func feedentry_fulldoc(uri string) (v feed.FeedContent, err error) {
	c := curl.NewCurlerDetail(backend_context.config.DocumentFolder, 0, 0, nil, backend_context.ruler)
	cache, err := c.GetUtf8(uri)
	if err != nil {
		return v, err
	}
	v.Uri = uri
	v.Local = cache.LocalUtf8
	if curl.MimeToExt(cache.Mime) != "html" {
		return v, new_backenderror(-1, "unsupported mime: "+cache.Mime)
	}
	doc, err := html_create_from_file(cache.LocalUtf8)
	if err != nil {
		return v, err
	}
	article, sum, err := cleaner.NewExtractor(backend_context.config.CleanFolder).MakeHtmlReadable(doc, uri)
	if err != nil {
		return v, err
	}
	v.Local, err = html_write_file(article, backend_context.config.DocumentFolder)
	if err != nil {
		// Stop here: without a written article there is no point in
		// building the media list and flow document for a result the
		// caller will see as failed anyway.
		return v, err
	}

	// Image references are resolved against the document uri and then
	// passed through redirect_thumbnail.
	redirector := func(turi string) string {
		return redirect_thumbnail(url_resolve(uri, turi))
	}
	// Media references are resolved against the document uri and mapped
	// with imageurl_from_video (presumably to a preview image — confirm
	// against that helper).
	imgurl_maker := func(reluri string) string {
		u := url_resolve(uri, reluri)
		return imageurl_from_video(u)
	}
	v.Images = append_unique(v.Images, feedmedias_from_docsummary(sum.Images, redirector)...)
	v.Images = append_unique(v.Images, feedmedias_from_docsummary(sum.Medias, imgurl_maker)...)
	v.Words = uint(sum.WordCount)
	v.Links = uint(sum.LinkCount)
	v.FlowDoc = new_flowdoc_maker().make(article, v.Images)
	return v, nil
}
Esempio n. 3
0
// make_text_readable converts the text of a feed entry into a flow
// document and reports what was found in it.
//
// Parameters:
//   - entry: the entry being processed. entry.Uri is the base for resolving
//     relative URLs; entry.Images and entry.Videos are extended with the
//     media found in the text.
//   - txt: the entry text. An empty string yields empty_flowdocument with
//     the content-empty status.
//   - trans: when true, txt is first passed through markhtml.TransferText.
//   - insimg: when false, no images are handed to the flow document maker.
//
// Returns the flow document and a feed.FeedTextStatus carrying the word,
// link and link-word counts plus the status flags (empty, duplicated,
// ready, media-inline).
//
// If the text cannot be parsed into a fragment or the extractor fails, the
// result is the same as for empty input: empty_flowdocument with the
// content-empty flag set and entry left unmodified.
func make_text_readable(entry *ReadEntry, txt string, trans, insimg bool) (string, feed.FeedTextStatus) {
	var status feed.FeedTextStatus
	if txt == "" {
		status.Status |= feed.Feed_status_content_empty
		return empty_flowdocument, status
	}
	if trans {
		txt = markhtml.TransferText(txt)
	}

	// Image references are resolved against the entry uri and then passed
	// through redirect_thumbnail.
	redirector := func(uri string) string {
		return redirect_thumbnail(url_resolve(entry.Uri, uri))
	}
	// Media references are resolved against the entry uri and mapped with
	// imageurl_from_video.
	imgurl_maker := func(uri string) string {
		u := url_resolve(entry.Uri, uri)
		return imageurl_from_video(u)
	}

	frag, err := html_create_fragment(txt)
	if err != nil {
		// Unparseable text is treated like missing text rather than
		// feeding an unusable fragment to the extractor.
		status.Status |= feed.Feed_status_content_empty
		return empty_flowdocument, status
	}
	frag, score, err := cleaner.NewExtractor(backend_context.config.CleanFolder).MakeFragmentReadable(frag)
	if err != nil {
		// The summary cannot be trusted after a failed extraction.
		status.Status |= feed.Feed_status_content_empty
		return empty_flowdocument, status
	}

	entry.Images = append_unique(entry.Images, feedmedias_from_docsummary(score.Images, redirector)...)
	entry.Images = append_unique(entry.Images, feedmedias_from_docsummary(score.Medias, imgurl_maker)...)

	// Videos keep the media references exactly as the extractor reported them.
	entry.Videos = append_unique(entry.Videos, feedmedias_from_docsummary(score.Medias, func(o string) string { return o })...)
	status.WordCount = score.WordCount
	status.LinkCount = score.LinkCount
	status.LinkWordCount = score.LinkWordCount

	// Text below the configured minimum word count counts as empty content.
	tooShort := status.WordCount < backend_config().SummaryMinWords
	if tooShort {
		//		if status.WordCount > 0 {
		//			entry.Title.Others = append(entry.Title.Others, score.Text)
		//		}
		status.Status |= feed.Feed_status_content_empty
	}
	if status.WordCount > 0 && feedentry_content_exists(score.Hash) {
		status.Status |= feed.Feed_status_content_duplicated
	}
	status.Status |= feed.Feed_status_content_ready

	// Images are inlined only when requested, when the entry has at most
	// one image and no videos, and when the text is long enough.
	imgs := entry.Images
	switch {
	case !insimg, len(imgs) > 1, len(entry.Videos) > 0, tooShort:
		imgs = nil
	case len(imgs) > 0:
		status.Status |= feed.Feed_status_content_mediainline
	}
	return new_flowdoc_maker().make(frag, imgs), status
}