func main() { flag.Parse() if *uri == "" { flag.PrintDefaults() return } c := curl.NewCurl("e:/") cache, err := c.GetUtf8(*uri) if err != nil { panic(err) } f, err := os.Open(cache.LocalUtf8) if err != nil { panic(err) } defer f.Close() doc, err := html.Parse(f) if err != nil { panic(err) } ex := cleaner.NewExtractor("e:/") article, _, err := ex.MakeHtmlReadable(doc, *uri) if err != nil { panic(err) } print_html_doc(article) }
func feedentry_fulldoc(uri string) (v feed.FeedContent, err error) { c := curl.NewCurlerDetail(backend_context.config.DocumentFolder, 0, 0, nil, backend_context.ruler) cache, err := c.GetUtf8(uri) if err != nil { return v, err } v.Uri = uri v.Local = cache.LocalUtf8 if curl.MimeToExt(cache.Mime) != "html" { return v, new_backenderror(-1, "unsupported mime: "+cache.Mime) } doc, err := html_create_from_file(cache.LocalUtf8) if err != nil { return v, err } article, sum, err := cleaner.NewExtractor(backend_context.config.CleanFolder).MakeHtmlReadable(doc, uri) if err != nil { return v, err } v.Local, err = html_write_file(article, backend_context.config.DocumentFolder) redirector := func(turi string) string { return redirect_thumbnail(url_resolve(uri, turi)) } imgurl_maker := func(reluri string) string { u := url_resolve(uri, reluri) return imageurl_from_video(u) } v.Images = append_unique(v.Images, feedmedias_from_docsummary(sum.Images, redirector)...) v.Images = append_unique(v.Images, feedmedias_from_docsummary(sum.Medias, imgurl_maker)...) v.Words = uint(sum.WordCount) v.Links = uint(sum.LinkCount) v.FlowDoc = new_flowdoc_maker().make(article, v.Images) return v, err }
func make_text_readable(entry *ReadEntry, txt string, trans, insimg bool) (string, feed.FeedTextStatus) { var status feed.FeedTextStatus if txt == "" { status.Status = status.Status | feed.Feed_status_content_empty return empty_flowdocument, status } if trans { txt = markhtml.TransferText(txt) } redirector := func(uri string) string { return redirect_thumbnail(url_resolve(entry.Uri, uri)) } imgurl_maker := func(uri string) string { u := url_resolve(entry.Uri, uri) return imageurl_from_video(u) } frag, _ := html_create_fragment(txt) frag, score, _ := cleaner.NewExtractor(backend_context.config.CleanFolder).MakeFragmentReadable(frag) entry.Images = append_unique(entry.Images, feedmedias_from_docsummary(score.Images, redirector)...) entry.Images = append_unique(entry.Images, feedmedias_from_docsummary(score.Medias, imgurl_maker)...) entry.Videos = append_unique(entry.Videos, feedmedias_from_docsummary(score.Medias, func(o string) string { return o })...) status.WordCount = score.WordCount status.LinkCount = score.LinkCount status.LinkWordCount = score.LinkWordCount if status.WordCount < backend_config().SummaryMinWords { // if status.WordCount > 0 { // entry.Title.Others = append(entry.Title.Others, score.Text) // } status.Status = status.Status | feed.Feed_status_content_empty } if status.WordCount > 0 && feedentry_content_exists(score.Hash) { status.Status = status.Status | feed.Feed_status_content_duplicated } status.Status |= feed.Feed_status_content_ready imgs := entry.Images if insimg == false || len(imgs) > 1 || len(entry.Videos) > 0 || status.WordCount < backend_config().SummaryMinWords { imgs = nil } else if len(imgs) > 0 { status.Status |= feed.Feed_status_content_mediainline } return new_flowdoc_maker().make(frag, imgs), status }