// GoodAsText applies some heuristics to make the data look good when displayed // as simple text. For example, if the data is escaped HTML then other heuristics are // applied to remove the HTML. For example if the data contains an HTML image tag, // goodAsText will return the alt text. If nothing good is found then an empty slice is // returned. func goodAsText(d []byte) []byte { unesc := html.UnescapeString(string(d)) nodes, err := html.ParseFragment(strings.NewReader(unesc), bodyNode) if err != nil { log.Printf("failed to parse [%s] as HTML: %v", unesc, err) return d } var buf bytes.Buffer for _, root := range nodes { walk(root, func(n *html.Node) { if n.Type == html.TextNode { buf.WriteString(strings.TrimSpace(n.Data)) return } if n := buf.Len(); n > 0 && buf.Bytes()[n-1] != ' ' { buf.WriteString(" ") } if n.DataAtom == atom.Img { if alt := altTextOrEmpty(n); alt != "" { buf.WriteString(alt) } } }) } return buf.Bytes() }
func (c atomContent) Data() []byte { unesc := c.Contents if c.Type != "xhtml" { unesc = []byte(html.UnescapeString(string(c.Contents))) } return unesc }
func SnipText(s string, length int) string { s = snipRe.ReplaceAllString(strings.TrimSpace(s), " ") s = html.UnescapeString(s) if len(s) <= length { return s } s = s[:length] i := strings.LastIndexAny(s, " .-!?") if i != -1 { return s[:i] } return CleanNonUTF8(s) }
func snipper(s string) string { s = snipRe.ReplaceAllString(strings.TrimSpace(s), " ") s = html.UnescapeString(s) if len(s) <= snipLen { return s } s = s[:snipLen] i := strings.LastIndexAny(s, " .-!?") if i != -1 { return s[:i] } return cleanNonUTF8(s) }
// EmbedHtml returns unescaped html of embed media func (entry *Entry) EmbedHtml() template.HTML { return template.HTML(html.UnescapeString(entry.Media_Embed.Content)) }
func pollFeed(channel string, feedName string, timeFormat string, uri string) { defer func() { if r := recover(); r != nil { log.Printf("MEGA-WTF:pkg:RSS: %v\n", r) time.Sleep(retryAfter * time.Minute) pollFeed(channel, feedName, timeFormat, uri) } }() if frankconf.Verbose { log.Printf("RSS %s: Setting up %s to post to %s \n", feedName, uri, channel) } // this will process all incoming new feed items and discard all that // are somehow erroneous or older than the threshold. It will directly // post any updates. itemHandler := func(feed *rss.Feed, ch *rss.Channel, newitems []*rss.Item) { log.Printf("RSS: %d new item(s) in %s\n", len(newitems), feedName) postitems := []string{} for _, item := range newitems { pubdate, err := time.Parse(timeFormat, item.PubDate) // ignore items with unreadable date format if err != nil { log.Printf("RSS: WTF @ reading date for %s: %s (err: %v)\n", feedName, item.PubDate, err) continue } // ignore items that were posted before frank booted or are older // than “freshness” minutes if ignoreBefore.After(pubdate) { log.Printf("RSS %s: skipping posts made before booting (posted: %s, booted: %s)\n", feedName, pubdate, ignoreBefore) continue } if time.Since(pubdate) >= freshness*time.Minute { log.Printf("RSS %s: skipping non-fresh post (posted: %s, time_ago: %s)\n", feedName, pubdate, time.Since(pubdate)) continue } url := "" if len(item.Links) > 0 { url = item.Links[0].Href } if url != "" && isRecentUrl(url) { if frankconf.Verbose { log.Printf("RSS %s: Skipping item because saved as recent URL (URL: %s)\n", feedName, url) } continue } if url != "" { addRecentUrl(url) url = " @ " + url } author := html.UnescapeString(item.Author.Name) title := html.UnescapeString(item.Title) if author == "" { postitems = appendIfMiss(postitems, "::"+feedName+":: "+title+url) } else { postitems = appendIfMiss(postitems, "::"+feedName+":: "+title+url+" (by "+author+")") } } cnt := len(postitems) // hide updates if they exceed the maxItems counter. If there’s only // one more item in the list than specified in maxItems, all of the // items will be printed – otherwise that item would be replaced by // a useless message that it has been hidden. if cnt > maxItems+1 { cntS := strconv.Itoa(cnt) maxS := strconv.Itoa(maxItems) msg := "::" + feedName + ":: had " + cntS + " updates, showing the latest " + maxS conn.Privmsg(channel, msg) postitems = postitems[cnt-maxItems : cnt] } // newer items appear first in feeds, so reverse them here to keep // the order in line with how IRC wprks for i := len(postitems) - 1; i >= 0; i -= 1 { conn.Privmsg(channel, postitems[i]) log.Printf("RSS %s: posting %s\n", feedName, postitems[i]) } } // create the feed listener/updater feed := rss.New(checkEvery, true, chanHandler, itemHandler) // check for updates infinite loop for { if frankconf.Verbose { t := feed.LastUpdate().Format(time.RFC3339) log.Printf("RSS %s: Updating now (previous update: %s, refresh ok: %s)\n", feedName, t, feed.CanUpdate()) } if err := feed.FetchClient(uri, &rssHttpClient, nil); err != nil { log.Printf("RSS %s: Error for %s: %s\n", feedName, uri, err) time.Sleep(retryAfter * time.Minute) continue } <-time.After(time.Duration(feed.SecondsTillUpdate() * 1e9)) } }