Exemple #1
0
// goodAsText flattens d into something that reads well as plain text.
//
// The input is HTML-unescaped and parsed as an HTML fragment. Every text node
// contributes its whitespace-trimmed content, and every img element
// contributes whatever altTextOrEmpty yields for it (presumably its alt
// text). Each non-text node also inserts one separating space, unless the
// output is still empty or already ends in a space.
//
// If the fragment cannot be parsed, the failure is logged and d is returned
// unchanged. If nothing useful is found, the result is empty.
func goodAsText(d []byte) []byte {
	unescaped := html.UnescapeString(string(d))
	fragments, err := html.ParseFragment(strings.NewReader(unescaped), bodyNode)
	if err != nil {
		log.Printf("failed to parse [%s] as HTML: %v", unescaped, err)
		return d
	}

	var out bytes.Buffer

	// visit is handed to walk for each parsed root; walk presumably calls it
	// for the root and all of its descendants.
	visit := func(node *html.Node) {
		if node.Type == html.TextNode {
			out.WriteString(strings.TrimSpace(node.Data))
			return
		}

		// Separate the text gathered so far from whatever follows this node,
		// without producing leading or doubled spaces.
		if size := out.Len(); size > 0 && out.Bytes()[size-1] != ' ' {
			out.WriteString(" ")
		}

		if node.DataAtom != atom.Img {
			return
		}
		if alt := altTextOrEmpty(node); alt != "" {
			out.WriteString(alt)
		}
	}

	for _, fragment := range fragments {
		walk(fragment, visit)
	}
	return out.Bytes()
}
Exemple #2
0
// Data returns the content bytes of c. Content whose Type is "xhtml" is
// returned as stored; for every other type the HTML entities in Contents are
// unescaped first.
func (c atomContent) Data() []byte {
	if c.Type == "xhtml" {
		return c.Contents
	}
	return []byte(html.UnescapeString(string(c.Contents)))
}
Exemple #3
0
// SnipText shortens s to at most length bytes of display text.
//
// The input is trimmed, every match of snipRe is replaced by a single space,
// and HTML entities are unescaped. If the result already fits into length
// bytes it is returned as is. Otherwise it is cut to length bytes and, where
// possible, shortened further to end just before the last space or one of the
// punctuation marks ".-!?" so that no word is split.
//
// A negative length is treated as zero instead of panicking.
func SnipText(s string, length int) string {
	// s[:length] below would panic for a negative length.
	if length < 0 {
		length = 0
	}

	s = snipRe.ReplaceAllString(strings.TrimSpace(s), " ")
	s = html.UnescapeString(s)
	if len(s) <= length {
		return s
	}

	s = s[:length]
	// All separators are single-byte ASCII, so cutting at one of them never
	// splits a multi-byte character.
	if i := strings.LastIndexAny(s, " .-!?"); i != -1 {
		return s[:i]
	}

	// No separator found: the byte-wise cut above may have split a multi-byte
	// character. CleanNonUTF8 is defined elsewhere; presumably it removes the
	// resulting invalid bytes.
	return CleanNonUTF8(s)
}
Exemple #4
0
// snipper shortens s to at most snipLen bytes of display text.
//
// The input is trimmed, every match of snipRe is replaced by a single space,
// and HTML entities are unescaped. Text that fits into snipLen bytes is
// returned unchanged; longer text is cut to snipLen bytes and then, if
// possible, shortened to end just before the last space or one of ".-!?".
func snipper(s string) string {
	collapsed := snipRe.ReplaceAllString(strings.TrimSpace(s), " ")
	text := html.UnescapeString(collapsed)
	if len(text) <= snipLen {
		return text
	}

	head := text[:snipLen]
	if cut := strings.LastIndexAny(head, " .-!?"); cut != -1 {
		return head[:cut]
	}

	// No separator to cut at: the byte-wise truncation may have split a
	// multi-byte character. cleanNonUTF8 is defined elsewhere; presumably it
	// removes the resulting invalid bytes.
	return cleanNonUTF8(head)
}
Exemple #5
0
// EmbedHtml returns the entry's embedded-media markup with its HTML entities
// unescaped. The result is typed as template.HTML, which html/template emits
// without further escaping, so the embed content must come from a trusted
// source.
func (entry *Entry) EmbedHtml() template.HTML {
	markup := html.UnescapeString(entry.Media_Embed.Content)
	return template.HTML(markup)
}
Exemple #6
0
// pollFeed fetches the feed at uri in an endless loop and posts its new items
// to channel. It never returns.
//
// feedName is used as a prefix in log lines and posted messages, and
// timeFormat is the time.Parse layout used for each item's publication date.
//
// An item is skipped when its date cannot be parsed, when it was published
// before ignoreBefore, when it is at least freshness minutes old, or when its
// first link is already known as a recent URL.
//
// If anything panics, the panic is logged and, after retryAfter minutes,
// polling is restarted by calling pollFeed again.
func pollFeed(channel string, feedName string, timeFormat string, uri string) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("MEGA-WTF:pkg:RSS: %v\n", r)
			time.Sleep(retryAfter * time.Minute)
			// NOTE(review): the restart happens recursively from inside the
			// deferred function, so every recovered panic deepens the call
			// stack a little further.
			pollFeed(channel, feedName, timeFormat, uri)
		}
	}()

	if frankconf.Verbose {
		log.Printf("RSS %s: Setting up %s to post to %s \n", feedName, uri, channel)
	}

	// this will process all incoming new feed items and discard all that
	// are somehow erroneous or older than the threshold. It will directly
	// post any updates.
	itemHandler := func(feed *rss.Feed, ch *rss.Channel, newitems []*rss.Item) {
		log.Printf("RSS: %d new item(s) in %s\n", len(newitems), feedName)

		postitems := []string{}

		for _, item := range newitems {
			pubdate, err := time.Parse(timeFormat, item.PubDate)
			// ignore items with unreadable date format
			if err != nil {
				log.Printf("RSS: WTF @ reading date for %s: %s (err: %v)\n", feedName, item.PubDate, err)
				continue
			}

			// ignore items that were posted before frank booted or are older
			// than “freshness” minutes
			if ignoreBefore.After(pubdate) {
				log.Printf("RSS %s: skipping posts made before booting (posted: %s, booted: %s)\n", feedName, pubdate, ignoreBefore)
				continue
			}
			if time.Since(pubdate) >= freshness*time.Minute {
				log.Printf("RSS %s: skipping non-fresh post (posted: %s, time_ago: %s)\n", feedName, pubdate, time.Since(pubdate))
				continue
			}

			// only the first link of an item is considered
			url := ""
			if len(item.Links) > 0 {
				url = item.Links[0].Href
			}

			if url != "" && isRecentUrl(url) {
				if frankconf.Verbose {
					log.Printf("RSS %s: Skipping item because saved as recent URL (URL: %s)\n", feedName, url)
				}
				continue
			}

			// remember the URL first, then turn it into the suffix that is
			// appended to the posted message
			if url != "" {
				addRecentUrl(url)
				url = " @ " + url
			}

			author := html.UnescapeString(item.Author.Name)
			title := html.UnescapeString(item.Title)

			// the author is only mentioned when the feed provides one
			msg := "::" + feedName + ":: " + title + url
			if author != "" {
				msg += " (by " + author + ")"
			}
			postitems = appendIfMiss(postitems, msg)
		}

		cnt := len(postitems)

		// hide updates if they exceed the maxItems counter. If there’s only
		// one more item in the list than specified in maxItems, all of the
		// items will be printed – otherwise that item would be replaced by
		// a useless message that it has been hidden.
		if cnt > maxItems+1 {
			cntS := strconv.Itoa(cnt)
			maxS := strconv.Itoa(maxItems)
			msg := "::" + feedName + ":: had " + cntS + " updates, showing the latest " + maxS
			conn.Privmsg(channel, msg)
			// NOTE(review): this keeps the tail of postitems. If newer items
			// really come first (see the comment below), the tail holds the
			// oldest entries rather than the latest ones; confirm the
			// intended order.
			postitems = postitems[cnt-maxItems : cnt]
		}

		// newer items appear first in feeds, so reverse them here to keep
		// the order in line with how IRC works
		for i := len(postitems) - 1; i >= 0; i-- {
			conn.Privmsg(channel, postitems[i])
			log.Printf("RSS %s: posting %s\n", feedName, postitems[i])
		}
	}

	// create the feed listener/updater
	feed := rss.New(checkEvery, true, chanHandler, itemHandler)

	// check for updates infinite loop
	for {
		if frankconf.Verbose {
			t := feed.LastUpdate().Format(time.RFC3339)
			// %v rather than %s: CanUpdate's result is not a string, and %s
			// would render it as "%!s(bool=...)".
			log.Printf("RSS %s: Updating now (previous update: %s, refresh ok: %v)\n", feedName, t, feed.CanUpdate())
		}

		if err := feed.FetchClient(uri, &rssHttpClient, nil); err != nil {
			log.Printf("RSS %s: Error for %s: %s\n", feedName, uri, err)
			time.Sleep(retryAfter * time.Minute)
			continue
		}

		// wait for the interval the feed reports until its next update
		<-time.After(time.Duration(feed.SecondsTillUpdate() * 1e9))
	}
}