Beispiel #1
0
// StripSomeHTML removes the most common HTML presentation tags from msg.
//
// Start, end and self-closing tags whose name is set in tagsToAvoid are
// dropped. Every other token (text, the remaining tags, comments and
// doctypes) is copied to the output exactly as it appeared in msg, so
// character references such as "&lt;" stay escaped and stripped input cannot
// turn escaped text into live markup.
//
// If the tokenizer reports an error other than io.EOF, msg itself is returned
// unchanged.
func StripSomeHTML(msg []byte) (out []byte) {
	z := html.NewTokenizer(bytes.NewReader(msg))

	for {
		switch z.Next() {
		case html.ErrorToken:
			if err := z.Err(); err != nil && err != io.EOF {
				// Tokenizing failed part-way: fall back to the untouched input
				// rather than returning a truncated result.
				return msg
			}
			return out
		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			// The slice returned by Raw may be modified by a later TagName
			// call (the tokenizer lower-cases the name in place), so take a
			// private copy before looking the name up.
			raw := append([]byte(nil), z.Raw()...)
			name, _ := z.TagName()

			if !tagsToAvoid[string(name)] {
				out = append(out, raw...)
			}
		case html.TextToken, html.CommentToken, html.DoctypeToken:
			// Raw, not Text: the output is still HTML, so text must keep its
			// original escaping.
			out = append(out, z.Raw()...)
		}
	}
}
Beispiel #2
0
// StripHTML discards all markup in msg and returns only the content of its
// text tokens, concatenated in document order.
//
// If the tokenizer stops with an error other than io.EOF, msg itself is
// returned unchanged.
func StripHTML(msg []byte) (out []byte) {
	tokenizer := html.NewTokenizer(bytes.NewReader(msg))

	for {
		kind := tokenizer.Next()

		if kind == html.TextToken {
			out = append(out, tokenizer.Text()...)
			continue
		}
		if kind != html.ErrorToken {
			// Tags, comments and doctypes are simply skipped.
			continue
		}

		// ErrorToken: either a clean end of input or a real failure.
		if err := tokenizer.Err(); err == nil || err == io.EOF {
			return out
		}
		return msg
	}
}