Example #1
0
// TestStripTags runs net.StripTags over a fixed list of markup samples and
// checks either the stripped output or the reported error for each one.
func TestStripTags(t *testing.T) {
	assert := asserts.NewTestingAsserts(t, true)

	// The field names strict and entities are inferred from how the cases
	// below behave; confirm them against the net.StripTags signature.
	cases := []struct {
		markup   string
		strict   bool
		entities bool
		errMatch string // when non-empty, only the error is checked
		want     string
	}{
		{
			// Well-formed document.
			markup: "<p>The quick brown <b>fox</b> jumps over the lazy <em>dog</em>.</p>",
			strict: true,
			want:   "The quick brown fox jumps over the lazy dog .",
		},
		{
			// Unclosed <em> with the second argument set: an error is expected.
			markup:   "<p>The quick brown <b>fox</b> jumps over the lazy <em>dog.</p>",
			strict:   true,
			errMatch: `XML syntax error on line 1.*`,
		},
		{
			// Same unclosed <em> with the second argument cleared: no error.
			markup: "<p>The quick brown <b>fox</b> jumps over the lazy <em>dog.</p>",
			want:   "The quick brown fox jumps over the lazy dog.",
		},
		{
			// Escaped entities, third argument cleared.
			markup: "<p>The quick brown <b>fox &amp; goose</b> jump over the lazy &lt;em&gt;dog&lt;/em&gt;.</p>",
			strict: true,
			want:   "The quick brown fox & goose jump over the lazy <em>dog</em>.",
		},
		{
			// Doubly escaped ampersand and escaped tags, third argument set.
			markup:   "<p>The quick brown <b>fox &amp;amp; goose</b> jump over the lazy &lt;em&gt;dog&lt;/em&gt;.</p>",
			strict:   true,
			entities: true,
			want:     "The quick brown fox & goose jump over the lazy dog .",
		},
	}

	for _, c := range cases {
		got, err := net.StripTags(c.markup, c.strict, c.entities)
		if c.errMatch != "" {
			assert.ErrorMatch(err, c.errMatch, "Error in document detected.")
			continue
		}
		assert.Nil(err, "No error during stripping.")
		assert.Equal(got, c.want, "Tags have been removed.")
	}
}
Example #2
0
// PlainText returns the text as string without any markup. Content from
// external sources will be retrieved.
//
// When t.Src is non-empty the raw text is fetched from that URL with an HTTP
// GET; otherwise t.Text is used. The raw text is then handled by t.Type:
//
//   - "" or TextType: returned unchanged.
//   - HTMLType: passed through net.StripTags(raw, false, true).
//   - XHTMLType, or any type ending in "xml": passed through
//     net.StripTags(raw, true, true).
//
// Any other type yields an error. A failed request, a non-2xx response status
// and a failed body read are reported via newNoPlainTextError; errors from
// net.StripTags are returned as they are.
func (t Text) PlainText() (string, error) {
	// Retrieve the raw text.
	raw := t.Text
	if t.Src != "" {
		resp, err := http.Get(t.Src)
		if err != nil {
			return "", newNoPlainTextError(t, err)
		}
		defer resp.Body.Close()
		// http.Get only fails on transport-level problems; a 404 or 500 still
		// comes back with a nil error. Without this check the body of an
		// error page would be handed out as the referenced content.
		if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
			return "", newNoPlainTextError(t, fmt.Errorf("unexpected HTTP status retrieving %q: %s", t.Src, resp.Status))
		}
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			return "", newNoPlainTextError(t, err)
		}
		raw = string(body)
	}
	// Handle raw text depending on type.
	switch t.Type {
	case "", TextType:
		return raw, nil
	case HTMLType:
		return net.StripTags(raw, false, true)
	case XHTMLType:
		return net.StripTags(raw, true, true)
	}
	// Types not matched above that end in "xml" are handled like XHTML.
	if strings.HasSuffix(t.Type, "xml") {
		return net.StripTags(raw, true, true)
	}
	return "", newNoPlainTextError(t, fmt.Errorf("illegal text type: %s", t.Type))
}