Beispiel #1
0
// FindIcon scans the HTML document in b and returns the href of the first
// <link> start or self-closing tag whose rel attribute is exactly
// "shortcut icon" and whose href attribute is non-empty. It returns ErrNoIcon
// when no such tag is found or when the tokenizer stops on an error.
func FindIcon(b []byte) (string, error) {
	tokenizer := html.NewTokenizer(bytes.NewReader(b))
	for {
		if tokenizer.Next() == html.ErrorToken {
			if tokenizer.Err() != io.EOF {
				// The tokenizer failed before reaching the end of the input.
				return "", ErrNoIcon
			}
			break
		}

		tok := tokenizer.Token()
		if tok.DataAtom != atom.Link {
			continue
		}
		if tok.Type != html.StartTagToken && tok.Type != html.SelfClosingTagToken {
			continue
		}

		// When an attribute is repeated, the last occurrence wins.
		var rel, href string
		for _, attr := range tok.Attr {
			switch attr.Key {
			case "rel":
				rel = attr.Val
			case "href":
				href = attr.Val
			}
		}
		if rel == "shortcut icon" && href != "" {
			return href, nil
		}
	}
	return "", ErrNoIcon
}
Beispiel #2
0
// Autodiscover scans the HTML document in b and returns the href of the first
// <link> start or self-closing tag with rel exactly "alternate", a non-empty
// href, and a type of "application/rss+xml" or "application/atom+xml". It
// returns ErrNoRssLink when no such tag is found or when the tokenizer stops
// on an error.
func Autodiscover(b []byte) (string, error) {
	tokenizer := html.NewTokenizer(bytes.NewReader(b))
	for {
		if tokenizer.Next() == html.ErrorToken {
			if tokenizer.Err() != io.EOF {
				// The tokenizer failed before reaching the end of the input.
				return "", ErrNoRssLink
			}
			break
		}

		tok := tokenizer.Token()
		if tok.DataAtom != atom.Link {
			continue
		}
		if tok.Type != html.StartTagToken && tok.Type != html.SelfClosingTagToken {
			continue
		}

		// When an attribute is repeated, the last occurrence wins.
		var rel, href, mediaType string
		for _, attr := range tok.Attr {
			switch attr.Key {
			case "rel":
				rel = attr.Val
			case "href":
				href = attr.Val
			case "type":
				mediaType = attr.Val
			}
		}
		if rel != "alternate" || href == "" {
			continue
		}
		if mediaType == "application/rss+xml" || mediaType == "application/atom+xml" {
			return href, nil
		}
	}
	return "", ErrNoRssLink
}
Beispiel #3
0
// Sanitize tokenizes the HTML fragment s (after trimming surrounding
// whitespace) and returns two strings:
//
//   - the markup with every element not listed in acceptableElements removed;
//     for elements listed in unacceptableElementsWithEndTag the enclosed text
//     and other non-tag tokens are dropped as well;
//   - the concatenation of the text tokens that were kept, as rendered by
//     Token.String.
//
// Attributes of accepted start tags are processed by sanitizeAttributes, which
// also receives u (presumably as the base URL for links — confirm against
// sanitizeAttributes).
//
// If u is non-nil, its RawQuery and Fragment fields are cleared in place; the
// caller's URL is modified.
//
// If the tokenizer fails with anything other than io.EOF, s is returned
// unchanged for both results.
func Sanitize(s string, u *url.URL) (string, string) {
	r := bytes.NewReader([]byte(strings.TrimSpace(s)))
	z := html.NewTokenizer(r)
	var buf, strip bytes.Buffer
	// skip counts the currently open elements whose content must be dropped.
	skip := 0
	if u != nil {
		u.RawQuery = ""
		u.Fragment = ""
	}
	for {
		if z.Next() == html.ErrorToken {
			if z.Err() != io.EOF {
				return s, s
			}
			break
		}

		t := z.Token()
		switch t.Type {
		case html.StartTagToken, html.SelfClosingTagToken:
			if acceptableElements[t.Data] {
				sanitizeAttributes(u, &t)
				buf.WriteString(t.String())
			} else if unacceptableElementsWithEndTag[t.Data] && t.Type != html.SelfClosingTagToken {
				skip++
			}
		case html.EndTagToken:
			if acceptableElements[t.Data] {
				buf.WriteString(t.String())
			} else if unacceptableElementsWithEndTag[t.Data] && skip > 0 {
				// Only close what was actually opened: a stray end tag must
				// not push the counter below zero, otherwise all following
				// content would be dropped.
				skip--
			}
		default:
			if skip == 0 {
				buf.WriteString(t.String())
				if t.Type == html.TextToken {
					strip.WriteString(t.String())
				}
			}
		}
	}

	return buf.String(), strip.String()
}
Beispiel #4
0
// prescan tokenizes content looking for a <meta> start or self-closing tag
// that declares a character encoding, either through a charset attribute or
// through a content attribute accompanied by http-equiv="content-type".
// Attribute values are lower-cased (ASCII only) before being inspected, and a
// repeated attribute name is ignored after its first occurrence.
//
// It returns the encoding and name obtained from Lookup for the first usable
// declaration. A name starting with "utf-16" is reported as "utf-8" with
// encoding.Nop. When the tokenizer reports an error token (including end of
// input) before a usable declaration is found, it returns nil and "".
func prescan(content []byte) (e encoding.Encoding, name string) {
	// Whether the current <meta> tag needs the http-equiv pragma for its
	// declaration to count.
	const (
		dontKnow = iota
		doNeedPragma
		doNotNeedPragma
	)

	z := html.NewTokenizer(bytes.NewReader(content))
	for {
		tokenType := z.Next()
		if tokenType == html.ErrorToken {
			return nil, ""
		}
		if tokenType != html.StartTagToken && tokenType != html.SelfClosingTagToken {
			continue
		}

		tagName, hasAttr := z.TagName()
		if string(tagName) != "meta" {
			continue
		}

		// Per-tag state; results from an earlier <meta> tag are discarded.
		seen := make(map[string]bool)
		gotPragma := false
		needPragma := dontKnow
		e, name = nil, ""

		for hasAttr {
			var key, val []byte
			key, val, hasAttr = z.TagAttr()
			attr := string(key)
			if seen[attr] {
				continue
			}
			seen[attr] = true

			// Lower-case ASCII letters of the value in place.
			for i := range val {
				if c := val[i]; 'A' <= c && c <= 'Z' {
					val[i] = c + 0x20
				}
			}

			switch attr {
			case "http-equiv":
				if string(val) == "content-type" {
					gotPragma = true
				}

			case "content":
				// Only consulted while no encoding has been found in this tag.
				if e == nil {
					name = fromMetaElement(string(val))
					if name != "" {
						e, name = Lookup(name)
						if e != nil {
							needPragma = doNeedPragma
						}
					}
				}

			case "charset":
				e, name = Lookup(string(val))
				needPragma = doNotNeedPragma
			}
		}

		// A declaration is usable when it came from charset, or from content
		// together with the content-type pragma.
		usable := needPragma == doNotNeedPragma || (needPragma == doNeedPragma && gotPragma)
		if !usable {
			continue
		}

		if strings.HasPrefix(name, "utf-16") {
			name = "utf-8"
			e = encoding.Nop
		}

		if e != nil {
			return e, name
		}
	}
}