// FindIcon returns the href attribute of a <link rel="shortcut icon"> tag,
// or ErrNoIcon if no such tag is found.
func FindIcon(b []byte) (string, error) {
	r := bytes.NewReader(b)
	z := html.NewTokenizer(r)
	for {
		if z.Next() == html.ErrorToken {
			if err := z.Err(); err == io.EOF {
				break
			} else {
				return "", ErrNoIcon
			}
		}
		t := z.Token()
		switch t.DataAtom {
		case atom.Link:
			if t.Type == html.StartTagToken || t.Type == html.SelfClosingTagToken {
				attrs := make(map[string]string)
				for _, a := range t.Attr {
					attrs[a.Key] = a.Val
				}
				if attrs["rel"] == "shortcut icon" && attrs["href"] != "" {
					return attrs["href"], nil
				}
			}
		}
	}
	return "", ErrNoIcon
}
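// A hypothetical usage sketch (it would live in a _test.go file and needs
// fmt and net/url): FindIcon returns whatever is in the href attribute, so
// callers typically resolve a relative value against the page URL themselves.
// The example.com URL below is illustrative only.
func ExampleFindIcon() {
	page := []byte(`<html><head>
		<link rel="shortcut icon" href="/favicon.ico">
	</head></html>`)

	href, err := FindIcon(page)
	if err != nil {
		fmt.Println(err)
		return
	}
	base, _ := url.Parse("https://example.com/blog/")
	icon, _ := url.Parse(href)
	fmt.Println(base.ResolveReference(icon))
	// Output: https://example.com/favicon.ico
}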
// Autodiscover returns the href attribute of the first <link rel="alternate">
// tag that points to an RSS or Atom feed, or ErrNoRssLink if none is found.
func Autodiscover(b []byte) (string, error) {
	r := bytes.NewReader(b)
	z := html.NewTokenizer(r)
	for {
		if z.Next() == html.ErrorToken {
			if err := z.Err(); err == io.EOF {
				break
			} else {
				return "", ErrNoRssLink
			}
		}
		t := z.Token()
		switch t.DataAtom {
		case atom.Link:
			if t.Type == html.StartTagToken || t.Type == html.SelfClosingTagToken {
				attrs := make(map[string]string)
				for _, a := range t.Attr {
					attrs[a.Key] = a.Val
				}
				if attrs["rel"] == "alternate" && attrs["href"] != "" &&
					(attrs["type"] == "application/rss+xml" || attrs["type"] == "application/atom+xml") {
					return attrs["href"], nil
				}
			}
		}
	}
	return "", ErrNoRssLink
}
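// A hypothetical end-to-end sketch (the function name is not part of this
// package): fetch a page, autodiscover its feed, and resolve a possibly
// relative href against the page URL. Requires net/http, io, and net/url.
func discoverFeedURL(pageURL string) (string, error) {
	resp, err := http.Get(pageURL)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	href, err := Autodiscover(body)
	if err != nil {
		return "", err
	}

	base, err := url.Parse(pageURL)
	if err != nil {
		return "", err
	}
	ref, err := url.Parse(href)
	if err != nil {
		return "", err
	}
	return base.ResolveReference(ref).String(), nil
}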
// Sanitize filters the HTML in s, writing out only elements present in
// acceptableElements (with their attributes passed through sanitizeAttributes)
// and dropping text that appears inside elements listed in
// unacceptableElementsWithEndTag. It returns the sanitized HTML and a second
// string containing only the text tokens that were kept. If tokenization
// fails with anything other than io.EOF, the input is returned unchanged for
// both values.
func Sanitize(s string, u *url.URL) (string, string) {
	r := bytes.NewReader([]byte(strings.TrimSpace(s)))
	z := html.NewTokenizer(r)
	buf := &bytes.Buffer{}
	strip := &bytes.Buffer{}
	skip := 0

	if u != nil {
		u.RawQuery = ""
		u.Fragment = ""
	}

	for {
		if z.Next() == html.ErrorToken {
			if err := z.Err(); err == io.EOF {
				break
			} else {
				return s, s
			}
		}
		t := z.Token()
		if t.Type == html.StartTagToken || t.Type == html.SelfClosingTagToken {
			if !acceptableElements[t.Data] {
				if unacceptableElementsWithEndTag[t.Data] && t.Type != html.SelfClosingTagToken {
					skip++
				}
			} else {
				sanitizeAttributes(u, &t)
				buf.WriteString(t.String())
			}
		} else if t.Type == html.EndTagToken {
			if !acceptableElements[t.Data] {
				if unacceptableElementsWithEndTag[t.Data] {
					skip--
				}
			} else {
				buf.WriteString(t.String())
			}
		} else if skip == 0 {
			buf.WriteString(t.String())
			if t.Type == html.TextToken {
				strip.WriteString(t.String())
			}
		}
	}
	return buf.String(), strip.String()
}
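// A hypothetical usage sketch (sanitizeItem is not part of this package).
// Note that Sanitize clears RawQuery and Fragment on the *url.URL it is
// given, so pass a copy if the caller still needs the original. The 200-byte
// preview length is arbitrary.
func sanitizeItem(rawHTML string, itemURL *url.URL) (safeHTML, preview string) {
	var base *url.URL
	if itemURL != nil {
		c := *itemURL // shallow copy, since Sanitize mutates its argument
		base = &c
	}
	safeHTML, text := Sanitize(rawHTML, base)

	// Use the stripped text tokens as a rough plain-text preview.
	if len(text) > 200 {
		text = text[:200]
	}
	return safeHTML, text
}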
// prescan scans content for a <meta> tag that declares the document's
// character encoding, either via a charset attribute or via an
// http-equiv="content-type" pragma with a content attribute. It returns the
// matching encoding and its name, or nil and "" if none is found.
func prescan(content []byte) (e encoding.Encoding, name string) {
	z := html.NewTokenizer(bytes.NewReader(content))
	for {
		switch z.Next() {
		case html.ErrorToken:
			return nil, ""

		case html.StartTagToken, html.SelfClosingTagToken:
			tagName, hasAttr := z.TagName()
			if !bytes.Equal(tagName, []byte("meta")) {
				continue
			}
			attrList := make(map[string]bool)
			gotPragma := false

			const (
				dontKnow = iota
				doNeedPragma
				doNotNeedPragma
			)
			needPragma := dontKnow

			name = ""
			e = nil
			for hasAttr {
				var key, val []byte
				key, val, hasAttr = z.TagAttr()
				ks := string(key)
				if attrList[ks] {
					continue
				}
				attrList[ks] = true

				// Lowercase the attribute value in place before comparing.
				for i, c := range val {
					if 'A' <= c && c <= 'Z' {
						val[i] = c + 0x20
					}
				}

				switch ks {
				case "http-equiv":
					if bytes.Equal(val, []byte("content-type")) {
						gotPragma = true
					}

				case "content":
					if e == nil {
						name = fromMetaElement(string(val))
						if name != "" {
							e, name = Lookup(name)
							if e != nil {
								needPragma = doNeedPragma
							}
						}
					}

				case "charset":
					e, name = Lookup(string(val))
					needPragma = doNotNeedPragma
				}
			}

			if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
				continue
			}

			// A document declared as UTF-16 that this byte-oriented prescan
			// could still read is treated as UTF-8 and passed through as-is.
			if strings.HasPrefix(name, "utf-16") {
				name = "utf-8"
				e = encoding.Nop
			}

			if e != nil {
				return e, name
			}
		}
	}
}
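// A hypothetical sketch of using prescan to decode a fetched document to
// UTF-8 (decodeToUTF8 is not part of this package). It assumes
// golang.org/x/text/transform is imported; when prescan finds nothing the
// bytes are returned unchanged rather than guessing. A fuller implementation
// would also consult the Content-Type header and any byte-order mark first.
func decodeToUTF8(body []byte) ([]byte, error) {
	e, _ := prescan(body)
	if e == nil {
		return body, nil
	}
	return io.ReadAll(transform.NewReader(bytes.NewReader(body), e.NewDecoder()))
}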