// ProcessHTML parses given html from Reader interface and fills up OpenGraph structure func (og *OpenGraph) ProcessHTML(buffer io.Reader) error { z := html.NewTokenizer(buffer) for { tt := z.Next() switch tt { case html.ErrorToken: if z.Err() == io.EOF { return nil } return z.Err() case html.StartTagToken, html.SelfClosingTagToken, html.EndTagToken: name, hasAttr := z.TagName() if atom.Lookup(name) == atom.Body { return nil // OpenGraph is only in head, so we don't need body } if atom.Lookup(name) != atom.Meta || !hasAttr { continue } m := make(map[string]string) var key, val []byte for hasAttr { key, val, hasAttr = z.TagAttr() m[atom.String(key)] = string(val) } og.ProcessMeta(m) } } return nil }
// Token returns the next Token. The result's Data and Attr values remain valid // after subsequent Next calls. func (z *Tokenizer) Token() Token { t := Token{Type: z.tt} switch z.tt { case TextToken, CommentToken, DoctypeToken: t.Data = string(z.Text()) case StartTagToken, SelfClosingTagToken, EndTagToken: name, moreAttr := z.TagName() for moreAttr { var key, val []byte key, val, moreAttr = z.TagAttr() t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)}) } if a := atom.Lookup(name); a != 0 { t.DataAtom, t.Data = a, a.String() } else { t.DataAtom, t.Data = 0, string(name) } } return t }