Esempio n. 1
0
// NextStartTag advances the tokenizer, discarding tokens, until it reaches a
// start tag or self-closing tag whose name equals one of tagNames, and returns
// that token.
//
// If the tokenizer yields an error token before a match is found, the zero
// html.Token and t.Err() are returned. With no tagNames nothing can match, so
// the input is consumed until the tokenizer reports an error.
func NextStartTag(t *html.Tokenizer, tagNames ...string) (html.Token, error) {
	// The loop only exits through one of the returns below, so no trailing
	// statement is needed (or reachable) after it.
	for {
		switch t.Next() {
		case html.ErrorToken:
			return html.Token{}, t.Err()
		case html.SelfClosingTagToken, html.StartTagToken:
			tt := t.Token()
			for _, v := range tagNames {
				if v == tt.Data {
					return tt, nil
				}
			}
		}
	}
}
Esempio n. 2
0
// TextInTag collects the content found between the current tokenizer position
// and the point where startTag is closed.
//
// Text tokens are kept after trimming surrounding whitespace (empty results
// are dropped). Tags whose name appears in tagNames are kept as nodes too: a
// self-closing one becomes a leaf node, while a start tag becomes a node whose
// List holds the result of a recursive TextInTag call for that tag. Any other
// start tag is only tracked so that its end tag can be matched later.
//
// The input is expected to be well-formed: every end tag is handed to popTag
// together with the currently open tags, and a popTag error aborts the walk.
// Reaching an error token while tags are still open yields an
// "unclosed tags" error.
func TextInTag(t *html.Tokenizer, startTag string, tagNames ...string) ([]Node, error) {
	// wanted reports whether name is one of the tags the caller asked to keep.
	wanted := func(name string) bool {
		for i := range tagNames {
			if tagNames[i] == name {
				return true
			}
		}
		return false
	}

	var nodes []Node
	open := []string{startTag} // tags still waiting for their end tag
	for len(open) != 0 {
		// Comment and doctype tokens match no case and are skipped.
		switch t.Next() {
		case html.ErrorToken:
			return nil, fmt.Errorf("unclosed tags: %v", open)

		case html.TextToken:
			tok := t.Token()
			if tok.Data = strings.TrimSpace(tok.Data); tok.Data != "" {
				nodes = append(nodes, Node{Token: tok})
			}

		case html.SelfClosingTagToken:
			if tok := t.Token(); wanted(tok.Data) {
				nodes = append(nodes, Node{Token: tok})
			}

		case html.StartTagToken:
			tok := t.Token()
			if !wanted(tok.Data) {
				// Not kept, but its end tag must still be paired up.
				open = append(open, tok.Data)
				continue
			}
			// The recursive call consumes tokens up to and including the
			// matching end tag, so this tag is never pushed on open here.
			children, err := TextInTag(t, tok.Data, tagNames...)
			if err != nil {
				return nil, err
			}
			nodes = append(nodes, Node{Token: tok, List: children})

		case html.EndTagToken:
			name, _ := t.TagName()
			remaining, err := popTag(open, string(name))
			if err != nil {
				return nil, err
			}
			open = remaining
		}
	}
	return nodes, nil
}