// NextStartTag skips everything until we find a start tag of the given names. func NextStartTag(t *html.Tokenizer, tagNames ...string) (html.Token, error) { for { switch t.Next() { case html.ErrorToken: return html.Token{}, t.Err() case html.SelfClosingTagToken, html.StartTagToken: tt := t.Token() for _, v := range tagNames { if v == tt.Data { return tt, nil } } } } panic("unreachable") }
// TextInTag returns nodes containing all text until the given startTag is // closed, including the tags of the given names. // // It expects well-formed HTML: mismatching closing tags will result in error. func TextInTag(t *html.Tokenizer, startTag string, tagNames ...string) ([]Node, error) { stack := []string{startTag} var c []Node for len(stack) > 0 { // A token can be of the following types: // // html.ErrorToken // html.TextToken // html.StartTagToken // html.EndTagToken // html.SelfClosingTagToken // html.CommentToken // html.DoctypeToken switch t.Next() { case html.ErrorToken: return nil, fmt.Errorf("unclosed tags: %v", stack) case html.SelfClosingTagToken: tt := t.Token() tag := tt.Data SelfClosingTagLoop: for _, v := range tagNames { if v == tag { c = append(c, Node{Token: tt}) break SelfClosingTagLoop } } case html.StartTagToken: tt := t.Token() tag := tt.Data found := false StartTagLoop: for _, v := range tagNames { if v == tag { found = true list, err := TextInTag(t, tag, tagNames...) if err != nil { return nil, err } c = append(c, Node{Token: tt, List: list}) break StartTagLoop } } if !found { stack = append(stack, tag) } case html.EndTagToken: var err error tag, _ := t.TagName() if stack, err = popTag(stack, string(tag)); err != nil { return nil, err } case html.TextToken: tt := t.Token() tt.Data = strings.TrimSpace(tt.Data) if len(tt.Data) > 0 { c = append(c, Node{Token: tt}) } } } return c, nil }