func hasTablPluieClass(z *html.Tokenizer) bool { key, val, more := z.TagAttr() if string(key) == "class" && string(val) == "tablPluie" { return true } if more { return hasTablPluieClass(z) } return false }
func traverse_html_tokenizer(z *html.Tokenizer) { for { if z.Next() == html.ErrorToken { return } text_b := z.Text() tag_name_b, hasAttri := z.TagName() tag_attr_key_b, tag_attr_value_b, _ := z.TagAttr() text := string(text_b) tag_name := string(tag_name_b) tag_attr_key := string(tag_attr_key_b) tag_attr_value := string(tag_attr_value_b) fmt.Printf("|Tokenizer.Text:%-10s|Tokenizer.TagName:%-10s|hasAttri:%-10t|tag_attr_key:%-10s|tag_attr_value:%-10s|\n", text, tag_name, hasAttri, tag_attr_key, tag_attr_value) } }
func getAttrVal(tokenizer *html.Tokenizer, attrName string) string { for { key, val, moreAttr := tokenizer.TagAttr() if string(key) == attrName { return string(val) } if !moreAttr { return "" } } }
// parseAnchorAttrs iterates over all of the attributes in the current anchor token. // If a href is found, it adds the link value to the links slice. // Returns the new link slice. func parseAnchorAttrs(tokenizer *html.Tokenizer, links []*URL) []*URL { //TODO: rework this to be cleaner, passing in `links` to be appended to //isn't great for { key, val, moreAttr := tokenizer.TagAttr() if bytes.Compare(key, []byte("href")) == 0 { u, err := ParseURL(strings.TrimSpace(string(val))) if err == nil { links = append(links, u) } } if !moreAttr { return links } } }
/* * Get list of attributes for a tag. * If the tag is at the end of a HTML fragment and not all attributes * can be read by the tokenizer, this call terminates with a "nil" * map to indicate failure. The tag is than dropped (for an eavesdropper * this looks like a cached resource) * @param tk *html.Tokenizer - tokenizer instance * @return map[string]string - list of attributes */ func getAttrs(tk *html.Tokenizer) (list map[string]string) { // handle panic during parsing defer func() { if r := recover(); r != nil { logger.Printf(logger.WARN, "[sid.html] Skipping fragmented tag: %v\n", r) list = nil } }() // parse attributes from HTML text list = make(map[string]string) for { key, val, more := tk.TagAttr() list[string(key)] = string(val) if !more { break } } return }