Example #1
0
File: html.go Project: bfix/sid
/*
 * Build a Tag for the tokenizer's current tag if it is one of the
 * tag kinds we track; all other tags are ignored.
 *
 * Tracked tags and the attribute each one must carry:
 *   script -> "src"   (external script files)
 *   img    -> (none)  (any attribute set is accepted)
 *   link   -> "href"  (external links, e.g. style sheets)
 *   input  -> "type"  (input fields)
 *
 * @param tk *html.Tokenizer - tokenizer instance
 * @return *Tag - reference to read tag (nil if the tag is not tracked,
 *                getAttrs returned nil, or the required attribute is missing)
 */
func readTag(tk *html.Tokenizer) *Tag {

	rawName, _ := tk.TagName()
	name := string(rawName)

	// Decide which attribute (if any) must be present; bail out early
	// for tags we do not care about so getAttrs is never called for them.
	var required string
	switch name {
	case "script":
		required = "src"
	case "img":
		required = ""
	case "link":
		required = "href"
	case "input":
		required = "type"
	default:
		return nil
	}

	attrs := getAttrs(tk)
	if attrs == nil {
		return nil
	}
	if required != "" {
		if _, found := attrs[required]; !found {
			return nil
		}
	}
	return NewTag(name, attrs)
}
Example #2
0
// traverse_html_tokenizer advances z token by token until Next reports
// html.ErrorToken, printing one formatted row per token to standard output.
// Each row shows the results of Text, TagName and a single TagAttr call for
// the current token, so at most one attribute key/value pair appears per row.
func traverse_html_tokenizer(z *html.Tokenizer) {
	const rowFormat = "|Tokenizer.Text:%-10s|Tokenizer.TagName:%-10s|hasAttri:%-10t|tag_attr_key:%-10s|tag_attr_value:%-10s|\n"

	for z.Next() != html.ErrorToken {
		text := string(z.Text())
		name, hasAttr := z.TagName()
		// The third result of TagAttr is deliberately discarded.
		key, val, _ := z.TagAttr()

		fmt.Printf(rowFormat, text, string(name), hasAttr, string(key), string(val))
	}
}
Example #3
0
// burnTokensUntilEndTag consumes and discards tokens from firewood. It
// returns once it has consumed an end tag whose name equals tagName, or as
// soon as the tokenizer reports html.ErrorToken.
//
// NOTE(review): the comparison is an exact byte match against what TagName
// returns; x/net/html documents that name as lower-cased, so tagName
// presumably has to be lower-case as well - confirm against callers.
func burnTokensUntilEndTag(firewood *html.Tokenizer, tagName string) {
	for {
		switch firewood.Next() {
		case html.ErrorToken:
			return
		case html.EndTagToken:
			if name, _ := firewood.TagName(); string(name) == tagName {
				return
			}
		}
	}
}
Example #4
0
// textUpToEndTag concatenates the Text of every html.TextToken read from
// tokenizer and returns the accumulated bytes. Reading stops after an end
// tag whose name equals tagName has been consumed, or when the tokenizer
// reports html.ErrorToken. Tokens of any other type are skipped.
func textUpToEndTag(tokenizer *html.Tokenizer, tagName string) []byte {
	var collected bytes.Buffer
	for {
		switch tokenizer.Next() {
		case html.TextToken:
			collected.Write(tokenizer.Text())
		case html.EndTagToken:
			if name, _ := tokenizer.TagName(); string(name) == tagName {
				return collected.Bytes()
			}
		case html.ErrorToken:
			return collected.Bytes()
		}
	}
}
Example #5
0
// getTagName returns the tokenizer's current tag name as a string.
// The second result of TagName is discarded.
func getTagName(tokenizer *html.Tokenizer) string {
	name, _ := tokenizer.TagName()
	return string(name)
}