Пример #1
0
// jsonify converts node into a map that can be serialized as JSON.
//
// The returned map holds:
//   - one entry per attribute of node, keyed by the attribute name;
//   - "tag": the element's tag name;
//   - "text": the whitespace-trimmed, non-blank text children of node joined
//     with single spaces (absent when there is no such text);
//   - "comment": the whitespace-trimmed comment children of node joined with
//     single spaces (absent when node has no comment children);
//   - "children": one nested map per element child, in document order (absent
//     when node has no element children).
//
// The reserved keys are written after the attributes, so they replace an
// attribute that happens to share their name. When pupEscapeHTML is set,
// attribute values, text and comments are HTML-escaped, except for text
// directly inside a <script> element.
func jsonify(node *html.Node) map[string]interface{} {
	vals := map[string]interface{}{}
	for _, attr := range node.Attr {
		if pupEscapeHTML {
			vals[attr.Key] = html.EscapeString(attr.Val)
		} else {
			vals[attr.Key] = attr.Val
		}
	}

	// DataAtom is zero for tag names that are not in the atom table (custom
	// elements, for example), and the zero atom stringifies to "". Fall back
	// to the raw tag name so such elements are not reported with an empty tag.
	tag := node.DataAtom.String()
	if tag == "" && node.Type == html.ElementNode {
		tag = node.Data
	}
	vals["tag"] = tag

	// Text and comments are gathered locally instead of being read back out of
	// vals, so an attribute literally named "text" or "comment" can never be
	// mixed into the node's own text or comment content.
	var (
		children []interface{}
		texts    []string
		comments []string
	)
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		switch child.Type {
		case html.ElementNode:
			children = append(children, jsonify(child))
		case html.TextNode:
			text := strings.TrimSpace(child.Data)
			if text == "" {
				continue
			}
			// don't escape javascript
			if pupEscapeHTML && node.DataAtom != atom.Script {
				text = html.EscapeString(text)
			}
			texts = append(texts, text)
		case html.CommentNode:
			comment := strings.TrimSpace(child.Data)
			if pupEscapeHTML {
				comment = html.EscapeString(comment)
			}
			// Empty comments are kept: their presence is still reported.
			comments = append(comments, comment)
		}
	}

	if len(texts) > 0 {
		vals["text"] = strings.Join(texts, " ")
	}
	if len(comments) > 0 {
		vals["comment"] = strings.Join(comments, " ")
	}
	if len(children) > 0 {
		vals["children"] = children
	}
	return vals
}
Пример #2
0
// printPre writes n and its whole subtree to standard output without adding
// indentation or trimming whitespace, because the text inside a <pre> tag
// should always be formatted as is.
// See https://github.com/ericchiang/pup/issues/33
//
// When pupEscapeHTML is set, text, attribute values and comment bodies are
// HTML-escaped; text whose parent is a <script> element is left untouched.
func (t TreeDisplayer) printPre(n *html.Node) {
	// descend prints every direct child of n in document order.
	descend := func() {
		for kid := n.FirstChild; kid != nil; kid = kid.NextSibling {
			t.printPre(kid)
		}
	}

	switch n.Type {
	case html.DoctypeNode, html.DocumentNode:
		// Nothing to print for the node itself; only its children matter.
		descend()

	case html.CommentNode:
		body := n.Data
		if pupEscapeHTML {
			body = html.EscapeString(body)
		}
		fmt.Printf("<!--%s-->\n", body)
		descend()

	case html.TextNode:
		text := n.Data
		// Script bodies are JavaScript, so they are never escaped.
		inScript := n.Parent != nil && n.Parent.DataAtom == atom.Script
		if pupEscapeHTML && !inScript {
			text = html.EscapeString(text)
		}
		fmt.Print(text)
		descend()

	case html.ElementNode:
		fmt.Printf("<%s", n.Data)
		for i := range n.Attr {
			key, value := n.Attr[i].Key, n.Attr[i].Val
			if pupEscapeHTML {
				value = html.EscapeString(value)
			}
			fmt.Printf(` %s="%s"`, key, value)
		}
		fmt.Print(">")

		// Void elements have neither content nor a closing tag.
		if isVoidElement(n) {
			return
		}
		descend()
		fmt.Printf("</%s>", n.Data)
	}
}
Пример #3
0
// Display prints to standard output, one per line, the value of every
// attribute named a.Attr found on the given nodes. Nodes that do not carry
// that attribute produce no output. Values are HTML-escaped when
// pupEscapeHTML is set.
func (a AttrDisplayer) Display(nodes []*html.Node) {
	for i := range nodes {
		for _, candidate := range nodes[i].Attr {
			if candidate.Key != a.Attr {
				continue
			}
			out := candidate.Val
			if pupEscapeHTML {
				out = html.EscapeString(out)
			}
			fmt.Println(out)
		}
	}
}
Пример #4
0
// Display prints to standard output, one per line, the data of every text
// node found in the given nodes and their descendants. Each node is visited
// before its children, and children are visited in document order.
//
// When pupEscapeHTML is set the text is HTML-escaped, unless the text node's
// parent is a <script> element.
func (t TextDisplayer) Display(nodes []*html.Node) {
	for _, current := range nodes {
		if current.Type == html.TextNode {
			text := current.Data
			// Script bodies are JavaScript, so they are never escaped.
			insideScript := current.Parent != nil && current.Parent.DataAtom == atom.Script
			if pupEscapeHTML && !insideScript {
				text = html.EscapeString(text)
			}
			fmt.Println(text)
		}

		// Gather the direct children and hand them to a recursive call.
		var kids []*html.Node
		for kid := current.FirstChild; kid != nil; kid = kid.NextSibling {
			kids = append(kids, kid)
		}
		t.Display(kids)
	}
}
Пример #5
0
// printNode writes n to standard output, indented for depth level, and hands
// its children to printChildren.
//
//   - Text nodes are trimmed of surrounding whitespace and skipped when blank.
//   - Element nodes print an opening tag with their attributes, their
//     children one level deeper, and a closing tag; void elements print only
//     the opening tag. A <pre> element is delegated to printPre when
//     pupPreformatted is set and colored output is off.
//   - Comment nodes print as <!--...--> on their own line.
//   - Doctype and document nodes print nothing themselves.
//
// When pupEscapeHTML is set, text, attribute values and comment bodies are
// HTML-escaped, except text whose parent is a <script> element. When
// pupPrintColor is set, the markup is written through the color printers.
//
// NOTE(review): the previous comment mentioned printing "to `maxlevel`"; no
// depth limit is enforced in this function — presumably printChildren handles
// it; confirm.
func (t TreeDisplayer) printNode(n *html.Node, level int) {
	switch n.Type {
	case html.DoctypeNode, html.DocumentNode:
		t.printChildren(n, level)

	case html.CommentNode:
		t.printIndent(level)
		body := n.Data
		if pupEscapeHTML {
			body = html.EscapeString(body)
		}
		if !pupPrintColor {
			fmt.Printf("<!--%s-->\n", body)
		} else {
			commentColor.Printf("<!--%s-->\n", body)
		}
		t.printChildren(n, level)

	case html.TextNode:
		text := n.Data
		// Script bodies are JavaScript, so they are never escaped.
		inScript := n.Parent != nil && n.Parent.DataAtom == atom.Script
		if pupEscapeHTML && !inScript {
			text = html.EscapeString(text)
		}
		text = strings.TrimSpace(text)
		if text == "" {
			return
		}
		t.printIndent(level)
		fmt.Println(text)

	case html.ElementNode:
		t.printIndent(level)

		// TODO: allow pre with color
		if pupPreformatted && !pupPrintColor && n.DataAtom == atom.Pre {
			t.printPre(n)
			fmt.Println()
			return
		}

		// Opening tag name.
		if !pupPrintColor {
			fmt.Printf("<%s", n.Data)
		} else {
			tokenColor.Print("<")
			tagColor.Printf("%s", n.Data)
		}

		// Attributes, in source order.
		for _, attr := range n.Attr {
			value := attr.Val
			if pupEscapeHTML {
				value = html.EscapeString(value)
			}
			if !pupPrintColor {
				fmt.Printf(` %s="%s"`, attr.Key, value)
				continue
			}
			fmt.Print(" ")
			attrKeyColor.Printf("%s", attr.Key)
			tokenColor.Print("=")
			quoteColor.Printf(`"%s"`, value)
		}

		// End of the opening tag.
		if !pupPrintColor {
			fmt.Println(">")
		} else {
			tokenColor.Println(">")
		}

		// Void elements have neither content nor a closing tag.
		if isVoidElement(n) {
			return
		}

		t.printChildren(n, level+1)
		t.printIndent(level)
		if !pupPrintColor {
			fmt.Printf("</%s>\n", n.Data)
		} else {
			tokenColor.Print("</")
			tagColor.Printf("%s", n.Data)
			tokenColor.Println(">")
		}
	}
}