// returns a jsonifiable struct func jsonify(node *html.Node) map[string]interface{} { vals := map[string]interface{}{} if len(node.Attr) > 0 { for _, attr := range node.Attr { if pupEscapeHTML { vals[attr.Key] = html.EscapeString(attr.Val) } else { vals[attr.Key] = attr.Val } } } vals["tag"] = node.DataAtom.String() children := []interface{}{} for child := node.FirstChild; child != nil; child = child.NextSibling { switch child.Type { case html.ElementNode: children = append(children, jsonify(child)) case html.TextNode: text := strings.TrimSpace(child.Data) if text != "" { if pupEscapeHTML { // don't escape javascript if node.DataAtom != atom.Script { text = html.EscapeString(text) } } // if there is already text we'll append it currText, ok := vals["text"] if ok { text = fmt.Sprintf("%s %s", currText, text) } vals["text"] = text } case html.CommentNode: comment := strings.TrimSpace(child.Data) if pupEscapeHTML { comment = html.EscapeString(comment) } currComment, ok := vals["comment"] if ok { comment = fmt.Sprintf("%s %s", currComment, comment) } vals["comment"] = comment } } if len(children) > 0 { vals["children"] = children } return vals }
// The <pre> tag indicates that the text within it should always be formatted // as is. See https://github.com/ericchiang/pup/issues/33 func (t TreeDisplayer) printPre(n *html.Node) { switch n.Type { case html.TextNode: s := n.Data if pupEscapeHTML { // don't escape javascript if n.Parent == nil || n.Parent.DataAtom != atom.Script { s = html.EscapeString(s) } } fmt.Print(s) for c := n.FirstChild; c != nil; c = c.NextSibling { t.printPre(c) } case html.ElementNode: fmt.Printf("<%s", n.Data) for _, a := range n.Attr { val := a.Val if pupEscapeHTML { val = html.EscapeString(val) } fmt.Printf(` %s="%s"`, a.Key, val) } fmt.Print(">") if !isVoidElement(n) { for c := n.FirstChild; c != nil; c = c.NextSibling { t.printPre(c) } fmt.Printf("</%s>", n.Data) } case html.CommentNode: data := n.Data if pupEscapeHTML { data = html.EscapeString(data) } fmt.Printf("<!--%s-->\n", data) for c := n.FirstChild; c != nil; c = c.NextSibling { t.printPre(c) } case html.DoctypeNode, html.DocumentNode: for c := n.FirstChild; c != nil; c = c.NextSibling { t.printPre(c) } } }
func (a AttrDisplayer) Display(nodes []*html.Node) { for _, node := range nodes { attributes := node.Attr for _, attr := range attributes { if attr.Key == a.Attr { val := attr.Val if pupEscapeHTML { val = html.EscapeString(val) } fmt.Printf("%s\n", val) } } } }
func (t TextDisplayer) Display(nodes []*html.Node) { for _, node := range nodes { if node.Type == html.TextNode { data := node.Data if pupEscapeHTML { // don't escape javascript if node.Parent == nil || node.Parent.DataAtom != atom.Script { data = html.EscapeString(data) } } fmt.Println(data) } children := []*html.Node{} child := node.FirstChild for child != nil { children = append(children, child) child = child.NextSibling } t.Display(children) } }
// Print a node and all of it's children to `maxlevel`. func (t TreeDisplayer) printNode(n *html.Node, level int) { switch n.Type { case html.TextNode: s := n.Data if pupEscapeHTML { // don't escape javascript if n.Parent == nil || n.Parent.DataAtom != atom.Script { s = html.EscapeString(s) } } s = strings.TrimSpace(s) if s != "" { t.printIndent(level) fmt.Println(s) } case html.ElementNode: t.printIndent(level) // TODO: allow pre with color if n.DataAtom == atom.Pre && !pupPrintColor && pupPreformatted { t.printPre(n) fmt.Println() return } if pupPrintColor { tokenColor.Print("<") tagColor.Printf("%s", n.Data) } else { fmt.Printf("<%s", n.Data) } for _, a := range n.Attr { val := a.Val if pupEscapeHTML { val = html.EscapeString(val) } if pupPrintColor { fmt.Print(" ") attrKeyColor.Printf("%s", a.Key) tokenColor.Print("=") quoteColor.Printf(`"%s"`, val) } else { fmt.Printf(` %s="%s"`, a.Key, val) } } if pupPrintColor { tokenColor.Println(">") } else { fmt.Println(">") } if !isVoidElement(n) { t.printChildren(n, level+1) t.printIndent(level) if pupPrintColor { tokenColor.Print("</") tagColor.Printf("%s", n.Data) tokenColor.Println(">") } else { fmt.Printf("</%s>\n", n.Data) } } case html.CommentNode: t.printIndent(level) data := n.Data if pupEscapeHTML { data = html.EscapeString(data) } if pupPrintColor { commentColor.Printf("<!--%s-->\n", data) } else { fmt.Printf("<!--%s-->\n", data) } t.printChildren(n, level) case html.DoctypeNode, html.DocumentNode: t.printChildren(n, level) } }