func node_clear_children(frag *html.Node) { for child := frag.FirstChild; child != nil; { next := child.NextSibling frag.RemoveChild(child) child = next } }
func clean_element_before_header(body *html.Node, name string) { child := body.FirstChild for child != nil { if child.Type == html.ElementNode && child.Data != name { next := child.NextSibling body.RemoveChild(child) child = next } else { break } } }
func remove_decentant(n *html.Node, tag string) { child := n.FirstChild for child != nil { if child.Type == html.ElementNode && child.Data == tag { next := child.NextSibling n.RemoveChild(child) child = next } else { remove_decentant(child, tag) child = child.NextSibling } } }
// flatten inlines text image a object video audio seq // n is element-node // inline node may have div element func (this *html_cleaner) clean_inline_node(n *html.Node) { inlines := this.flatten_inline_node(n) for child := n.FirstChild; child != nil; child = n.FirstChild { n.RemoveChild(child) } for _, inline := range inlines { p := inline.Parent if p != nil { p.RemoveChild(inline) // this.article.RemoveChild(child) } n.AppendChild(inline) } }
//删除行前后空白 func (this *html_cleaner) trim_empty_spaces_func(n *html.Node, trim func(string) string) { child := n.FirstChild for child != nil { if child.Type == html.TextNode { child.Data = trim(child.Data) } else { this.trim_empty_spaces_func(child, trim) } if node_is_not_empty(child) { break } next := child.NextSibling n.RemoveChild(child) child = next } }
// The splitting process may leave TextNodes with no Data, which we keep // around to make the data manipulation simpler. This function removes // them. func cleanupTree(node *html.Node) { var next *html.Node for n := node.FirstChild; n != nil; n = next { next = n.NextSibling switch n.Type { case html.TextNode: if len(n.Data) == 0 { node.RemoveChild(n) } case html.ElementNode: cleanupTree(n) default: // ignore other node types. } } }
func processNode(node *html.Node) (err error) { var stackTags [16]openTag tags := stackTags[:0] n := node.FirstChild for n != nil { var next, newParent *html.Node next = n.NextSibling if l := len(tags); l != 0 { newParent = tags[l-1].node } switch n.Type { case html.TextNode: if tags, next, err = processTextNode(n, tags); err != nil { return } case html.ElementNode: if err = processNode(n); err != nil { return } default: // Other node types are just ignored. } // reparent the active node if necessary if newParent != nil { node.RemoveChild(n) newParent.AppendChild(n) } n = next } if len(tags) != 0 { err = fmt.Errorf("shortcodes still open at end of surrounding HTML tag: %+v", tags) } return }
// sanitizeUnwrap traverses pre-order over the nodes, reattaching
// the whitelisted children of any element nodes that are not
// whitelisted to the parent of the unwhitelisted node
//
// The traversal itself is done by sanitizeNode; this method only supplies
// the callback applied to a node. The callback returns true when the node is
// left in place (its Data is whitelisted, or it has no parent) and false
// after it has unwrapped and removed the node.
func (w *Whitelist) sanitizeUnwrap(n *html.Node) error {
	return w.sanitizeNode(n, func(n *html.Node) bool {
		if w.HasElement(n.Data) || n.Parent == nil {
			return true
		}
		// Remember where n sat among its siblings, and its first child,
		// before the tree is modified.
		insertBefore := n.NextSibling
		firstChild := n.FirstChild
		// Move every child of n into n's parent, immediately before n's
		// former next sibling, preserving their order.
		for c := n.FirstChild; c != nil; {
			nodeToUnwrap := c
			// Advance before detaching nodeToUnwrap.
			c = c.NextSibling
			n.RemoveChild(nodeToUnwrap)
			n.Parent.InsertBefore(nodeToUnwrap, insertBefore)
		}
		n.Parent.RemoveChild(n)
		// reset next sibling to support continuation
		// of linked-list style traversal of parent node's children
		//
		// NOTE(review): when n has no children firstChild is nil, so
		// n.NextSibling ends up nil rather than insertBefore — confirm that
		// sanitizeNode does not rely on n.NextSibling to reach the remaining
		// siblings in that case.
		n.NextSibling = firstChild
		return false
	})
}
//整理html文档,将block-level/inline-level混合的节点改成只有block-level的节点 //对已只有inline-level的节点,删除行前后的空白符 //将包含inline-level的节点展开成更为简单的形式,去掉想<font><span><strong>等等格式节点 func (this *html_cleaner) clean_block_node(n *html.Node) { blks := node_has_block_children(n) inlines := node_has_inline_children(n) // has bocks and inlines if blks && inlines { child := n.FirstChild for child != nil { if node_is_inline(child) { p := child.PrevSibling if p == nil || p.Data != "p" { p = create_element("p") n.InsertBefore(p, child) } n.RemoveChild(child) p.AppendChild(child) child = p.NextSibling } else { child = child.NextSibling } } inlines = false } // only inlines if blks == false && inlines { this.clean_inline_node(n) this.trim_empty_spaces(n) } // only blocks if blks && !inlines { for child := n.FirstChild; child != nil; child = child.NextSibling { this.clean_block_node(child) } } }
// removeChildren detaches every child of n, leaving it childless.
//
// Children are removed directly inside the loop. Each iteration re-reads
// n.FirstChild after the removal, so the walk never follows a sibling
// pointer of a node that has already been detached, and no deferred calls
// are queued per child.
func removeChildren(n *html.Node) {
	for c := n.FirstChild; c != nil; c = n.FirstChild {
		n.RemoveChild(c)
	}
}
// remove_children detaches every child of a, leaving it childless.
func remove_children(a *html.Node) {
	for c := a.FirstChild; c != nil; {
		// Grab the successor before c is detached.
		following := c.NextSibling
		a.RemoveChild(c)
		c = following
	}
}