//整理html文档,将block-level/inline-level混合的节点改成只有block-level的节点 //对已只有inline-level的节点,删除行前后的空白符 //将包含inline-level的节点展开成更为简单的形式,去掉想<font><span><strong>等等格式节点 func (this *html_cleaner) clean_block_node(n *html.Node) { blks := node_has_block_children(n) inlines := node_has_inline_children(n) // has bocks and inlines if blks && inlines { child := n.FirstChild for child != nil { if node_is_inline(child) { p := child.PrevSibling if p == nil || p.Data != "p" { p = create_element("p") n.InsertBefore(p, child) } n.RemoveChild(child) p.AppendChild(child) child = p.NextSibling } else { child = child.NextSibling } } inlines = false } // only inlines if blks == false && inlines { this.clean_inline_node(n) this.trim_empty_spaces(n) } // only blocks if blks && !inlines { for child := n.FirstChild; child != nil; child = child.NextSibling { this.clean_block_node(child) } } }