Example #1
0
// clean_block_node tidies the subtree rooted at n so that a node never mixes
// block-level and inline-level children.
//
//   - Mixed children: every inline child is moved into a <p> element. A run of
//     consecutive inline children shares one <p>; an inline child that directly
//     follows an existing <p> element is appended to that <p>. The (now
//     block-only) children are then cleaned recursively.
//   - Inline children only: the node is handed to clean_inline_node and
//     trim_empty_spaces. Per the original notes, these simplify inline content
//     (dropping formatting nodes such as <font>, <span>, <strong>) and strip
//     leading/trailing whitespace.
//   - Block children only: each child is cleaned recursively.
//   - No children of either kind: n is left untouched.
func (this *html_cleaner) clean_block_node(n *html.Node) {
	blocks := node_has_block_children(n)
	inlines := node_has_inline_children(n)

	// No block children: either pure inline content or nothing to do.
	if !blocks {
		if inlines {
			this.clean_inline_node(n)
			this.trim_empty_spaces(n)
		}
		return
	}

	// Blocks and inlines are mixed: wrap the inline children in <p> elements
	// so that only block-level children remain.
	if inlines {
		for child := n.FirstChild; child != nil; {
			if !node_is_inline(child) {
				child = child.NextSibling
				continue
			}

			// Reuse the preceding sibling only when it really is a <p>
			// element. Data also holds the content of text/comment nodes, so
			// comparing Data alone would mistake e.g. a comment "p" for a
			// paragraph and bury the inline child inside it.
			wrapper := child.PrevSibling
			if wrapper == nil || wrapper.Type != html.ElementNode || wrapper.Data != "p" {
				wrapper = create_element("p")
				n.InsertBefore(wrapper, child)
			}

			// child must be detached from n before it can be re-parented.
			n.RemoveChild(child)
			wrapper.AppendChild(child)

			// child now lives inside wrapper; continue with whatever follows
			// the wrapper in n.
			child = wrapper.NextSibling
		}
	}

	// Only block-level children remain at this point: recurse into each.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		this.clean_block_node(child)
	}
}