Beispiel #1
0
// node_clear_children detaches every child of frag, front to back, leaving
// frag without children. The removed nodes are not attached anywhere else.
func node_clear_children(frag *html.Node) {
	for {
		first := frag.FirstChild
		if first == nil {
			return
		}
		frag.RemoveChild(first)
	}
}
Beispiel #2
0
// clean_element_before_header strips the leading run of element children of
// body whose tag differs from name. It stops at the first child that is
// either not an element node or is an element named name; that child and
// everything after it stay in place.
func clean_element_before_header(body *html.Node, name string) {
	for cur := body.FirstChild; cur != nil; {
		if cur.Type != html.ElementNode || cur.Data == name {
			return
		}
		// Remember the sibling before cur is detached from body.
		following := cur.NextSibling
		body.RemoveChild(cur)
		cur = following
	}
}
Beispiel #3
0
// remove_decentant walks the subtree below n and detaches every element
// whose tag equals tag. A matching element is removed together with its
// whole subtree; every other child is searched recursively.
func remove_decentant(n *html.Node, tag string) {
	var following *html.Node
	for cur := n.FirstChild; cur != nil; cur = following {
		// Remember the sibling before cur is possibly detached from n.
		following = cur.NextSibling
		if cur.Type != html.ElementNode || cur.Data != tag {
			remove_decentant(cur, tag)
			continue
		}
		n.RemoveChild(cur)
	}
}
Beispiel #4
0
// clean_inline_node replaces the children of n with the nodes returned by
// flatten_inline_node (text, image, a, object, video, audio, in sequence).
// n is an element node.
// An inline node may contain div elements.
func (this *html_cleaner) clean_inline_node(n *html.Node) {
	flattened := this.flatten_inline_node(n)

	// Empty n completely before rebuilding its child list.
	for n.FirstChild != nil {
		n.RemoveChild(n.FirstChild)
	}

	for _, node := range flattened {
		// A node that is still attached to some parent is detached first,
		// then appended to n.
		if owner := node.Parent; owner != nil {
			owner.RemoveChild(node)
		}
		n.AppendChild(node)
	}
}
Beispiel #5
0
// trim_empty_spaces_func removes blank content at the leading edge of n.
// Starting at the first child, each text node has trim applied to its Data
// and every other node is processed recursively. A child that
// node_is_not_empty does not accept afterwards is removed from n; the walk
// stops at the first child that it does accept.
func (this *html_cleaner) trim_empty_spaces_func(n *html.Node, trim func(string) string) {
	for cur := n.FirstChild; cur != nil; {
		switch cur.Type {
		case html.TextNode:
			cur.Data = trim(cur.Data)
		default:
			this.trim_empty_spaces_func(cur, trim)
		}

		if node_is_not_empty(cur) {
			return
		}

		// Remember the sibling before cur is detached from n.
		following := cur.NextSibling
		n.RemoveChild(cur)
		cur = following
	}
}
Beispiel #6
0
// cleanupTree removes text nodes whose Data is empty from the tree below
// node. The splitting process may leave such nodes behind; they are kept
// around during processing to make the data manipulation simpler. Only
// element children are descended into; other node types are left alone.
func cleanupTree(node *html.Node) {
	for cur := node.FirstChild; cur != nil; {
		// Remember the sibling before cur is possibly detached from node.
		following := cur.NextSibling

		if cur.Type == html.TextNode {
			if cur.Data == "" {
				node.RemoveChild(cur)
			}
		} else if cur.Type == html.ElementNode {
			cleanupTree(cur)
		}

		cur = following
	}
}
Beispiel #7
0
// processNode walks the direct children of node in order. Text nodes are
// handed to processTextNode together with the current stack of open tags;
// element nodes are processed recursively; all other node types are skipped.
// While at least one tag is open, each visited child is moved from node to
// the node of the innermost open tag.
//
// It returns the first error reported by processTextNode or by a recursive
// call, or an error if tags are still open once all children were visited.
func processNode(node *html.Node) (err error) {
	// tags starts out as an empty slice backed by the fixed-size stackTags
	// array.
	var stackTags [16]openTag
	tags := stackTags[:0]

	n := node.FirstChild
	for n != nil {
		var next, newParent *html.Node

		// Default successor; processTextNode may replace it below.
		next = n.NextSibling
		// The reparenting target is taken from the stack as it was BEFORE n
		// is processed, so changes made to tags while handling n do not
		// affect where n itself ends up.
		if l := len(tags); l != 0 {
			newParent = tags[l-1].node
		}

		switch n.Type {
		case html.TextNode:
			// processTextNode returns the updated tag stack and the node to
			// continue with.
			if tags, next, err = processTextNode(n, tags); err != nil {
				return
			}
		case html.ElementNode:
			// Each element gets its own, independent tag stack.
			if err = processNode(n); err != nil {
				return
			}
		default:
			// Other node types are just ignored.
		}

		// reparent the active node if necessary
		if newParent != nil {
			node.RemoveChild(n)
			newParent.AppendChild(n)
		}

		n = next
	}

	// Every tag opened inside node has to be closed inside node as well.
	if len(tags) != 0 {
		err = fmt.Errorf("shortcodes still open at end of surrounding HTML tag: %+v", tags)
	}

	return
}
Beispiel #8
0
// sanitizeUnwrap runs sanitizeNode over n with a callback that unwraps
// elements which are not whitelisted.
//
// The callback returns true for a node whose Data is whitelisted
// (w.HasElement) or that has no parent. For any other node it moves every
// child of that node, in order, into the node's parent directly before the
// node's former next sibling, removes the node from its parent and returns
// false.
//
// NOTE(review): how the boolean result and the patched NextSibling are used
// is decided by sanitizeNode, which is not visible here; presumably false
// means "node was removed, continue with n.NextSibling" — confirm.
func (w *Whitelist) sanitizeUnwrap(n *html.Node) error {
	return w.sanitizeNode(n, func(n *html.Node) bool {
		if w.HasElement(n.Data) || n.Parent == nil {
			return true
		}

		// Children are re-inserted in front of n's next sibling so they end
		// up where n used to be, in their original order.
		insertBefore := n.NextSibling
		firstChild := n.FirstChild
		for c := n.FirstChild; c != nil; {
			// Advance before detaching; c's sibling link is read first.
			nodeToUnwrap := c
			c = c.NextSibling

			n.RemoveChild(nodeToUnwrap)
			n.Parent.InsertBefore(nodeToUnwrap, insertBefore)
		}
		n.Parent.RemoveChild(n)

		// reset next sibling to support continuation
		// of linked-list style traversal of parent node's children
		// NOTE(review): when n had no children, firstChild is nil and
		// n.NextSibling ends up nil here — confirm that sanitizeNode still
		// visits n's former next sibling (insertBefore) in that case.
		n.NextSibling = firstChild
		return false
	})
}
Beispiel #9
0
// clean_block_node tidies the subtree below n so that block-level and
// inline-level content are no longer mixed under one node.
//
//   - If n has both block and inline children, each run of consecutive
//     inline children is wrapped in a <p> element, so that n ends up with
//     block-level children only; the children are then cleaned recursively.
//   - If n has only inline children, they are flattened into a simpler form
//     by clean_inline_node (dropping formatting nodes such as <font>, <span>,
//     <strong>) and blank content is trimmed by trim_empty_spaces.
//   - If n has only block children, each child is cleaned recursively.
func (this *html_cleaner) clean_block_node(n *html.Node) {
	blks := node_has_block_children(n)
	inlines := node_has_inline_children(n)

	// has blocks and inlines
	if blks && inlines {
		child := n.FirstChild
		for child != nil {
			if !node_is_inline(child) {
				child = child.NextSibling
				continue
			}

			// Reuse the preceding <p> if there is one, otherwise open a new
			// one in front of child. Data holds the tag name only for
			// element nodes (for text and comment nodes it is the content),
			// so the type has to be checked as well; otherwise a text or
			// comment node reading "p" would be taken for a paragraph.
			p := child.PrevSibling
			if p == nil || p.Type != html.ElementNode || p.Data != "p" {
				p = create_element("p")
				n.InsertBefore(p, child)
			}
			n.RemoveChild(child)
			p.AppendChild(child)
			// child now lives inside p; continue with what follows p in n.
			child = p.NextSibling
		}
		// All inline children are wrapped now, so n is handled as
		// block-only below.
		inlines = false
	}

	// only inlines
	if !blks && inlines {
		this.clean_inline_node(n)
		this.trim_empty_spaces(n)
	}

	// only blocks
	if blks && !inlines {
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			this.clean_block_node(child)
		}
	}
}
Beispiel #10
0
// removeChildren detaches every child of n, leaving n without children.
//
// The children are removed directly while iterating: after each removal the
// loop continues with whatever is now the first child, so no sibling link of
// an already detached node is ever read and no deferred call has to be
// queued per child.
func removeChildren(n *html.Node) {
	for c := n.FirstChild; c != nil; c = n.FirstChild {
		n.RemoveChild(c)
	}
}
Beispiel #11
0
// remove_children detaches all children of a, front to back, leaving a
// without children.
func remove_children(a *html.Node) {
	for cur := a.FirstChild; cur != nil; {
		// Remember the sibling before cur is detached from a.
		following := cur.NextSibling
		a.RemoveChild(cur)
		cur = following
	}
}