Golang Node.RemoveChild Examples

Programming Language: Golang

Namespace/Package Name: golang.org/x/net/html

Class/Type: Node

Method/Function: RemoveChild

Examples at hotexamples.com: 21

The `Node.RemoveChild` method is part of the `golang.org/x/net/html` package for Go. It removes a child node from its parent node in a parsed HTML tree, and it panics if the given node is not a child of that parent.

Example 1:

doc, err := html.Parse(strings.NewReader("Example
Another example"))
if err != nil {
    log.Fatal(err)
}

for c := doc.FirstChild.FirstChild; c != nil; c = c.NextSibling {
    if c.Type == html.ElementNode && c.Data == "p" {
        doc.FirstChild.RemoveChild(c)
    }
}

In this example, we parse an HTML document containing two paragraphs inside a div. We then loop through the child nodes of the div (the two paragraphs) and check whether each node is an element node with the tag "p". If it is, we remove that child node from the div using the `RemoveChild` method.

Example 2:

// removeNode detaches node from its parent. A node that has no parent is
// left untouched.
func removeNode(node *html.Node) {
    if parent := node.Parent; parent != nil {
        parent.RemoveChild(node)
    }
}

In this example, we define a `removeNode` function that takes a pointer to an `html.Node` object as its argument. The function checks if the node has a parent node and, if it does, it removes the node from its parent using the `RemoveChild` method. This function could be used in a larger program to remove specific nodes from an HTML document.

Golang Node.RemoveChild - 21 examples found. These are the top rated real world Golang examples of golang.org/x/net/html.Node.RemoveChild extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Attr(30)

Data(30)

RemoveChild(21)

AppendChild(18)

Type(11)

DataAtom(5)

InsertBefore(5)

FirstChild(4)

LastChild(1)

Namespace(1)

NextSibling(1)

Parent(1)

Example #1

Show file

File: node.go Project: justintan/gox

// CompactNode simplifies the subtree rooted at n in place.
//
// Every child is compacted recursively first. A child whose tag is enabled in
// _mergeTextElements is removed and its own children are collected; a
// childless element whose tag is not in _voidElements is removed as well. The
// collected nodes are then passed to DetachNodes and AppendChildNodes(n, ...).
// Finally, when n is left with exactly one child, that child is either
// unwrapped (same tag as n, or a <br> inside a <p>/<div>) or, for an <img>
// inside an <a>, its fields are copied over n itself.
func CompactNode(n *html.Node) {
	var hoisted []*html.Node
	child := n.FirstChild
	for child != nil {
		CompactNode(child)
		switch {
		case _mergeTextElements[child.Data]:
			hoisted = append(hoisted, GetChildNodes(child)...)
			log.Info("delete", child.Data)
			// RemoveNode's result is used as the next node to visit
			// (presumably the removed node's former next sibling).
			child = RemoveNode(child)
		case child.Type == html.ElementNode && child.FirstChild == nil && !_voidElements[child.Data]:
			log.Info("delete", child.Data)
			child = RemoveNode(child)
		default:
			child = child.NextSibling
		}
	}

	DetachNodes(hoisted)
	AppendChildNodes(n, hoisted)

	// The remaining rules only apply when n has exactly one child.
	only := n.FirstChild
	if only == nil || only.NextSibling != nil {
		return
	}

	switch {
	case only.Data == n.Data || (only.Data == "br" && (n.Data == "p" || n.Data == "div")):
		grandchildren := GetChildNodes(only)
		log.Info("delete", only.Data)
		n.RemoveChild(only)
		DetachNodes(grandchildren)
		AppendChildNodes(n, grandchildren)
	case only.Data == "img" && n.Data == "a":
		// Overwrite n's fields with a copy of the image node's fields.
		*n = *only
	}
}

Example #2

Show file

File: parser_utils.go Project: ReanGD/go-web-search

// mergeNodes joins the text around the gap between prev and next, two
// children of parent, and returns the node the caller should look at next.
//
// When both neighbours are text nodes, next's text is appended to prev
// (separated by a space if addSeparator is set), next is removed from parent,
// and prev's new next sibling is returned. Otherwise the separator is added to
// whichever neighbour is a text node; if neither is one and addSeparator is
// set, a new text node holding the separator is inserted before next. In all
// of these cases next is returned.
func (u *parserUtils) mergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	var delim string
	if addSeparator {
		delim = " "
	}

	prevIsText := prev != nil && prev.Type == html.TextNode
	nextIsText := next != nil && next.Type == html.TextNode

	switch {
	case prevIsText && nextIsText:
		prev.Data += delim + next.Data
		parent.RemoveChild(next)
		return prev.NextSibling
	case prevIsText:
		prev.Data += delim
	case nextIsText:
		next.Data = delim + next.Data
	case addSeparator:
		separator := &html.Node{Type: html.TextNode, Data: delim}
		parent.InsertBefore(separator, next)
	}

	return next
}

Example #3

Show file

File: 03_top_down_v1.go Project: aarzilli/tools

/*
   div                     div
       div                     p
           p         TO        img
           img                 p
           p


	Operates from the *middle* div.
	Saves all children in inverted slice.
	Removes each child and reattaches it one level higher.
	Finally the intermediary, now childless div is removed.




   \                  /
    \       /\       /
     \_____/  \_____/

     \              /
      \_____/\_____/

       \__________/     => Breaks are gone


       \p1___p2___/     => Wrapping preserves breaks




*/
// topDownV1 dissolves n into its parent: when n's parent is a couple[0]
// element, n is a couple[1] element, and n has at least one child, all of n's
// children are re-attached to the parent at n's position (in their original
// order) and the now empty n is removed. Text nodes and <a> elements are
// wrapped in a <p cfrm="div"> so that they keep their own line. Afterwards the
// parent's tag name is set to parentType.
//
// Nodes for which noParent reports true are ignored.
func topDownV1(n *html.Node, couple []string, parentType string) {
	if noParent(n) {
		return
	}
	p := n.Parent

	parDiv := p.Type == html.ElementNode && p.Data == couple[0] // parent is a div
	iAmDiv := n.Type == html.ElementNode && n.Data == couple[1] // I am a div
	if !parDiv || !iAmDiv || n.FirstChild == nil {
		return
	}

	// Snapshot the children: RemoveChild clears sibling links, so the live
	// list cannot be walked while it is being taken apart.
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		children = append(children, c)
	}

	// Walk the children back to front, inserting each one before the node
	// inserted previously; this lands them right after n in original order.
	insertionPoint := n.NextSibling
	for i := len(children) - 1; i >= 0; i-- {
		c1 := children[i]
		n.RemoveChild(c1)

		if c1.Type == html.TextNode || c1.Data == "a" {
			wrap := &html.Node{Type: html.ElementNode, Data: "p",
				Attr: []html.Attribute{{Key: "cfrm", Val: "div"}}}
			// AppendChild keeps FirstChild, LastChild and Parent consistent;
			// setting FirstChild by hand would leave LastChild nil.
			wrap.AppendChild(c1)
			p.InsertBefore(wrap, insertionPoint)
			insertionPoint = wrap
		} else {
			p.InsertBefore(c1, insertionPoint)
			insertionPoint = c1
		}
	}

	p.RemoveChild(n)
	p.Data = parentType
}

Example #4

Show file

File: 01_cleanse.go Project: aarzilli/tools

// removeUnwanted detaches every direct child of n whose Data is enabled in
// unwanteds.
//
// RemoveChild resets the removed node's NextSibling to nil, so the following
// sibling is remembered before a child is possibly removed.
func removeUnwanted(n *html.Node) {
	var next *html.Node
	for child := n.FirstChild; child != nil; child = next {
		next = child.NextSibling
		if unwanteds[child.Data] {
			n.RemoveChild(child)
		}
	}
}

Example #5

Show file

File: 6_dir_digest_3.go Project: aarzilli/tools

// removeUnwanted detaches every direct child of n that is a <script> element
// or a comment node.
//
// The children are collected first because RemoveChild clears the removed
// node's sibling links, which would cut a direct walk short.
func removeUnwanted(n *html.Node) {
	var cc []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		cc = append(cc, c)
	}
	for _, c := range cc {
		// The decision is made per child; testing the parent instead would
		// remove either all children or none.
		if (c.Type == html.ElementNode && c.Data == "script") || c.Type == html.CommentNode {
			n.RemoveChild(c)
		}
	}
}

Example #6

Show file

File: mutate.go Project: albertjin/goquery

// setNodeText removes all of node's children and gives it a single text node
// child holding s.
func setNodeText(node *html.Node, s string) {
	// Drain the child list from the front until nothing is left.
	for child := node.FirstChild; child != nil; child = node.FirstChild {
		node.RemoveChild(child)
	}

	text := &html.Node{Type: html.TextNode, Data: s}
	node.AppendChild(text)
}

Example #7

Show file

File: readability.go Project: jpoehls/feedmailer

// replaceNodeWithChildren moves each child of n, in order, into n's parent
// directly before n and then removes n from the parent. n must have a parent.
func replaceNodeWithChildren(n *html.Node) {
	parent := n.Parent

	// Always take the current first child; once it has been moved out, the
	// next child becomes the first one.
	for n.FirstChild != nil {
		child := n.FirstChild
		n.RemoveChild(child)
		parent.InsertBefore(child, n)
	}

	parent.RemoveChild(n)
}

Example #8

Show file

File: 03_condense_top_down.go Project: aarzilli/tools

// removeEmptyNodes prunes empty elements from the subtree rooted at n,
// handling the children before n itself.
//
// After the recursion, n is removed from its parent when it is
//   - an <img> whose src attribute (as returned by attrX) is empty,
//   - a childless <a> whose href is "" or "#",
//   - a childless <em>, <strong>, <div>, <span>, <li> or <p>.
//
// A <span> whose only child is a text node with fewer than three
// non-whitespace-trimmed bytes is turned into a text node carrying that text.
// lvl is the current depth and is only passed along to the recursive calls.
// Nodes that qualify for removal must have a parent.
func removeEmptyNodes(n *html.Node, lvl int) {
	// Recurse over a snapshot, since children may detach themselves.
	var kids []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		kids = append(kids, c)
	}
	for _, c := range kids {
		removeEmptyNodes(c, lvl+1)
	}

	// All rules below concern element nodes only.
	if n.Type != html.ElementNode {
		return
	}
	childless := n.FirstChild == nil

	switch n.Data {
	case "img":
		if attrX(n.Attr, "src") == "" {
			n.Parent.RemoveChild(n)
		}

	case "a":
		if childless {
			href := attrX(n.Attr, "href")
			if href == "#" || href == "" {
				n.Parent.RemoveChild(n)
			}
		}

	case "em", "strong", "div", "li", "p":
		if childless {
			n.Parent.RemoveChild(n)
		}

	case "span":
		if childless {
			n.Parent.RemoveChild(n)
			break
		}
		// Very short text inside a span: flatten the span to a text node.
		sole := n.FirstChild
		if sole == n.LastChild &&
			sole.Type == html.TextNode &&
			len(strings.TrimSpace(sole.Data)) < 3 {
			n.Type = html.TextNode
			n.Data = sole.Data
			n.RemoveChild(sole)
		}
	}
}

Example #9

Show file

File: clean.go Project: documize/html-diff

// clean normalises the subtree rooted at n in place and returns the estimated
// number of treeRunes that will be used for it.
//
// For element nodes, empty style attributes are deleted, non-empty ones have
// their spaces stripped and a trailing ";" ensured, and colspan="1" is dropped
// from <td> elements. For text nodes, character references are unescaped so
// that, for example, "&#160;" and its UTF-8 form compare equal. Child elements
// whose tag is listed in c.CleanTags are removed together with their subtrees;
// all other children are cleaned recursively.
//
// The estimate is 1 per node, plus the rune count of a text node's data
// minus 1.
// TODO more cleaning of the input HTML, as required.
func (c *Config) clean(n *html.Node) int {
	size := 1
	switch n.Type {
	case html.ElementNode:
		for ai := 0; ai < len(n.Attr); ai++ {
			a := n.Attr[ai]
			switch {
			case strings.ToLower(a.Key) == "style":
				if strings.TrimSpace(a.Val) == "" { // delete empty styles
					n.Attr = delAttr(n.Attr, ai)
					ai-- // revisit this index: the slice has shifted left
				} else { // tidy non-empty styles
					// TODO there could be more here to make sure the style entries are in the same order etc.
					n.Attr[ai].Val = strings.Replace(a.Val, " ", "", -1)
					if !strings.HasSuffix(n.Attr[ai].Val, ";") {
						n.Attr[ai].Val += ";"
					}
				}
			case n.DataAtom == atom.Td &&
				strings.ToLower(a.Key) == "colspan" &&
				strings.TrimSpace(a.Val) == "1":
				n.Attr = delAttr(n.Attr, ai)
				ai--
			}
		}
	case html.TextNode:
		n.Data = htm.UnescapeString(n.Data)
		size += utf8.RuneCountInString(n.Data) - 1 // len(n.Data) would be faster, but use more memory
	}

	// Visit every child exactly once. The next sibling is saved up front
	// because RemoveChild clears the removed node's sibling links; restarting
	// the scan from the first child instead would clean (and unescape) the
	// already processed children again and count them twice.
	var next *html.Node
	for ch := n.FirstChild; ch != nil; ch = next {
		next = ch.NextSibling

		unwanted := false
		if ch.Type == html.ElementNode {
			for _, tag := range c.CleanTags {
				if tag == ch.Data {
					unwanted = true
					break
				}
			}
		}
		if unwanted {
			n.RemoveChild(ch)
			continue
		}
		size += c.clean(ch)
	}
	return size
}

Example #10

Show file

File: html.go Project: jwatt/kythe

// sliceNode returns the two halves of the HTML tree starting at node after
// splitting it at the given textual offset.
//
// Two copies of node made with copyNode act as the left half (n) and the right
// half (m). When node has a parent, both copies are inserted in front of node
// and node itself is removed from the parent. Element children are distributed
// between the halves according to their bounds in offsets; a child that spans
// the offset is split recursively. Text nodes have their Data cut at the
// offset.
//
// The process is terminated with log.Fatalf when offset lies outside node's
// bounds, when node is neither an element nor a text node, or when the values
// returned by offsets.update for the two halves do not match offset and the
// original end respectively.
func sliceNode(offsets *nodeOffsets, node *html.Node, offset int) (*html.Node, *html.Node) {
	origStart, origEnd := offsets.Bounds(node)
	if origStart > offset || origEnd < offset {
		log.Fatalf("sliceNode: offset %d out of node's span (%d → %d)", offset, origStart, origEnd)
	}

	// n becomes the left half, m the right half.
	n, m := copyNode(node), copyNode(node)
	parent := node.Parent
	if parent != nil {
		// Put both halves where node used to be, then detach node.
		parent.InsertBefore(n, node)
		parent.InsertBefore(m, node)
		parent.RemoveChild(node)
	}

	switch node.Type {
	default:
		log.Fatalf("Unhandled node kind: %d", node.Type)
	case html.ElementNode:
		child := node.FirstChild
		for child != nil {
			// Remember the sibling now: moving child below clears its
			// sibling links.
			next := child.NextSibling

			if _, end := offsets.Bounds(child); end <= offset {
				// child ends at or before the offset: it belongs to the left half.
				node.RemoveChild(child)
				n.AppendChild(child)
			} else if start, _ := offsets.Bounds(child); start > offset {
				// child starts after the offset: it belongs to the right half.
				node.RemoveChild(child)
				m.AppendChild(child)
			} else {
				// child spans the offset: split it. The recursive call leaves
				// both halves attached to child's parent (node), so they are
				// detached before being appended to n and m.
				left, right := sliceNode(offsets, child, offset)
				node.RemoveChild(left)
				node.RemoveChild(right)
				n.AppendChild(left)
				m.AppendChild(right)
			}

			child = next
		}
	case html.TextNode:
		// mark is the cut position inside node.Data, relative to the node's
		// own start (assumes offsets are byte offsets into Data — TODO confirm).
		mark := offset - origStart
		n.Data = node.Data[:mark]
		m.Data = node.Data[mark:]
	}

	// Re-register both halves and check that they meet exactly at offset and
	// end where the original node ended.
	if split := offsets.update(n, origStart); split != offset {
		log.Fatalf("split %d ≠ %d", split, offset)
	}
	if newEnd := offsets.update(m, offset); newEnd != origEnd {
		log.Fatalf("end %d ≠ %d", newEnd, origEnd)
	}

	return n, m
}

Example #11

Show file

File: minify.go Project: membase/ns_server

// doMinify rewrites the children of node in place and returns what it
// collected along the way.
//
//   - A script whose src contains "libs/" and ends in ".js" is pointed at the
//     ".min.js" variant when that file exists under ctx.BaseDir.
//   - Any other ".js" script is recorded in the result's AppScripts and
//     removed; in front of the first such script seen for ctx, the nodes from
//     makeAppMinJsNode and makeNewLine are inserted.
//   - Inside a <head> element, each whitespace text node is removed and a run
//     of them is replaced by a single makeNewLine node.
//   - Pluggable-UI injection comments are counted; every other child is
//     minified recursively and its result merged in.
//
// The first non-empty href found on a <base> child is stored in
// IndexHTMLBase.
func doMinify(node *html.Node, ctx *context) result {
	var (
		out          result
		lastWasSpace bool
	)

	child := node.FirstChild
	for child != nil {
		// Saved up front because child may be removed below.
		following := child.NextSibling

		src := getHTMLNodeAttr(child, "script", "src")
		if out.IndexHTMLBase == "" {
			out.IndexHTMLBase = getHTMLNodeAttr(child, "base", "href")
		}

		switch {
		case strings.Contains(src, "libs/") && strings.HasSuffix(src, ".js"):
			minified := strings.TrimSuffix(src, ".js") + ".min.js"
			if _, err := os.Stat(filepath.Join(ctx.BaseDir, minified)); err == nil {
				replaceAttrValue(child, "src", minified)
			}
			lastWasSpace = false

		case strings.HasSuffix(src, ".js"):
			if !ctx.FoundFirstAppScript {
				ctx.FoundFirstAppScript = true
				node.InsertBefore(makeAppMinJsNode(), child)
				node.InsertBefore(makeNewLine(), child)
			}
			out.AppScripts = append(out.AppScripts, src)
			node.RemoveChild(child)

		case isWhitespaceText(child) && node.Type == html.ElementNode && node.Data == "head":
			if !lastWasSpace {
				node.InsertBefore(makeNewLine(), child)
			}
			node.RemoveChild(child)
			lastWasSpace = true

		default:
			if isPluggableUIInjectionComment(child) {
				out.PluggableInjectionCount++
			} else {
				out.merge(doMinify(child, ctx))
			}
			lastWasSpace = false
		}

		child = following
	}
	return out
}

Example #12

Show file

File: document.go Project: ckome/newscat

// cleanBody removes unwanted HTML elements from the subtree below n: an
// element child whose DataAtom is enabled in removeElements is detached, any
// other element child is cleaned recursively with level+1. Non-element
// children are left alone.
func (doc *Document) cleanBody(n *html.Node, level int) {
	for child := n.FirstChild; child != nil; {
		// RemoveChild sets the removed node's NextSibling to nil, so the
		// successor has to be fetched before the child may be removed.
		following := child.NextSibling

		if child.Type == html.ElementNode {
			if removeElements[child.DataAtom] {
				n.RemoveChild(child)
			} else {
				doc.cleanBody(child, level+1)
			}
		}

		child = following
	}
}

Example #13

Show file

File: cleaner.go Project: BenLubar/htmlcleaner

// cleanChildren detaches all children of parent, runs each one through
// filterNode, and appends the results back to parent in the same order.
//
// When c.WrapText is set and parent's tag is registered in c.wrap (looked up
// by DataAtom) or, for tags without an atom, in c.wrapCustom (looked up by
// Data), the filtered list is passed through wrapText before re-attaching.
func cleanChildren(c *Config, parent *html.Node) {
	var filtered []*html.Node
	for child := parent.FirstChild; child != nil; child = parent.FirstChild {
		parent.RemoveChild(child)
		filtered = append(filtered, filterNode(c, child))
	}

	if c.WrapText {
		_, wrap := c.wrap[parent.DataAtom]
		if !wrap && parent.DataAtom == 0 {
			_, wrap = c.wrapCustom[parent.Data]
		}
		if wrap {
			filtered = wrapText(filtered)
		}
	}

	for i := range filtered {
		parent.AppendChild(filtered[i])
	}
}

Example #14

Show file

File: cleaner.go Project: Vetcher/pagedownloader

// deleteValuelessNodes reports whether innode itself should be deleted by the
// caller, and prunes its subtree otherwise.
//
// Comment nodes and the elements script, meta, style, head, form, noscript,
// img, noindex and span yield true without being modified. For any other node
// every child for which the function yields true is removed, and false is
// returned.
func deleteValuelessNodes(innode *html.Node) bool {
	switch innode.Type {
	case html.CommentNode:
		return true
	case html.ElementNode:
		switch innode.Data {
		case "script", "meta", "style", "head", "form", "noscript", "img", "noindex", "span":
			return true
		}
	}

	var next *html.Node
	for child := innode.FirstChild; child != nil; child = next {
		// Fetch the successor first; removal clears the child's sibling links.
		next = child.NextSibling
		if deleteValuelessNodes(child) {
			innode.RemoveChild(child)
		}
	}
	return false
}

Example #15

Show file

File: 10_textify_brute_force.go Project: aarzilli/tools

// textifyNodeSubtree replaces the children of element node n with a single
// "text" node whose data is the whitespace-normalised result of
// textifySubtreeBruteForce(n, 0), and places a new "br" node after n via
// dom.InsertAfter. Non-element nodes are left untouched.
func textifyNodeSubtree(n *html.Node) {
	if n.Type != html.ElementNode {
		return
	}

	// Compute the replacement text while the subtree is still intact.
	textNode := dom.Nd("text")
	textNode.Data = stringspb.NormalizeInnerWhitespace(textifySubtreeBruteForce(n, 0))

	// Drop every existing child.
	for n.FirstChild != nil {
		n.RemoveChild(n.FirstChild)
	}
	n.AppendChild(textNode)

	lineBreak := dom.Nd("br")
	dom.InsertAfter(n, lineBreak)
}

Example #16

Show file

File: 07_condense_bottom_up_v2.go Project: aarzilli/tools

// flattenSubtreeV2 moves n and, recursively, the nodes below it into the flat
// container tpar, and writes a textual trace to b.
//
// A nil b is replaced by a fresh buffer; a nil tpar is replaced by a new,
// childless node that copies n's Type, DataAtom, Data and attributes. Both are
// returned so the outermost caller can pick them up. depth is the recursion
// depth and only appears in the trace for <em>/<strong>.
//
// n is detached from its parent in every case, so n must have a parent.
// Unhandled nodes are logged and dropped instead of being moved to tpar.
func flattenSubtreeV2(n *html.Node, b *bytes.Buffer, depth int, tpar *html.Node) (*bytes.Buffer, *html.Node) {

	if b == nil {
		b = new(bytes.Buffer)
	}
	if tpar == nil {
		tpar = &html.Node{
			Type:     n.Type,
			DataAtom: n.DataAtom,
			Data:     n.Data,
			Attr:     make([]html.Attribute, len(n.Attr)),
		}
		copy(tpar.Attr, n.Attr)
	}

	switch {
	case n.Type == html.ElementNode && n.Data == "a":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
		// wpf(b, "[a] ")
	case n.Type == html.ElementNode && n.Data == "img":
		// img2Link(n)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "em" || n.Data == "strong":
		wpf(b, "[%v l%v] ", n.Data, depth)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "label" || n.Data == "input" || n.Data == "textarea":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "p" || n.Data == "div" || n.Data == "li" || n.Data == "ol" || n.Data == "h1" || n.Data == "h2" || n.Data == "ul":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "span":
		// NOTE(review): RemoveChild clears c.NextSibling, so this loop moves
		// only the first child; the remaining children are picked up by the
		// recursion at the bottom. Confirm that this is intended.
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			n.RemoveChild(c)
			tpar.AppendChild(c)
		}
		n.Parent.RemoveChild(n)
	case n.Type == html.TextNode && n.Data != "":
		n.Data = strings.TrimSpace(n.Data)
		n.Data += " "
		// wpf takes a format string (see the em/strong case), so the document
		// text is passed as an argument; used as the format itself, any '%'
		// in the text would be misread as a formatting verb.
		wpf(b, "%s", n.Data)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	default:
		log.Printf("unhandled %s %s\n", dom.NodeTypeStr(n.Type), n.Data)
		n.Parent.RemoveChild(n)
	}

	// Flatten whatever is still attached below n. The children are collected
	// first because each recursive call detaches its node from n.
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		// fmt.Printf("still has children %v\n", c.Data)
		children = append(children, c)
	}
	for _, c := range children {
		flattenSubtreeV2(c, b, depth+1, tpar)
	}

	return b, tpar
}

Example #17

Show file

File: transform.go Project: asartalo/go-html-transform

// removeChildren detaches every child of n, leaving n without children.
//
// The list is drained from the front rather than walked, because RemoveChild
// clears the removed node's sibling links. This also avoids queueing one
// deferred call per child.
func removeChildren(n *html.Node) {
	for n.FirstChild != nil {
		n.RemoveChild(n.FirstChild)
	}
}

Example #18

Show file

File: 03_top_down_v3.go Project: aarzilli/tools

// topDownV3 condenses the tree *selectively*, starting at the top node l1 and
// pulling the level below its children up by one.
//
// Each child l2 of l1 is judged on its own: when l2 is an element whose tag is
// enabled in l2Types and every one of its children (l3) is an element whose
// tag is enabled in l3Types, the l3 nodes are moved in front of l2 under l1
// and l2 is removed. <a> and <span> grandchildren are wrapped in a
// <p cfrm="noth"> on the way up. Children of l1 that do not qualify are left
// as they are.
//
// l1 must be an element or document node and must not be a <span> or <a>;
// otherwise nothing happens.
func topDownV3(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) {

	switch l1.Type {
	case html.ElementNode, html.DocumentNode:
		// these node types can take children
	default:
		return
	}
	if l1.Data == "span" || l1.Data == "a" {
		return // never condense into inline containers
	}

	// Snapshot two levels of the tree before anything is moved.
	var l2s []*html.Node
	l3s := map[*html.Node][]*html.Node{}
	for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling {
		l2s = append(l2s, l2)
		for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling {
			l3s[l2] = append(l3s[l2], l3)
		}
	}

	// l2 nodes are still needed as insertion anchors while their children
	// are re-attached, so their own removal is postponed to the very end.
	var emptied []*html.Node

	for _, l2 := range l2s {
		if l2.Type != html.ElementNode || !l2Types[l2.Data] {
			continue
		}

		grandchildren := l3s[l2]
		allMatch := true
		for _, l3 := range grandchildren {
			if l3.Type != html.ElementNode || !l3Types[l3.Data] {
				allMatch = false
				break
			}
		}
		if !allMatch {
			continue
		}

		// detach l3 from l2
		for _, l3 := range grandchildren {
			l2.RemoveChild(l3)
		}
		emptied = append(emptied, l2)

		// attach l3 to l1, right in front of l2
		for _, l3 := range grandchildren {
			if l3.Data == "a" || l3.Data == "span" {
				wrap := dom.Nd("p")
				wrap.Attr = []html.Attribute{html.Attribute{Key: "cfrm", Val: "noth"}}
				// AppendChild, not a bare FirstChild assignment, so that all
				// links of the wrapper are set.
				wrap.AppendChild(l3)
				l1.InsertBefore(wrap, l2)
			} else {
				l1.InsertBefore(l3, l2)
			}
		}
	}

	for _, l2 := range emptied {
		l1.RemoveChild(l2) // detach l2 from l1
	}

}

Example #19

Show file

File: 07_condense_bottom_up_v2.go Project: aarzilli/tools

// condenseBottomUpV2 descends from n until depth lvlDo is reached and
// flattens matching element nodes found there.
//
// While lvl < lvlDo the function only recurses into a snapshot of n's
// children with lvl+1. Once lvl >= lvlDo, an element node whose tag is
// enabled in types and which has a parent is handed to flattenSubtreeV2;
// afterwards children are shuffled between n's former parent and the
// container returned by flattenSubtreeV2 (see the notes at the two loops
// below). All other nodes are left untouched.
func condenseBottomUpV2(n *html.Node, lvl, lvlDo int, types map[string]bool) {

	if lvl < lvlDo {

		// Collect the children before recursing: the calls below may detach
		// nodes, which would break a walk over the live sibling links.
		cs := []*html.Node{}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			cs = append(cs, c)
		}
		for _, c := range cs {
			condenseBottomUpV2(c, lvl+1, lvlDo, types)
		}

	} else {

		// log.Printf("action on %v %v\n", lvl, lvlDo)

		switch {

		case n.Type == html.ElementNode && types[n.Data]:

			// Keep a handle on the parent before n is processed.
			oldPar := n.Parent
			if oldPar == nil {
				return
			}

			// b receives the textual trace, newPar the flattened nodes.
			b, newPar := flattenSubtreeV2(n, nil, 0, nil)

			// placeholder := dom.Nd("div")
			// par := n.Parent
			// par.InsertBefore(placeholder, n.NextSibling)
			// par.RemoveChild(n)
			// par.InsertBefore(n2, placeholder)

			// NOTE(review): RemoveChild sets c.NextSibling to nil, so this
			// loop ends after detaching the first child of oldPar only.
			// Confirm whether emptying oldPar completely was intended.
			for c := oldPar.FirstChild; c != nil; c = c.NextSibling {
				oldPar.RemoveChild(c)
			}

			// NOTE(review): for the same reason only the first child of
			// newPar is moved over to oldPar here (c is appended last, so its
			// NextSibling stays nil) — confirm the intent.
			for c := newPar.FirstChild; c != nil; c = c.NextSibling {
				newPar.RemoveChild(c)
				oldPar.AppendChild(c)
			}

			// Dump what is left in the container for deep levels.
			if lvlDo > 4 {
				bx := dom.PrintSubtree(newPar)
				fmt.Printf("%s", bx)
			}

			// n = n2

			// Only used by the disabled branch below.
			nodeRepl := dom.Nd("text", b.String())

			// Disabled alternative: replace n's children by the trace text.
			if false {

				// Remove all existing children.
				// Direct loop impossible, since "NextSibling" is set to nil by Remove().
				children := []*html.Node{}
				for c := n.FirstChild; c != nil; c = c.NextSibling {
					children = append(children, c) //  assembling separately, before removing.
				}
				for _, c := range children {
					log.Printf("c %4v rem from %4v ", c.Data, n.Data)
					n.RemoveChild(c)
				}

				// we can't put our replacement "under" an image, since img cannot have children
				if n.Type == html.ElementNode && n.Data == "img" {
					n.Parent.InsertBefore(nodeRepl, n.NextSibling) // if n.NextSibling==nil => insert at the end
					n.Parent.RemoveChild(n)
				} else {
					n.AppendChild(nodeRepl)
				}

				// Insert a  || and a newline before every <a...>
				// if n.Data == "a" {
				// 	n.Parent.InsertBefore(dom.Nd("text", " || "), n)
				// }
			}

		default:
		}

	}

}

Example #20

Show file

File: 03_top_down_v2.go Project: aarzilli/tools

// topDownV2 condenses a three-level subtree rooted at l1: when *all* children
// of l1 (l2) are elements with a tag enabled in l2Types and *all* of their
// children (l3) are elements with a tag enabled in l3Types, every l3 node is
// moved directly under l1 and the l2 nodes are eliminated.
//
// The rule is all-or-nothing: a single non-matching l2 or l3 node leaves the
// tree untouched, since a partial restructuring would scramble the sequence
// of the nodes. l1 must be an element or document node and must not be a
// <span> or <a>.
//
// So far l2Types has always been "div". Wrapping <a> or text l3 nodes and
// changing l1's type would be conceivable extensions but are not implemented.
func topDownV2(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) {

	if !(l1.Type == html.ElementNode || l1.Type == html.DocumentNode) {
		return // other node types cannot take children
	}
	switch l1.Data {
	case "span", "a":
		return // never condense into inline containers
	}

	// Snapshot the second level.
	var l2s []*html.Node
	for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling {
		l2s = append(l2s, l2)
	}

	// Check the types of both levels; bail out on the first mismatch.
	for _, l2 := range l2s {
		if l2.Type != html.ElementNode || !l2Types[l2.Data] {
			return
		}
		for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling {
			if l3.Type != html.ElementNode || !l3Types[l3.Data] {
				return
			}
		}
	}

	// Act: take each l2 out of l1 and hang its children onto the end of l1.
	// As every child of l1 is processed in order, the overall sequence of
	// the l3 nodes is preserved.
	for _, l2 := range l2s {
		l1.RemoveChild(l2)
		for l2.FirstChild != nil {
			l3 := l2.FirstChild
			l2.RemoveChild(l3)
			l1.AppendChild(l3)
		}
	}

}

Example #21

Show file

File: 05_breakout_imgs_from_a_trees.go Project: aarzilli/tools

// breakoutImagesFromAnchorTrees moves images out of <a> elements, working
// through the children of n before n itself.
//
// For an <a> in which searchImg finds an image:
//   - if the image is at level 1 and is the anchor's only child, it is moved
//     to directly after the anchor, and the anchor gets a text child built
//     from its beautified href (prefixed with "[was img] " when shorter than
//     six bytes);
//   - otherwise the anchor is cloned, closureDeleter(true) is applied to the
//     original and closureDeleter(false) to the clone, and the image followed
//     by the clone is inserted directly after the original anchor.
//
// When debugBreakOut is set, the intermediate subtrees are logged.
func breakoutImagesFromAnchorTrees(n *html.Node) {

	for child := n.FirstChild; child != nil; child = child.NextSibling {
		breakoutImagesFromAnchorTrees(child)
	}

	if n.Type != html.ElementNode || n.Data != "a" {
		return
	}

	img, lvl := searchImg(n, nil, 0)
	if img == nil {
		return
	}

	soleChild := n.FirstChild != nil && n.FirstChild == n.LastChild
	if lvl == 1 && soleChild {
		n.RemoveChild(img)
		// Insert after n; a nil NextSibling means "append at the end".
		n.Parent.InsertBefore(img, n.NextSibling)
		label := urlBeautify(attrX(n.Attr, "href"))
		if len(label) < 6 {
			label = "[was img] " + label
		}
		n.AppendChild(dom.Nd("text", label))
		return
	}

	// dump logs the given subtree when debugging is switched on.
	dump := func(root *html.Node) {
		if debugBreakOut {
			log.Printf("\n%s\n", dom.PrintSubtree(root))
		}
	}

	dump(n)

	clone := dom.CloneNodeWithSubtree(n)
	closureDeleter(true)(n, 0, false)
	dump(n)

	closureDeleter(false)(clone, 0, false)
	dump(clone)
	if debugBreakOut {
		log.Printf("--------------------\n")
	}

	// Insert image and clone right after n. Appending them to the end of the
	// parent instead (the old way) corrupted the sequence whenever n had
	// siblings to its right.
	n.Parent.InsertBefore(img, n.NextSibling)
	n.Parent.InsertBefore(clone, img.NextSibling)

}