Golang Node.RemoveChild примеры, golang.org/x/net/html.Node.RemoveChild Golang примеры использования

Пример #1

0

Показать файл

Файл: node.go Проект: justintan/gox

// CompactNode simplifies the subtree rooted at n in place.
//
// Every child is compacted recursively first. A child whose tag is flagged in
// _mergeTextElements is removed and its own children are collected so they can
// be attached to n afterwards; an element child that has no children and is
// not flagged in _voidElements is removed outright.
//
// Afterwards, when n is left with exactly one child:
//   - a child carrying the same tag as n, or a lone "br" inside a "p"/"div",
//     is removed and its children are attached to n instead;
//   - a lone "img" inside an "a" replaces the anchor: n takes over the image's
//     type, tag, namespace, attributes and children.
func CompactNode(n *html.Node) {
	var appendNodes []*html.Node
	for c := n.FirstChild; c != nil; {
		CompactNode(c)
		switch {
		case _mergeTextElements[c.Data]:
			appendNodes = append(appendNodes, GetChildNodes(c)...)
			log.Info("delete", c.Data)
			// The value returned by RemoveNode is the node visited next.
			c = RemoveNode(c)
		case c.Type == html.ElementNode && c.FirstChild == nil && !_voidElements[c.Data]:
			log.Info("delete", c.Data)
			c = RemoveNode(c)
		default:
			c = c.NextSibling
		}
	}

	DetachNodes(appendNodes)
	AppendChildNodes(n, appendNodes)

	only := n.FirstChild
	if only == nil || only.NextSibling != nil {
		return
	}

	switch {
	case only.Data == n.Data || (only.Data == "br" && (n.Data == "p" || n.Data == "div")):
		childNodes := GetChildNodes(only)
		log.Info("delete", only.Data)
		n.RemoveChild(only)
		DetachNodes(childNodes)
		AppendChildNodes(n, childNodes)
	case only.Data == "img" && n.Data == "a":
		// Copy only the payload fields. Assigning the whole struct
		// (*n = *only) would also copy the link fields: n.Parent would then
		// point at n itself and n's sibling links would be cleared, which
		// corrupts the surrounding tree.
		n.RemoveChild(only)
		n.Type, n.DataAtom, n.Data = only.Type, only.DataAtom, only.Data
		n.Namespace, n.Attr = only.Namespace, only.Attr
		// Carry over any children the image node may have.
		for gc := only.FirstChild; gc != nil; gc = only.FirstChild {
			only.RemoveChild(gc)
			n.AppendChild(gc)
		}
	}
}

Пример #2

0

Показать файл

Файл: parser_utils.go Проект: ReanGD/go-web-search

// mergeNodes joins the neighbouring nodes prev and next (children of parent)
// and returns the node the caller should continue with.
//
// When both are text nodes, next's text is appended to prev (with a single
// space in between if addSeparator is set), next is removed from parent, and
// prev's new next sibling is returned. Otherwise the separator is appended to
// prev if prev is text, prepended to next if next is text, or - when neither
// is text and addSeparator is set - inserted as a new text node before next;
// in all of these cases next is returned.
func (u *parserUtils) mergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	var sep string
	if addSeparator {
		sep = " "
	}
	prevIsText := prev != nil && prev.Type == html.TextNode
	nextIsText := next != nil && next.Type == html.TextNode

	switch {
	case prevIsText && nextIsText:
		prev.Data += sep + next.Data
		parent.RemoveChild(next)
		return prev.NextSibling
	case prevIsText:
		prev.Data += sep
	case nextIsText:
		next.Data = sep + next.Data
	case addSeparator:
		parent.InsertBefore(&html.Node{Type: html.TextNode, Data: sep}, next)
	}
	return next
}

Пример #3

0

Показать файл

Файл: 03_top_down_v1.go Проект: aarzilli/tools

/*
topDownV1 lifts the children of a nested element one level up and removes the
emptied intermediate element.

   div                     div
       div                     p
           p         TO        img
           img                 p
           p


	Operates from the *middle* div.
	Walks the children last-to-first.
	Removes each child and reattaches it one level higher.
	Finally the intermediary, now childless div is removed.




   \                  /
    \       /\       /
     \_____/  \_____/

     \              /
      \_____/\_____/

       \__________/     => Breaks are gone


       \p1___p2___/     => Wrapping preserves breaks


	n          the middle element; nothing happens when noParent(n) is true.
	couple     couple[0] is the tag required on n's parent, couple[1] the tag
	           required on n itself.
	parentType the tag written to the parent's Data after the lift.

	Text nodes and nodes whose Data is "a" are wrapped in a <p cfrm="div">
	element before being reattached.
*/
func topDownV1(n *html.Node, couple []string, parentType string) {

	if noParent(n) {
		return
	}
	p := n.Parent

	parDiv := p.Type == html.ElementNode && p.Data == couple[0] // Parent is a div
	iAmDiv := n.Type == html.ElementNode && n.Data == couple[1] // I am a div
	if !parDiv || !iAmDiv || n.FirstChild == nil {
		return
	}

	// Snapshot the children: RemoveChild clears sibling links, so the list
	// cannot be walked while it is being taken apart.
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		children = append(children, c)
	}

	// Walk last-to-first, always inserting before the previously placed node;
	// this keeps the original order in the parent.
	insertionPoint := n.NextSibling
	for i := len(children) - 1; i >= 0; i-- {
		c1 := children[i]
		n.RemoveChild(c1)

		moved := c1
		if c1.Type == html.TextNode || c1.Data == "a" {
			wrap := &html.Node{
				Type: html.ElementNode,
				Data: "p",
				Attr: []html.Attribute{{Key: "cfrm", Val: "div"}},
			}
			// AppendChild keeps FirstChild, LastChild and Parent consistent;
			// setting wrap.FirstChild by hand left LastChild nil.
			wrap.AppendChild(c1)
			moved = wrap
		}

		p.InsertBefore(moved, insertionPoint)
		insertionPoint = moved
	}

	p.RemoveChild(n)
	p.Data = parentType
}

Пример #4

0

Показать файл

Файл: 01_cleanse.go Проект: aarzilli/tools

// removeUnwanted detaches every direct child of n whose Data is flagged in
// the unwanteds lookup.
//
// RemoveChild clears the sibling links of the node it detaches, so the
// successor is captured before a child is possibly removed.
func removeUnwanted(n *html.Node) {
	for c := n.FirstChild; c != nil; {
		following := c.NextSibling
		if unwanteds[c.Data] {
			n.RemoveChild(c)
		}
		c = following
	}
}

Пример #5

0

Показать файл

Файл: 6_dir_digest_3.go Проект: aarzilli/tools

// removeUnwanted detaches the direct children of n that are <script> elements
// or comment nodes.
//
// The predicate is evaluated on each child. Testing the parent n instead made
// the decision all-or-nothing, and the comment-node arm could never fire
// because a comment node has no children to remove.
func removeUnwanted(n *html.Node) {
	for c := n.FirstChild; c != nil; {
		// RemoveChild clears c's sibling links; remember the successor first.
		next := c.NextSibling
		if (c.Type == html.ElementNode && c.Data == "script") || c.Type == html.CommentNode {
			n.RemoveChild(c)
		}
		c = next
	}
}

Пример #6

0

Показать файл

Файл: mutate.go Проект: albertjin/goquery

// setNodeText empties node and gives it a single text child holding s.
func setNodeText(node *html.Node, s string) {
	// Drop the existing children one by one, always taking the current head.
	for child := node.FirstChild; child != nil; child = node.FirstChild {
		node.RemoveChild(child)
	}

	text := &html.Node{Type: html.TextNode, Data: s}
	node.AppendChild(text)
}

Пример #7

0

Показать файл

Файл: readability.go Проект: jpoehls/feedmailer

// replaceNodeWithChildren moves every child of n into n's parent, directly in
// front of n and in their original order, and then removes n from the parent.
func replaceNodeWithChildren(n *html.Node) {
	parent := n.Parent

	// Always take the current head: once detached it is re-inserted into the
	// parent, so n's child list shrinks by one on every pass.
	for n.FirstChild != nil {
		child := n.FirstChild
		n.RemoveChild(child)
		parent.InsertBefore(child, n)
	}

	parent.RemoveChild(n)
}

Пример #8

0

Показать файл

Файл: 03_condense_top_down.go Проект: aarzilli/tools

// removeEmptyNodes walks the subtree below n depth-first and, on the way back
// up, detaches n from its parent when n is one of:
//   - an <img> whose "src" attribute (as returned by attrX) is empty;
//   - a childless <a> whose "href" is empty or "#";
//   - a childless <em>, <strong>, <div>, <span>, <li> or <p>.
//
// A <span> whose only child is a text node with fewer than 3 bytes of trimmed
// text is turned into a text node carrying that text.
//
// lvl is the current depth; it is only passed along to the recursive calls.
func removeEmptyNodes(n *html.Node, lvl int) {

	// Children first. A child may detach itself during its own call, which
	// clears its sibling links, so the successor is remembered beforehand.
	for c := n.FirstChild; c != nil; {
		following := c.NextSibling
		removeEmptyNodes(c, lvl+1)
		c = following
	}

	if n.Type != html.ElementNode {
		return
	}

	childless := n.FirstChild == nil
	switch n.Data {
	case "img":
		if attrX(n.Attr, "src") == "" {
			n.Parent.RemoveChild(n)
		}
	case "a":
		if childless {
			if href := attrX(n.Attr, "href"); href == "#" || href == "" {
				n.Parent.RemoveChild(n)
			}
		}
	case "em", "strong", "div", "span", "li", "p":
		if childless {
			n.Parent.RemoveChild(n)
		}
	}

	// Spans holding nothing but a very short text => flatten to text.
	if n.Data != "span" {
		return
	}
	only := n.FirstChild
	if only == nil || only != n.LastChild || only.Type != html.TextNode {
		return
	}
	if len(strings.TrimSpace(only.Data)) < 3 {
		n.Type = html.TextNode
		n.Data = only.Data
		n.RemoveChild(only)
	}

}

Пример #9

0

Показать файл

Файл: clean.go Проект: documize/html-diff

// clean normalises the subtree rooted at n in place and returns a size
// estimate for it.
//
// For element nodes, empty "style" attributes are deleted, non-empty ones have
// their spaces stripped and a trailing ";" enforced, and colspan="1" is
// deleted from <td> elements. For text nodes, the text is unescaped so that
// character references (for example "&#160;") end up as plain UTF-8.
// Element children whose tag is listed in c.CleanTags are removed; all other
// children are cleaned recursively.
//
// The result is 1 per visited node, plus one less than the rune count for each
// text node. Every surviving child is visited exactly once.
// TODO more cleaning of the input HTML, as required.
func (c *Config) clean(n *html.Node) int {
	size := 1
	switch n.Type {
	case html.ElementNode:
		for ai := 0; ai < len(n.Attr); ai++ {
			a := n.Attr[ai]
			switch {
			case strings.ToLower(a.Key) == "style":
				if strings.TrimSpace(a.Val) == "" { // delete empty styles
					n.Attr = delAttr(n.Attr, ai)
					ai-- // re-examine the attribute that moved into this slot
				} else { // tidy non-empty styles
					// TODO there could be more here to make sure the style entries are in the same order etc.
					n.Attr[ai].Val = strings.Replace(a.Val, " ", "", -1)
					if !strings.HasSuffix(n.Attr[ai].Val, ";") {
						n.Attr[ai].Val += ";"
					}
				}
			case n.DataAtom == atom.Td &&
				strings.ToLower(a.Key) == "colspan" &&
				strings.TrimSpace(a.Val) == "1":
				n.Attr = delAttr(n.Attr, ai)
				ai--
			}
		}
	case html.TextNode:
		n.Data = htm.UnescapeString(n.Data)
		size += utf8.RuneCountInString(n.Data) - 1 // len(n.Data) would be faster, but use more memory
	}

	// Walk the children once. RemoveChild clears the removed node's sibling
	// links, so the successor is saved up front. Restarting from FirstChild
	// after each removal would clean the earlier siblings again, counting
	// them twice and unescaping their text twice.
	var next *html.Node
	for ch := n.FirstChild; ch != nil; ch = next {
		next = ch.NextSibling

		unwanted := false
		if ch.Type == html.ElementNode {
			for _, tag := range c.CleanTags {
				if tag == ch.Data {
					unwanted = true
					break
				}
			}
		}
		if unwanted {
			n.RemoveChild(ch)
			continue
		}
		size += c.clean(ch)
	}
	return size
}

Пример #10

0

Показать файл

Файл: html.go Проект: jwatt/kythe

// sliceNode returns the two halves of the HTML tree starting at node after
// splitting it at the given textual offset.
//
// Two copies of node (n and m) are created. When node has a parent, both
// copies are inserted in node's place and node itself is detached. For an
// element node, children ending at or before offset go to n, children starting
// after offset go to m, and a child straddling offset is split recursively.
// For a text node, the text is cut at offset relative to the node's start.
//
// The process is terminated via log.Fatalf when offset lies outside node's
// bounds, when node is neither an element nor a text node, or when the
// offsets recomputed for n and m disagree with the expected values.
func sliceNode(offsets *nodeOffsets, node *html.Node, offset int) (*html.Node, *html.Node) {
	origStart, origEnd := offsets.Bounds(node)
	if origStart > offset || origEnd < offset {
		log.Fatalf("sliceNode: offset %d out of node's span (%d → %d)", offset, origStart, origEnd)
	}

	// NOTE(review): copyNode presumably yields copies without children, since
	// children are appended to them below - confirm.
	n, m := copyNode(node), copyNode(node)
	parent := node.Parent
	if parent != nil {
		// Put n and m where node was, in that order, then drop node.
		parent.InsertBefore(n, node)
		parent.InsertBefore(m, node)
		parent.RemoveChild(node)
	}

	switch node.Type {
	default:
		log.Fatalf("Unhandled node kind: %d", node.Type)
	case html.ElementNode:
		child := node.FirstChild
		for child != nil {
			// Saved up front: the child is detached from node in every branch.
			next := child.NextSibling

			if _, end := offsets.Bounds(child); end <= offset {
				// Entirely left of the split point.
				node.RemoveChild(child)
				n.AppendChild(child)
			} else if start, _ := offsets.Bounds(child); start > offset {
				// Entirely right of the split point.
				node.RemoveChild(child)
				m.AppendChild(child)
			} else {
				// Straddles the split point. The recursive call replaces child
				// under node with its two halves, which are then moved on to
				// n and m respectively.
				left, right := sliceNode(offsets, child, offset)
				node.RemoveChild(left)
				node.RemoveChild(right)
				n.AppendChild(left)
				m.AppendChild(right)
			}

			child = next
		}
	case html.TextNode:
		// Offset of the cut within this node's own text.
		mark := offset - origStart
		n.Data = node.Data[:mark]
		m.Data = node.Data[mark:]
	}

	// Sanity checks: the value update returns for n must equal the split
	// offset, and the one for m must equal the original end.
	if split := offsets.update(n, origStart); split != offset {
		log.Fatalf("split %d ≠ %d", split, offset)
	}
	if newEnd := offsets.update(m, offset); newEnd != origEnd {
		log.Fatalf("end %d ≠ %d", newEnd, origEnd)
	}

	return n, m
}

Пример #11

0

Показать файл

Файл: minify.go Проект: membase/ns_server

// doMinify rewrites the children of node for minification and reports what it
// found in a result.
//
// Per child:
//   - a script whose src contains "libs/" and ends in ".js" is pointed at its
//     ".min.js" sibling when that file exists under ctx.BaseDir;
//   - any other ".js" script is recorded in AppScripts and removed; before the
//     first such script a replacement node and a newline are inserted;
//   - whitespace text directly inside <head> is removed, with runs of it
//     collapsed to a single newline node;
//   - a pluggable-UI injection comment is counted; everything else is
//     processed recursively and its result merged in.
//
// The first non-empty <base href> value seen becomes IndexHTMLBase.
func doMinify(node *html.Node, ctx *context) result {
	rv := result{}
	lastWasSpace := false

	for child := node.FirstChild; child != nil; {
		// The child may be removed below, so grab its successor right away.
		following := child.NextSibling

		script := getHTMLNodeAttr(child, "script", "src")
		if rv.IndexHTMLBase == "" {
			rv.IndexHTMLBase = getHTMLNodeAttr(child, "base", "href")
		}

		isJS := strings.HasSuffix(script, ".js")
		switch {
		case isJS && strings.Contains(script, "libs/"):
			minFile := strings.TrimSuffix(script, ".js") + ".min.js"
			_, err := os.Stat(filepath.Join(ctx.BaseDir, minFile))
			if err == nil {
				replaceAttrValue(child, "src", minFile)
			}
			lastWasSpace = false

		case isJS:
			if !ctx.FoundFirstAppScript {
				ctx.FoundFirstAppScript = true
				node.InsertBefore(makeAppMinJsNode(), child)
				node.InsertBefore(makeNewLine(), child)
			}
			rv.AppScripts = append(rv.AppScripts, script)
			node.RemoveChild(child)

		case isWhitespaceText(child) && node.Type == html.ElementNode && node.Data == "head":
			if !lastWasSpace {
				node.InsertBefore(makeNewLine(), child)
			}
			node.RemoveChild(child)
			lastWasSpace = true

		default:
			if isPluggableUIInjectionComment(child) {
				rv.PluggableInjectionCount++
			} else {
				rv.merge(doMinify(child, ctx))
			}
			lastWasSpace = false
		}

		child = following
	}
	return rv
}

Пример #12

0

Показать файл

Файл: document.go Проект: ckome/newscat

// cleanBody walks the element children of n: a child whose DataAtom is flagged
// in removeElements is detached from n, any other element child is processed
// recursively with level+1. Non-element children are left untouched.
func (doc *Document) cleanBody(n *html.Node, level int) {
	for curr := n.FirstChild; curr != nil; {
		// RemoveChild resets curr.NextSibling to nil, which would end the
		// walk early, so the successor is captured before anything else.
		next := curr.NextSibling

		switch {
		case curr.Type != html.ElementNode:
			// Only elements are inspected.
		case removeElements[curr.DataAtom]:
			n.RemoveChild(curr)
		default:
			doc.cleanBody(curr, level+1)
		}

		curr = next
	}
}

Пример #13

0

Показать файл

Файл: cleaner.go Проект: BenLubar/htmlcleaner

// cleanChildren detaches every child of parent, passes each through
// filterNode, optionally wraps the results with wrapText, and appends the
// resulting nodes back onto parent in order.
//
// Wrapping happens only when c.WrapText is set and parent is listed either in
// c.wrap (by DataAtom) or, for elements without a known atom, in c.wrapCustom
// (by Data).
func cleanChildren(c *Config, parent *html.Node) {
	var filtered []*html.Node
	for child := parent.FirstChild; child != nil; child = parent.FirstChild {
		parent.RemoveChild(child)
		filtered = append(filtered, filterNode(c, child))
	}

	if c.WrapText {
		_, wrap := c.wrap[parent.DataAtom]
		if !wrap && parent.DataAtom == 0 {
			_, wrap = c.wrapCustom[parent.Data]
		}
		if wrap {
			filtered = wrapText(filtered)
		}
	}

	for i := range filtered {
		parent.AppendChild(filtered[i])
	}
}

Пример #14

0

Показать файл

Файл: cleaner.go Проект: Vetcher/pagedownloader

// deleteValuelessNodes reports whether innode itself should be deleted by the
// caller, and prunes such nodes from innode's subtree.
//
// It returns true for comment nodes and for script, meta, style, head, form,
// noscript, img, noindex and span elements. For any other node it removes the
// children for which the recursive call returns true and then returns false.
func deleteValuelessNodes(innode *html.Node) bool {
	switch innode.Type {
	case html.CommentNode:
		return true
	case html.ElementNode:
		switch innode.Data {
		case "script", "meta", "style", "head", "form", "noscript", "img", "noindex", "span":
			return true
		}
	}

	var next *html.Node
	for child := innode.FirstChild; child != nil; child = next {
		// Remember the successor; RemoveChild clears the child's links.
		next = child.NextSibling
		if deleteValuelessNodes(child) {
			innode.RemoveChild(child)
		}
	}
	return false
}

Пример #15

0

Показать файл

Файл: 10_textify_brute_force.go Проект: aarzilli/tools

// textifyNodeSubtree replaces the children of an element node with one node
// created by dom.Nd("text"), whose Data is the whitespace-normalised output of
// textifySubtreeBruteForce for n, and inserts a node created by dom.Nd("br")
// after n. Non-element nodes are left untouched.
func textifyNodeSubtree(n *html.Node) {

	if n.Type != html.ElementNode {
		return
	}

	// Compute the text while the subtree is still intact.
	text := dom.Nd("text")
	text.Data = stringspb.NormalizeInnerWhitespace(textifySubtreeBruteForce(n, 0))

	for n.FirstChild != nil {
		n.RemoveChild(n.FirstChild)
	}
	n.AppendChild(text)

	dom.InsertAfter(n, dom.Nd("br"))

}

Пример #16

0

Показать файл

Файл: 07_condense_bottom_up_v2.go Проект: aarzilli/tools

// flattenSubtreeV2 detaches n from its parent and, for the node kinds handled
// below, appends it to the flat collector node tpar; it then recurses into the
// children n still has. It returns the text buffer b and the collector tpar.
//
// b is allocated when nil. tpar is created when nil as a copy of n's Type,
// DataAtom, Data and attributes (without children). depth is only used in the
// marker written for em/strong nodes and is incremented for recursive calls.
//
// n.Parent is dereferenced in every branch, so n must have a parent.
func flattenSubtreeV2(n *html.Node, b *bytes.Buffer, depth int, tpar *html.Node) (*bytes.Buffer, *html.Node) {

	if b == nil {
		b = new(bytes.Buffer)
	}
	if tpar == nil {
		tpar = &html.Node{
			Type:     n.Type,
			DataAtom: n.DataAtom,
			Data:     n.Data,
			Attr:     make([]html.Attribute, len(n.Attr)),
		}
		copy(tpar.Attr, n.Attr)
	}

	switch {
	case n.Type == html.ElementNode && n.Data == "a":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
		// wpf(b, "[a] ")
	case n.Type == html.ElementNode && n.Data == "img":
		// img2Link(n)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "em" || n.Data == "strong":
		// Leave a marker with tag name and depth in the text buffer.
		wpf(b, "[%v l%v] ", n.Data, depth)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "label" || n.Data == "input" || n.Data == "textarea":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "p" || n.Data == "div" || n.Data == "li" || n.Data == "ol" || n.Data == "h1" || n.Data == "h2" || n.Data == "ul":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "span":
		// The span itself is dropped; its children are meant to move to tpar.
		// NOTE(review): RemoveChild clears c.NextSibling and AppendChild puts
		// c last, so this loop stops after the first child. The remaining
		// children are picked up by the recursion at the bottom - confirm
		// that this asymmetry is intended.
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			n.RemoveChild(c)
			tpar.AppendChild(c)
		}
		n.Parent.RemoveChild(n)
	case n.Type == html.TextNode && n.Data != "":
		// Normalise to trimmed text followed by exactly one space.
		n.Data = strings.TrimSpace(n.Data)
		n.Data += " "
		// NOTE(review): n.Data is passed in the format position; if wpf is
		// printf-like, a '%' in the text would be misinterpreted - confirm.
		wpf(b, n.Data)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	default:
		// Unknown kinds are logged and dropped from the tree.
		log.Printf("unhandled %s %s\n", dom.NodeTypeStr(n.Type), n.Data)
		n.Parent.RemoveChild(n)
	}

	// Recurse into whatever children n still has. They are collected first
	// because each recursive call detaches the child from n.
	children := []*html.Node{}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		// fmt.Printf("still has children %v\n", c.Data)
		children = append(children, c) //  assembling separately, before removing.
	}
	for _, c := range children {
		flattenSubtreeV2(c, b, depth+1, tpar)
	}

	return b, tpar
}

Пример #17

0

Показать файл

Файл: transform.go Проект: asartalo/go-html-transform

// removeChildren detaches every child of n.
//
// The children are removed head-first in a plain loop. Deferring one
// RemoveChild call per child only worked because the deferred calls run after
// the traversal has finished, and it queued a deferred call for every child.
func removeChildren(n *html.Node) {
	for n.FirstChild != nil {
		n.RemoveChild(n.FirstChild)
	}
}

Пример #18

0

Показать файл

Файл: 03_top_down_v3.go Проект: aarzilli/tools

// topDownV3 condenses a three-level subtree *selectively*: starting from l1,
// each child (l2) is judged on its own. When l2 is an element whose tag is in
// l2Types and every one of its children (l3) is an element whose tag is in
// l3Types, the l3 nodes are moved up under l1 - in front of l2 - and l2 is
// removed. Other l2 subtrees stay as they are.
//
// l3 nodes tagged "a" or "span" are wrapped in a <p cfrm="noth"> element
// before being attached to l1.
//
// Nothing happens when l1 is neither an element nor a document node, or when
// l1 is a "span" or an "a".
func topDownV3(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) {

	if l1.Type != html.ElementNode && l1.Type != html.DocumentNode {
		return // such a node cannot take children
	}
	if l1.Data == "span" || l1.Data == "a" {
		return // never condense into these
	}

	// Snapshot two levels before anything is moved.
	var l2s []*html.Node
	l3s := map[*html.Node][]*html.Node{}
	for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling {
		l2s = append(l2s, l2)
		for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling {
			l3s[l2] = append(l3s[l2], l3)
		}
	}

	// l2 nodes that were emptied; they serve as insertion anchors while the
	// loop runs and are therefore detached only at the very end.
	var emptied []*html.Node

	for _, l2 := range l2s {
		grandchildren := l3s[l2]

		// Decide per l2 subtree whether everything matches.
		lift := l2.Type == html.ElementNode && l2Types[l2.Data]
		for i := 0; lift && i < len(grandchildren); i++ {
			l3 := grandchildren[i]
			lift = l3.Type == html.ElementNode && l3Types[l3.Data]
		}
		if !lift {
			continue
		}
		emptied = append(emptied, l2)

		for _, l3 := range grandchildren {
			// detach l3 from l2 ...
			l2.RemoveChild(l3)

			// ... and attach it to l1, directly or inside a wrapper
			if l3.Data != "a" && l3.Data != "span" {
				l1.InsertBefore(l3, l2)
				continue
			}
			wrap := dom.Nd("p")
			wrap.Attr = []html.Attribute{{Key: "cfrm", Val: "noth"}}
			wrap.AppendChild(l3) // not wrap.FirstChild = l3
			l1.InsertBefore(wrap, l2)
		}
	}

	for _, l2 := range emptied {
		l1.RemoveChild(l2) // detach l2 from l1
	}

}

Пример #19

0

Показать файл

Файл: 07_condense_bottom_up_v2.go Проект: aarzilli/tools

// condenseBottomUpV2 descends from n until level lvlDo is reached and
// flattens every element found there whose tag is flagged in types.
//
// Above lvlDo (lvl < lvlDo) it only recurses into the children. At lvlDo, for
// a matching element with a parent, the subtree is flattened with
// flattenSubtreeV2, all children of the old parent are removed, and all nodes
// gathered by the flattening are appended to the old parent instead. When
// lvlDo > 4 the flattened subtree is printed for debugging.
func condenseBottomUpV2(n *html.Node, lvl, lvlDo int, types map[string]bool) {

	if lvl < lvlDo {
		// Snapshot the children first: a recursive call may detach them.
		var cs []*html.Node
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			cs = append(cs, c)
		}
		for _, c := range cs {
			condenseBottomUpV2(c, lvl+1, lvlDo, types)
		}
		return
	}

	// log.Printf("action on %v %v\n", lvl, lvlDo)

	if n.Type != html.ElementNode || !types[n.Data] {
		return
	}

	oldPar := n.Parent
	if oldPar == nil {
		return
	}

	b, newPar := flattenSubtreeV2(n, nil, 0, nil)

	// Dump while newPar still holds the flattened nodes.
	if lvlDo > 4 {
		bx := dom.PrintSubtree(newPar)
		fmt.Printf("%s", bx)
	}

	// RemoveChild clears the removed node's NextSibling, so a loop that
	// advances through c.NextSibling would stop after the first child.
	// Always taking the current head processes every child.
	for oldPar.FirstChild != nil {
		oldPar.RemoveChild(oldPar.FirstChild)
	}
	for c := newPar.FirstChild; c != nil; c = newPar.FirstChild {
		newPar.RemoveChild(c)
		oldPar.AppendChild(c)
	}

	nodeRepl := dom.Nd("text", b.String())

	// Disabled alternative: replace n's children by one text node.
	if false {

		// Remove all existing children.
		// Direct loop impossible, since "NextSibling" is set to nil by Remove().
		children := []*html.Node{}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			children = append(children, c) //  assembling separately, before removing.
		}
		for _, c := range children {
			log.Printf("c %4v rem from %4v ", c.Data, n.Data)
			n.RemoveChild(c)
		}

		// we can't put our replacement "under" an image, since img cannot have children
		if n.Type == html.ElementNode && n.Data == "img" {
			n.Parent.InsertBefore(nodeRepl, n.NextSibling) // if n.NextSibling==nil => insert at the end
			n.Parent.RemoveChild(n)
		} else {
			n.AppendChild(nodeRepl)
		}

		// Insert a  || and a newline before every <a...>
		// if n.Data == "a" {
		// 	n.Parent.InsertBefore(dom.Nd("text", " || "), n)
		// }
	}

}

Пример #20

0

Показать файл

Файл: 03_top_down_v2.go Проект: aarzilli/tools

// topDownV2 condenses a three-level subtree rooted at l1 with all-or-nothing
// logic.
//
// Every child of l1 (l2) must be an element whose tag is in l2Types, and every
// grandchild (l3) must be an element whose tag is in l3Types. If a single node
// fails the check, the tree is left untouched - partial restructuring would
// scramble the sequence. Otherwise every l2 is removed and its l3 children are
// appended to l1, l2 by l2, in their original order.
//
// Nothing happens when l1 is neither an element nor a document node, or when
// l1 is a "span" or an "a".
func topDownV2(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) {

	if l1.Type != html.ElementNode && l1.Type != html.DocumentNode {
		return // such a node cannot take children
	}
	if l1.Data == "span" || l1.Data == "a" {
		return // never condense into these
	}

	// Snapshot l2 before anything is moved.
	var l2s []*html.Node
	for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling {
		l2s = append(l2s, l2)
	}

	// Type check over *all* l2 and l3 nodes; bail out on the first mismatch.
	for _, l2 := range l2s {
		if l2.Type != html.ElementNode || !l2Types[l2.Data] {
			return
		}
		for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling {
			if l3.Type != html.ElementNode || !l3Types[l3.Data] {
				return
			}
		}
	}

	// Act.
	for _, l2 := range l2s {

		// Detach every l3 from l2, remembering the order.
		var l3s []*html.Node
		for l3 := l2.FirstChild; l3 != nil; l3 = l2.FirstChild {
			l2.RemoveChild(l3)
			l3s = append(l3s, l3)
		}

		l1.RemoveChild(l2) // detach l2 from l1

		// Attach the l3 nodes to l1.
		for _, l3 := range l3s {
			l1.InsertBefore(l3, nil) // nil reference => insert at end
		}

	}

}

Пример #21

0

Показать файл

Файл: 05_breakout_imgs_from_a_trees.go Проект: aarzilli/tools

// breakoutImagesFromAnchorTrees walks the tree below n and, for every <a>
// element in which searchImg finds an image, moves that image out of the
// anchor so that it follows the anchor as a sibling.
//
// Two cases are distinguished:
//   - the image is the anchor's only child at level 1: the image is moved
//     behind the anchor and the anchor receives a text child derived from its
//     href;
//   - otherwise: the anchor is cloned, closureDeleter is applied to the
//     original and to the clone with opposite flags, and image and clone are
//     inserted behind the original anchor.
//
// n.Parent is dereferenced for matching anchors, so such an anchor must have a
// parent.
func breakoutImagesFromAnchorTrees(n *html.Node) {

	// Children first. The recursive call may insert new siblings behind c;
	// these are then visited by this same loop.
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		breakoutImagesFromAnchorTrees(c)
	}

	if n.Type == html.ElementNode && n.Data == "a" {

		img, lvl := searchImg(n, nil, 0)

		if img != nil {

			only1Child := n.FirstChild != nil && n.FirstChild == n.LastChild
			if lvl == 1 && only1Child {
				// log.Printf("only child image lvl %v a\n", lvl)
				n.RemoveChild(img)
				n.Parent.InsertBefore(img, n.NextSibling) // "insert after; if n.NextSibling==nil => insert at the end"
				// The anchor would be empty now; give it a text derived from the link target.
				contnt := urlBeautify(attrX(n.Attr, "href"))
				if len(contnt) < 6 {
					contnt = "[was img] " + contnt
				}
				n.AppendChild(dom.Nd("text", contnt))
			} else {

				if debugBreakOut {
					b0 := dom.PrintSubtree(n)
					log.Printf("\n%s\n", b0)
				}

				// log.Printf("  got it  %v\n", img.Data)
				// NOTE(review): presumably the two closureDeleter passes split
				// the anchor's content around the image between n and the
				// clone a1, and detach img so it can be re-inserted below -
				// confirm against closureDeleter.
				a1 := dom.CloneNodeWithSubtree(n)
				fc1 := closureDeleter(true)
				fc1(n, 0, false)
				if debugBreakOut {
					b1 := dom.PrintSubtree(n)
					log.Printf("\n%s\n", b1)
				}

				fc2 := closureDeleter(false)
				fc2(a1, 0, false)
				if debugBreakOut {
					b2 := dom.PrintSubtree(a1)
					log.Printf("\n%s\n", b2)
					log.Printf("--------------------\n")
				}

				if true {
					// Resulting sibling order: n, img, a1.
					n.Parent.InsertBefore(img, n.NextSibling) // "insert after; if n.NextSibling==nil => insert at the end"
					n.Parent.InsertBefore(a1, img.NextSibling)
				} else {
					// old way; sequence corruption if n had rightwise siblings.
					n.Parent.AppendChild(img)
					n.Parent.AppendChild(a1)

				}

			}

			// changing image to link later

		} else {
			// log.Printf("no img in a\n")
		}
	}

}

Golang Node.RemoveChild примеры использования