Пример #1
0
// reIndent recursively rewrites the whitespace around n and its descendants
// so that the serialized tree is indented by nesting depth.
//
// lvl is the depth of n. Element indentation is only applied to nodes deeper
// than cScaffoldLvls whose parent is an element; the indent consists of a
// newline followed by lvl-2 tab characters.
//
// Per node type:
//   - element: an indent text node is inserted before the element and, once
//     its children are processed, another one after its last child
//     (an empty text node for <a>, to keep link texts on one line);
//   - comment: a newline text node is inserted before it;
//   - text:    the data is trimmed and re-padded with single spaces.
//
// A node below the scaffold levels that has no parent is reported via log
// and skipped together with its subtree.
func reIndent(n *html.Node, lvl int) {

	beyondScaffold := lvl > cScaffoldLvls

	if beyondScaffold && n.Parent == nil {
		// The subtree is rendered but currently not logged.
		_ = dom.PrintSubtree(n)
		origin := ""
		if ml3[n] > 0 {
			origin = "   from ml3"
		}
		log.Print("reIndent: no parent ", origin)
		return
	}

	// Work done before descending into the children.
	if n.Type == html.ElementNode {
		if beyondScaffold && n.Parent.Type == html.ElementNode {
			lead := &html.Node{
				Type: html.TextNode,
				Data: "\n" + strings.Repeat("\t", lvl-2),
			}
			dom.InsertBefore(n, lead)
		}
	} else if n.Type == html.CommentNode {
		dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n"})
	} else if n.Type == html.TextNode {
		core := strings.TrimSpace(n.Data)
		switch {
		case n.Parent != nil && n.Parent.Data == "a":
			// link texts carry no surrounding space at all
			n.Data = core
		case strings.HasPrefix(core, ","), strings.HasPrefix(core, "."):
			// no leading space in front of punctuation
			n.Data = core + " "
		default:
			n.Data = " " + core + " "
		}
	}

	// Descend.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		reIndent(child, lvl+1)
	}

	// Work done after the children: the closing indent has to be appended
	// behind whatever the children processing left as last child.
	if n.Type != html.ElementNode {
		return
	}
	if !beyondScaffold || n.Parent.Type != html.ElementNode {
		return
	}
	closing := "\n" + strings.Repeat("\t", lvl-2)
	if n.Data == "a" {
		// link texts without new line
		closing = ""
	}
	if last := n.LastChild; last != nil {
		dom.InsertAfter(last, &html.Node{Type: html.TextNode, Data: closing})
	}

}
Пример #2
0
// noParent reports whether n has no parent node.
//
// For document and doctype nodes a missing parent is not reported.
// For every other node type the condition is written out via pf,
// followed by a dump of the node's subtree.
func noParent(n *html.Node) bool {

	if n.Parent != nil {
		return false
	}

	switch n.Type {
	case html.DoctypeNode, html.DocumentNode:
		// nothing to report
	default:
		pf("parent is nil\n")
		dump := dom.PrintSubtree(n)
		pf("%s", dump)
	}

	return true

}
// breakoutImagesFromAnchorTrees walks the subtree below n depth-first and,
// for every <a> element in which searchImg finds an image, moves that image
// out of the anchor so that it becomes the sibling directly behind it.
//
// Two cases are distinguished:
//
//   - The image sits at level 1 and is the anchor's only child:
//     the image is moved behind the anchor, and the anchor receives a text
//     child derived from its href (via urlBeautify). If that text is shorter
//     than 6 bytes it is prefixed with "[was img] ".
//
//   - Otherwise the anchor is cloned, closureDeleter(true) is applied to the
//     original and closureDeleter(false) to the clone, and then the image
//     and the clone are inserted behind the original anchor, in that order.
//     NOTE(review): presumably the two deleters strip the image plus one
//     side of it each, splitting the anchor in two - confirm against
//     closureDeleter.
//
// An anchor without a parent is left untouched, since there is no sibling
// position the image could be moved to.
func breakoutImagesFromAnchorTrees(n *html.Node) {

	// Children first. Nodes that a recursive call inserts behind c are
	// picked up by this loop as well, since NextSibling is read afterwards.
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		breakoutImagesFromAnchorTrees(c)
	}

	if n.Type != html.ElementNode || n.Data != "a" {
		return
	}

	img, lvl := searchImg(n, nil, 0)
	if img == nil {
		return
	}

	// Guard before any mutation: without a parent the insertions below
	// would dereference nil after the anchor has already been modified.
	if n.Parent == nil {
		return
	}

	only1Child := n.FirstChild != nil && n.FirstChild == n.LastChild
	if lvl == 1 && only1Child {
		n.RemoveChild(img)
		// Insert after n; a nil NextSibling appends at the end of the parent.
		n.Parent.InsertBefore(img, n.NextSibling)
		contnt := urlBeautify(attrX(n.Attr, "href"))
		if len(contnt) < 6 {
			contnt = "[was img] " + contnt
		}
		n.AppendChild(dom.Nd("text", contnt))
		return
	}

	if debugBreakOut {
		b0 := dom.PrintSubtree(n)
		log.Printf("\n%s\n", b0)
	}

	// The clone must be taken before the original is pruned.
	a1 := dom.CloneNodeWithSubtree(n)
	fc1 := closureDeleter(true)
	fc1(n, 0, false)
	if debugBreakOut {
		b1 := dom.PrintSubtree(n)
		log.Printf("\n%s\n", b1)
	}

	fc2 := closureDeleter(false)
	fc2(a1, 0, false)
	if debugBreakOut {
		b2 := dom.PrintSubtree(a1)
		log.Printf("\n%s\n", b2)
		log.Printf("--------------------\n")
	}

	// Resulting order: n, img, a1, <former right siblings of n>.
	// Appending to the parent instead would corrupt the sequence whenever
	// n has siblings to its right.
	// NOTE(review): assumes fc1 has detached img from n; InsertBefore
	// panics for a node that still has a parent.
	n.Parent.InsertBefore(img, n.NextSibling)
	n.Parent.InsertBefore(a1, img.NextSibling)

	// changing image to link later

}
Пример #4
0
// condenseBottomUpV2 descends from n until the depth lvl reaches lvlDo and
// flattens, at that depth, every element whose tag name is enabled in types.
//
// Parameters:
//   - n:     the node currently visited
//   - lvl:   depth of n; incremented by one for each level of recursion
//   - lvlDo: depth at which flattening takes place; nodes above it are only
//     traversed
//   - types: tag names (html.Node.Data) that are eligible for flattening
//
// Flattening calls flattenSubtreeV2(n, nil, 0, nil), which yields a container
// node. All children of n's parent are then removed and replaced by the
// children of that container. Siblings of n are thereby detached; when the
// caller's loop reaches them later they have no parent and are skipped.
//
// For lvlDo > 4 the container is dumped to stdout after the move.
func condenseBottomUpV2(n *html.Node, lvl, lvlDo int, types map[string]bool) {

	if lvl < lvlDo {
		// Collect the children before recursing: a deeper call may detach
		// nodes, and RemoveChild resets their NextSibling pointer.
		var cs []*html.Node
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			cs = append(cs, c)
		}
		for _, c := range cs {
			condenseBottomUpV2(c, lvl+1, lvlDo, types)
		}
		return
	}

	if n.Type != html.ElementNode || !types[n.Data] {
		return
	}

	oldPar := n.Parent
	if oldPar == nil {
		return
	}

	_, newPar := flattenSubtreeV2(n, nil, 0, nil)

	// Empty the old parent. The successor has to be saved up front,
	// because RemoveChild clears the sibling pointers of the removed node;
	// advancing via c.NextSibling afterwards would stop after one child.
	for c := oldPar.FirstChild; c != nil; {
		next := c.NextSibling
		oldPar.RemoveChild(c)
		c = next
	}

	// Move every flattened child over, preserving order.
	for c := newPar.FirstChild; c != nil; {
		next := c.NextSibling
		newPar.RemoveChild(c)
		oldPar.AppendChild(c)
		c = next
	}

	if lvlDo > 4 {
		// Shows what is left of the container after the move.
		bx := dom.PrintSubtree(newPar)
		fmt.Printf("%s", bx)
	}

}