func reIndent(n *html.Node, lvl int) { if lvl > cScaffoldLvls && n.Parent == nil { bb := dom.PrintSubtree(n) _ = bb // log.Printf("%s", bb.Bytes()) hint := "" if ml3[n] > 0 { hint = " from ml3" } log.Print("reIndent: no parent ", hint) return } // Before children processing switch n.Type { case html.ElementNode: if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode { ind := strings.Repeat("\t", lvl-2) dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n" + ind}) } case html.CommentNode: dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n"}) case html.TextNode: n.Data = strings.TrimSpace(n.Data) + " " if !strings.HasPrefix(n.Data, ",") && !strings.HasPrefix(n.Data, ".") { n.Data = " " + n.Data } // link texts without trailing space if n.Parent != nil && n.Parent.Data == "a" { n.Data = strings.TrimSpace(n.Data) } } // Children for c := n.FirstChild; c != nil; c = c.NextSibling { reIndent(c, lvl+1) } // After children processing switch n.Type { case html.ElementNode: // I dont know why, // but this needs to happend AFTER the children if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode { ind := strings.Repeat("\t", lvl-2) ind = "\n" + ind // link texts without new line if n.Data == "a" { ind = "" } if n.LastChild != nil { dom.InsertAfter(n.LastChild, &html.Node{Type: html.TextNode, Data: ind}) } } } }
func noParent(n *html.Node) bool { p := n.Parent if p == nil { if n.Type == html.DoctypeNode || n.Type == html.DocumentNode { return true } pf("parent is nil\n") b := dom.PrintSubtree(n) pf("%s", b) return true } return false }
func breakoutImagesFromAnchorTrees(n *html.Node) { for c := n.FirstChild; c != nil; c = c.NextSibling { breakoutImagesFromAnchorTrees(c) } if n.Type == html.ElementNode && n.Data == "a" { img, lvl := searchImg(n, nil, 0) if img != nil { only1Child := n.FirstChild != nil && n.FirstChild == n.LastChild if lvl == 1 && only1Child { // log.Printf("only child image lvl %v a\n", lvl) n.RemoveChild(img) n.Parent.InsertBefore(img, n.NextSibling) // "insert after; if n.NextSibling==nil => insert at the end" contnt := urlBeautify(attrX(n.Attr, "href")) if len(contnt) < 6 { contnt = "[was img] " + contnt } n.AppendChild(dom.Nd("text", contnt)) } else { if debugBreakOut { b0 := dom.PrintSubtree(n) log.Printf("\n%s\n", b0) } // log.Printf(" got it %v\n", img.Data) a1 := dom.CloneNodeWithSubtree(n) fc1 := closureDeleter(true) fc1(n, 0, false) if debugBreakOut { b1 := dom.PrintSubtree(n) log.Printf("\n%s\n", b1) } fc2 := closureDeleter(false) fc2(a1, 0, false) if debugBreakOut { b2 := dom.PrintSubtree(a1) log.Printf("\n%s\n", b2) log.Printf("--------------------\n") } if true { n.Parent.InsertBefore(img, n.NextSibling) // "insert after; if n.NextSibling==nil => insert at the end" n.Parent.InsertBefore(a1, img.NextSibling) } else { // old way ; sequence corrpution if n had rightwise siblings. n.Parent.AppendChild(img) n.Parent.AppendChild(a1) } } // changing image to link later } else { // log.Printf("no img in a\n") } } }
func condenseBottomUpV2(n *html.Node, lvl, lvlDo int, types map[string]bool) { if lvl < lvlDo { cs := []*html.Node{} for c := n.FirstChild; c != nil; c = c.NextSibling { cs = append(cs, c) } for _, c := range cs { condenseBottomUpV2(c, lvl+1, lvlDo, types) } } else { // log.Printf("action on %v %v\n", lvl, lvlDo) switch { case n.Type == html.ElementNode && types[n.Data]: oldPar := n.Parent if oldPar == nil { return } b, newPar := flattenSubtreeV2(n, nil, 0, nil) // placeholder := dom.Nd("div") // par := n.Parent // par.InsertBefore(placeholder, n.NextSibling) // par.RemoveChild(n) // par.InsertBefore(n2, placeholder) for c := oldPar.FirstChild; c != nil; c = c.NextSibling { oldPar.RemoveChild(c) } for c := newPar.FirstChild; c != nil; c = c.NextSibling { newPar.RemoveChild(c) oldPar.AppendChild(c) } if lvlDo > 4 { bx := dom.PrintSubtree(newPar) fmt.Printf("%s", bx) } // n = n2 nodeRepl := dom.Nd("text", b.String()) if false { // Remove all existing children. // Direct loop impossible, since "NextSibling" is set to nil by Remove(). children := []*html.Node{} for c := n.FirstChild; c != nil; c = c.NextSibling { children = append(children, c) // assembling separately, before removing. } for _, c := range children { log.Printf("c %4v rem from %4v ", c.Data, n.Data) n.RemoveChild(c) } // we can't put our replacement "under" an image, since img cannot have children if n.Type == html.ElementNode && n.Data == "img" { n.Parent.InsertBefore(nodeRepl, n.NextSibling) // if n.NextSibling==nil => insert at the end n.Parent.RemoveChild(n) } else { n.AppendChild(nodeRepl) } // Insert a || and a newline before every <a...> // if n.Data == "a" { // n.Parent.InsertBefore(dom.Nd("text", " || "), n) // } } default: } } }