func flattenSubtreeV3Inner(n, nClone *html.Node, lvl int) { // log.Printf("fsbi\n") for ch := n.FirstChild; ch != nil; ch = ch.NextSibling { chClone := dom.CloneNode(ch) switch { case ch.Type == html.ElementNode && standard[ch.Data]: nClone.AppendChild(chClone) flattenSubtreeV3Inner(ch, chClone, lvl+1) case ch.Type == html.ElementNode && ch.Data == "a": nClone.AppendChild(chClone) flattenSubtreeV3Inner(ch, chClone, lvl+1) case ch.Type == html.ElementNode && ch.Data == "img": nClone.AppendChild(chClone) case ch.Data == "span": // log.Printf(strings.Repeat(" ", lvl) + "span \n") for cch := ch.FirstChild; cch != nil; cch = cch.NextSibling { // log.Printf(strings.Repeat(" ", lvl)+"span child %v", cch.Data) cchClone := dom.CloneNode(cch) nClone.AppendChild(cchClone) nClone.AppendChild(dom.Nd("text", " ")) flattenSubtreeV3Inner(cch, cchClone, lvl+1) } case ch.Type == html.TextNode && ch.Data != "": chClone.Data = strings.TrimSpace(chClone.Data) chClone.Data += " " nClone.AppendChild(chClone) default: // nClone.AppendChild(chClone) log.Printf("unhandled %s %s\n", dom.NodeTypeStr(ch.Type), ch.Data) } } }
func flattenSubtreeV2(n *html.Node, b *bytes.Buffer, depth int, tpar *html.Node) (*bytes.Buffer, *html.Node) { if b == nil { b = new(bytes.Buffer) } if tpar == nil { tpar = &html.Node{ Type: n.Type, DataAtom: n.DataAtom, Data: n.Data, Attr: make([]html.Attribute, len(n.Attr)), } copy(tpar.Attr, n.Attr) } switch { case n.Type == html.ElementNode && n.Data == "a": n.Parent.RemoveChild(n) tpar.AppendChild(n) // wpf(b, "[a] ") case n.Type == html.ElementNode && n.Data == "img": // img2Link(n) n.Parent.RemoveChild(n) tpar.AppendChild(n) case n.Data == "em" || n.Data == "strong": wpf(b, "[%v l%v] ", n.Data, depth) n.Parent.RemoveChild(n) tpar.AppendChild(n) case n.Data == "label" || n.Data == "input" || n.Data == "textarea": n.Parent.RemoveChild(n) tpar.AppendChild(n) case n.Data == "p" || n.Data == "div" || n.Data == "li" || n.Data == "ol" || n.Data == "h1" || n.Data == "h2" || n.Data == "ul": n.Parent.RemoveChild(n) tpar.AppendChild(n) case n.Data == "span": for c := n.FirstChild; c != nil; c = c.NextSibling { n.RemoveChild(c) tpar.AppendChild(c) } n.Parent.RemoveChild(n) case n.Type == html.TextNode && n.Data != "": n.Data = strings.TrimSpace(n.Data) n.Data += " " wpf(b, n.Data) n.Parent.RemoveChild(n) tpar.AppendChild(n) default: log.Printf("unhandled %s %s\n", dom.NodeTypeStr(n.Type), n.Data) n.Parent.RemoveChild(n) } // // children := []*html.Node{} for c := n.FirstChild; c != nil; c = c.NextSibling { // fmt.Printf("still has children %v\n", c.Data) children = append(children, c) // assembling separately, before removing. } for _, c := range children { flattenSubtreeV2(c, b, depth+1, tpar) } return b, tpar }