예제 #1
0
// flattenSubtreeV3Inner copies the children of n into nClone while dissolving
// <span> wrappers.
//
// Each child of n is handled according to its kind:
//   - elements listed in standard, and <a> elements: the clone is appended to
//     nClone and the child's own children are processed recursively into it;
//   - <img> elements: the clone is appended, without descending;
//   - <span> elements: the span itself is not copied; every child of the span
//     is cloned and appended to nClone, followed by the node produced by
//     dom.Nd("text", " "), and then processed recursively;
//   - non-empty text nodes: the clone is appended with its data trimmed by
//     strings.TrimSpace and a single trailing space added;
//   - everything else is logged as unhandled and skipped.
//
// lvl is the recursion depth. It is only passed down; the debug logging that
// used it is disabled.
func flattenSubtreeV3Inner(n, nClone *html.Node, lvl int) {

	for ch := n.FirstChild; ch != nil; ch = ch.NextSibling {

		chClone := dom.CloneNode(ch)
		isElem := ch.Type == html.ElementNode

		switch {

		case isElem && (standard[ch.Data] || ch.Data == "a"):
			nClone.AppendChild(chClone)
			flattenSubtreeV3Inner(ch, chClone, lvl+1)

		case isElem && ch.Data == "img":
			nClone.AppendChild(chClone)

		// The element check matters: without it a text node whose content is
		// exactly "span" would land here, have no children to copy, and be
		// silently lost instead of reaching the text case below.
		case isElem && ch.Data == "span":
			for cch := ch.FirstChild; cch != nil; cch = cch.NextSibling {
				cchClone := dom.CloneNode(cch)
				nClone.AppendChild(cchClone)
				// Separator, so lifted span contents do not run together.
				nClone.AppendChild(dom.Nd("text", " "))
				flattenSubtreeV3Inner(cch, cchClone, lvl+1)
			}

		case ch.Type == html.TextNode && ch.Data != "":
			chClone.Data = strings.TrimSpace(chClone.Data) + " "
			nClone.AppendChild(chClone)

		default:
			log.Printf("unhandled %s %s\n", dom.NodeTypeStr(ch.Type), ch.Data)

		}

	}

}
예제 #2
0
// flattenSubtreeV2 detaches n and all of its descendants from their current
// position and re-attaches the ones it recognises as direct children of tpar,
// in document order.
//
// Parameters:
//   - n:     node to process; a nil n is a no-op.
//   - b:     buffer receiving a textual trace (em/strong markers and text
//     content); allocated when nil.
//   - depth: recursion depth, only used in the em/strong trace marker.
//   - tpar:  target parent; when nil a new node is created carrying n's Type,
//     DataAtom, Data and a copy of its attributes.
//
// Handling per node:
//   - <a>, <img>, <label>, <input>, <textarea>, <p>, <div>, <li>, <ol>, <ul>,
//     <h1>, <h2>: moved under tpar;
//   - <em>, <strong>: a "[tag ldepth] " marker is written to b, then moved;
//   - <span>: dropped; its children are lifted by the recursion below;
//   - non-empty text: trimmed, given one trailing space, written to b, moved;
//   - anything else: logged as unhandled and dropped.
//
// In every case the node's children are afterwards processed recursively with
// the same tpar, so they end up as siblings of n rather than below it.
//
// It returns the buffer and the target parent.
func flattenSubtreeV2(n *html.Node, b *bytes.Buffer, depth int, tpar *html.Node) (*bytes.Buffer, *html.Node) {

	if b == nil {
		b = new(bytes.Buffer)
	}
	if n == nil {
		return b, tpar
	}
	if tpar == nil {
		tpar = &html.Node{
			Type:     n.Type,
			DataAtom: n.DataAtom,
			Data:     n.Data,
			Attr:     make([]html.Attribute, len(n.Attr)),
		}
		copy(tpar.Attr, n.Attr)
	}

	// Every branch below takes n out of its current parent. Doing it once up
	// front also lets a root node that is already detached (nil Parent) pass
	// through instead of causing a nil dereference.
	if n.Parent != nil {
		n.Parent.RemoveChild(n)
	}

	// Tag names are only meaningful for element nodes; a text node whose
	// content happens to read "p" or "em" must be treated as text.
	tag := ""
	if n.Type == html.ElementNode {
		tag = n.Data
	}

	switch {
	case tag == "span":
		// The span itself is discarded. Its children are NOT moved here:
		// walking them with c.NextSibling while calling RemoveChild stops
		// after the first child, because RemoveChild clears that pointer.
		// The shared recursion at the bottom lifts all of them uniformly.
	case tag == "em" || tag == "strong":
		wpf(b, "[%v l%v] ", n.Data, depth)
		tpar.AppendChild(n)
	case tag == "a" || tag == "img" ||
		tag == "label" || tag == "input" || tag == "textarea" ||
		tag == "p" || tag == "div" || tag == "li" || tag == "ol" ||
		tag == "h1" || tag == "h2" || tag == "ul":
		tpar.AppendChild(n)
	case n.Type == html.TextNode && n.Data != "":
		n.Data = strings.TrimSpace(n.Data) + " "
		// wpf is used printf-style above, so document text must be passed as
		// an argument; used as the format, any '%' in it would be
		// misinterpreted as a verb.
		wpf(b, "%s", n.Data)
		tpar.AppendChild(n)
	default:
		log.Printf("unhandled %s %s\n", dom.NodeTypeStr(n.Type), n.Data)
	}

	// Snapshot the children first: the recursive calls detach them from n,
	// which would break a live FirstChild/NextSibling walk.
	var children []*html.Node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		children = append(children, c)
	}
	for _, c := range children {
		flattenSubtreeV2(c, b, depth+1, tpar)
	}

	return b, tpar
}