// Test2 runs breakoutImagesFromAnchorTrees on the parsed testDocs[0] and
// compares the rendered result with testDocs[1].
//
// removeCommentsAndIntertagWhitespace is applied before and after the
// transformation, and reIndent is applied before rendering, so the expected
// document must be stored in that same normalised form.
//
// The input, the actual result and the expected result are additionally
// handed to osutilpb for dumping as outp1_inp.html, outp2_got.html and
// outp3_want.html, which helps when diffing a failure by hand.
func Test2(t *testing.T) {

	lg, _ := loghttp.Logger(nil, nil)

	doc, err := html.Parse(strings.NewReader(testDocs[0]))
	if err != nil {
		// An unparsable fixture must fail the test; merely logging and
		// returning would report success.
		t.Fatalf("parsing testDocs[0]: %v", err)
	}
	removeCommentsAndIntertagWhitespace(NdX{doc, 0})

	breakoutImagesFromAnchorTrees(doc)

	removeCommentsAndIntertagWhitespace(NdX{doc, 0})
	reIndent(doc, 0)

	var b bytes.Buffer
	if err := html.Render(&b, doc); err != nil {
		// Errorf rather than Fatalf: the dump files below are still wanted
		// for diagnosis.
		t.Errorf("rendering result: %v", err)
	}
	if b.String() != testDocs[1] {
		t.Errorf("output unexpected")
	}

	osutilpb.Bytes2File("outp1_inp.html", []byte(testDocs[0]))
	osutilpb.Dom2File("outp2_got.html", doc)
	osutilpb.Bytes2File("outp3_want.html", []byte(testDocs[1]))

	lg("end")

}
// Example #2
// 0
// fileDump hands a re-indented version of doc to osutilpb.Dom2File under the
// name fNamer()+".html".
//
// A nil fNamer turns the call into a no-op and doc is left untouched.
// Otherwise doc is passed through removeCommentsAndIntertagWhitespace and
// reIndent before the dump, and through removeCommentsAndIntertagWhitespace
// once more afterwards.
func fileDump(doc *html.Node, fNamer func() string) {
	if fNamer == nil {
		return
	}

	root := NdX{doc, 0}

	removeCommentsAndIntertagWhitespace(root)
	reIndent(doc, 0)

	// fNamer is invoked exactly once per dump, after re-indenting.
	target := fNamer() + ".html"
	osutilpb.Dom2File(target, doc)

	removeCommentsAndIntertagWhitespace(root)
}
// Example #3
// 0
// DomClean parses the HTML in b and runs the resulting tree through the
// package's cleaning pipeline, returning the tree's root node.
//
// The stages run in a fixed order:
//
//  1. globFixes on the raw bytes, then html.Parse
//  2. cleanseDom and removeCommentsAndIntertagWhitespace
//  3. condenseTopDown and removeEmptyNodes
//  4. breakoutImagesFromAnchorTrees and recurseImg2Link
//  5. condenseBottomUpV3 for "div" with its third argument stepping from 7
//     down to 4, then condenseTopDown and two removeEmptyNodes passes
//  6. the optional steps selected by opt: Proxify, Beautify, AddOutline, AddID
//  7. computeXPathStack
//
// When opt.FNamer is non-nil, the tree is dumped after most stages to files
// named by successive opt.FNamer() calls, and xPathDump is written to a
// ".txt" file at the end.
//
// If parsing fails, the error is logged and returned together with a nil
// node.
func DomClean(b []byte, opt CleaningOptions) (*html.Node, error) {

	_, logErr := loghttp.Logger(nil, nil)

	root, err := html.Parse(bytes.NewReader(globFixes(b)))
	if err != nil {
		logErr(err)
		return nil, err
	}

	// snapshot dumps the current tree; fileDump itself ignores a nil FNamer.
	snapshot := func() { fileDump(root, opt.FNamer) }

	// The freshly parsed tree is dumped as is, without fileDump's re-indenting.
	if opt.FNamer != nil {
		osutilpb.Dom2File(opt.FNamer()+".html", root)
	}

	// Stage: basic cleansing.
	cleanseDom(root, 0)
	removeCommentsAndIntertagWhitespace(NdX{root, 0})
	snapshot()

	// Stage: first top-down condensing.
	condenseTopDown(root, 0, 0)
	removeEmptyNodes(root, 0)
	snapshot()

	// Stage: images. Whitespace is stripped first to prevent spacey text
	// nodes around single child images.
	removeCommentsAndIntertagWhitespace(NdX{root, 0})
	breakoutImagesFromAnchorTrees(root)
	recurseImg2Link(root)
	snapshot()

	// Stage: bottom-up condensing; every call receives its own tag set.
	for level := 7; level >= 4; level-- {
		condenseBottomUpV3(root, 0, level, map[string]bool{"div": true})
	}
	condenseTopDown(root, 0, 0)

	// Two passes, as before — presumably the second one catches nodes that
	// the first pass left empty; TODO confirm.
	for pass := 0; pass < 2; pass++ {
		removeEmptyNodes(root, 0)
	}
	snapshot()

	// Stage: optional proxying, falling back to the application host.
	if opt.Proxify {
		if opt.ProxyHost == "" {
			opt.ProxyHost = routes.AppHost()
		}
		remote := &url.URL{Scheme: "http", Host: opt.RemoteHost}
		proxify(root, opt.ProxyHost, remote)
		snapshot()
	}

	// Stage: optional re-indenting.
	if opt.Beautify {
		removeCommentsAndIntertagWhitespace(NdX{root, 0})
		reIndent(root, 0)
	}

	// Stage: optional attribute annotation; one dump covers both kinds.
	annotated := false
	if opt.AddOutline {
		addOutlineAttr(root, 0, []int{0})
		annotated = true
	}
	if opt.AddID {
		addIdAttr(root, 0, 1)
		annotated = true
	}
	if annotated {
		snapshot()
	}

	// Stage: xpath computation, always performed.
	computeXPathStack(root, 0)
	if opt.FNamer != nil {
		osutilpb.Bytes2File(opt.FNamer()+".txt", xPathDump)
	}

	return root, nil
}