示例#1
0
// reIndent walks the subtree rooted at n and inserts whitespace text nodes
// around elements and comments, and normalizes the padding of text nodes.
// lvl is the depth of n within the walk; children are visited with lvl+1.
//
// A node deeper than cScaffoldLvls that has no parent is reported via the
// log and its subtree is left untouched.
func reIndent(n *html.Node, lvl int) {
	belowScaffold := lvl > cScaffoldLvls

	if belowScaffold && n.Parent == nil {
		// The printed subtree is only of interest while debugging.
		_ = dom.PrintSubtree(n)
		origin := ""
		if ml3[n] > 0 {
			origin = "   from ml3"
		}
		log.Print("reIndent: no parent ", origin)
		return
	}

	// textNode builds a detached text node carrying data.
	textNode := func(data string) *html.Node {
		return &html.Node{Type: html.TextNode, Data: data}
	}

	// Step 1: adjustments made before descending into the children.
	if n.Type == html.ElementNode {
		// Only elements nested inside another element get a line break
		// plus lvl-2 tabs in front of them.
		if belowScaffold && n.Parent.Type == html.ElementNode {
			tabs := strings.Repeat("\t", lvl-2)
			dom.InsertBefore(n, textNode("\n"+tabs))
		}
	} else if n.Type == html.CommentNode {
		dom.InsertBefore(n, textNode("\n"))
	} else if n.Type == html.TextNode {
		// Exactly one trailing space; one leading space unless the text
		// starts with a comma or a period.
		txt := strings.TrimSpace(n.Data) + " "
		startsWithPunct := strings.HasPrefix(txt, ",") || strings.HasPrefix(txt, ".")
		if !startsWithPunct {
			txt = " " + txt
		}
		// Text directly inside a link carries no padding at all.
		if n.Parent != nil && n.Parent.Data == "a" {
			txt = strings.TrimSpace(txt)
		}
		n.Data = txt
	}

	// Step 2: descend.
	child := n.FirstChild
	for child != nil {
		reIndent(child, lvl+1)
		child = child.NextSibling
	}

	// Step 3: adjustments made after the children were processed.
	if n.Type != html.ElementNode {
		return
	}
	if !belowScaffold || n.Parent.Type != html.ElementNode {
		return
	}

	// The text placed after the last child puts the closing tag on its
	// own indented line; links get an empty text node instead.
	closing := "\n" + strings.Repeat("\t", lvl-2)
	if n.Data == "a" {
		closing = ""
	}
	if last := n.LastChild; last != nil {
		dom.InsertAfter(last, textNode(closing))
	}
}
示例#2
0
// textifyNodeSubtree replaces all children of the element node n with a
// single node created by dom.Nd("text"). Its data is the result of
// textifySubtreeBruteForce(n, 0), passed through
// stringspb.NormalizeInnerWhitespace. A node created by dom.Nd("br") is then
// inserted after n. Nodes that are not elements are left untouched.
func textifyNodeSubtree(n *html.Node) {
	if n.Type != html.ElementNode {
		return
	}

	// The text has to be gathered while the children are still attached.
	flat := dom.Nd("text")
	flat.Data = stringspb.NormalizeInnerWhitespace(textifySubtreeBruteForce(n, 0))

	// Detach every existing child, front to back.
	for n.FirstChild != nil {
		n.RemoveChild(n.FirstChild)
	}
	n.AppendChild(flat)

	dom.InsertAfter(n, dom.Nd("br"))
}
示例#3
0
// ModifyHTML parses the HTML document s, rewrites it for delivery through
// the proxy and returns the re-rendered markup.
//
// r is the request to the proxy; r.Host (port included) is handed on as the
// proxy host. u is the URL that the proxy has called; the value returned by
// fetch.HostFromUrl(u) is handed on as the remote host.
//
// Every node of the parsed tree is visited and rewritten as follows:
//   - form: an input named "redirect-to" holding the absolutized action and
//     a submit input with accesskey "f" are appended, and the form's
//     attributes are passed through rewriteAttributes.
//   - script: every src attribute is set to emptySrc.
//   - a, img, script, style: all children are replaced by one text node
//     holding a condensed summary of the former subtree. img elements are
//     renamed to a; the attributes of a and img go through
//     rewriteAttributes.
//   - each resulting a element is preceded by a "||" text node, a node from
//     dom.Nd("br") and a newline text node.
//
// ModifyHTML panics if s cannot be parsed or the tree cannot be rendered.
func ModifyHTML(r *http.Request, u *url.URL, s string) string {

	proxyHost := r.Host // port included!
	remoteHost := fetch.HostFromUrl(u)

	// scriptCount numbers the placeholders emitted for script elements.
	var scriptCount int

	// The recursive helpers are local variables on purpose: they capture
	// per-call state (hosts, counter), so sharing them between concurrent
	// calls through package-level variables would mix up requests.

	// condense returns a compact textual summary of the subtree at n.
	// depth is 0 for the node the summary was requested for.
	var condense func(n *html.Node, depth int) string
	condense = func(n *html.Node, depth int) string {
		if n.Type == html.ElementNode {
			switch n.Data {
			case "script":
				// Script content is dropped in favour of a numbered stub.
				stub := fmt.Sprintf(" var script%v = '[script]'; ", scriptCount)
				scriptCount++
				return stub
			case "style":
				// Style content is dropped in favour of a fixed rule.
				return " .xxx {margin:2px;} "
			}
		}

		out := ""
		switch {
		case n.Type == html.ElementNode && n.Data == "img":
			out += fmt.Sprintf(" [img] %v %v | ", getAttrVal(n.Attr, "alt"), getAttrVal(n.Attr, "src"))
		case n.Type == html.ElementNode && n.Data == "a":
			out += "[a]"
		case n.Type == html.TextNode:
			txt := n.Data
			switch {
			case len(txt) < 4:
				// Very short texts (including empty ones) are kept verbatim.
				out += txt
			case depth > 0:
				out += fmt.Sprintf(" [txt%v] %v", depth, txt)
			default:
				out += " [txt] " + txt
			}
		}

		for c := n.FirstChild; c != nil; c = c.NextSibling {
			out += condense(c, depth+1)
		}
		return out
	}

	// rewrite applies the per-node modifications to n and then descends
	// into whatever children n has afterwards.
	var rewrite func(n *html.Node)
	rewrite = func(n *html.Node) {

		if n.Type == html.ElementNode && n.Data == "form" {
			// Carries the original, absolutized form target.
			redirect := &html.Node{
				Type: html.ElementNode,
				Data: "input",
				Attr: []html.Attribute{
					{Key: "name", Val: "redirect-to"},
					{Key: "value", Val: absolutize(getAttrVal(n.Attr, "action"), remoteHost)},
				},
			}
			n.AppendChild(redirect)

			submit := &html.Node{
				Type: html.ElementNode,
				Data: "input",
				Attr: []html.Attribute{
					{Key: "type", Val: "submit"},
					{Key: "value", Val: "subm"},
					{Key: "accesskey", Val: "f"},
				},
			}
			n.AppendChild(submit)

			n.Attr = rewriteAttributes(n.Attr, proxyHost, remoteHost)
		}

		if n.Type == html.ElementNode && n.Data == "script" {
			for i := range n.Attr {
				if n.Attr[i].Key == "src" {
					n.Attr[i].Val = emptySrc
				}
			}
		}

		if n.Type == html.ElementNode &&
			(n.Data == "a" || n.Data == "img" || n.Data == "script" || n.Data == "style") {

			// Condense first: the summary depends on the original tag
			// name and on the children that are removed below.
			summary := &html.Node{Type: html.TextNode, Data: condense(n, 0)}

			// Links and images get rewritten attributes; an image is
			// turned into a link so that it can carry the summary text.
			if n.Data == "a" || n.Data == "img" {
				n.Attr = rewriteAttributes(n.Attr, proxyHost, remoteHost)
				n.Data = "a"
			}

			// Drop all existing children. Always taking the current first
			// child avoids walking NextSibling links of removed nodes.
			for n.FirstChild != nil {
				n.RemoveChild(n.FirstChild)
			}
			n.AppendChild(summary)

			// Insert a || and a newline before every <a...>
			if n.Data == "a" {
				n.Parent.InsertBefore(dom.Nd("text", "||"), n)
				n.Parent.InsertBefore(dom.Nd("br"), n)
				n.Parent.InsertBefore(dom.Nd("text", "\n"), n)
			}
		}

		for c := n.FirstChild; c != nil; c = c.NextSibling {
			rewrite(c)
		}
	}

	docRoot, err := html.Parse(strings.NewReader(s))
	if err != nil {
		panic(fmt.Sprintf("3 %v \n", err))
	}

	rewrite(docRoot)

	var b bytes.Buffer
	if err = html.Render(&b, docRoot); err != nil {
		panic(fmt.Sprintf("4 %v \n", err))
	}

	return b.String()
}