func reIndent(n *html.Node, lvl int) { if lvl > cScaffoldLvls && n.Parent == nil { bb := dom.PrintSubtree(n) _ = bb // log.Printf("%s", bb.Bytes()) hint := "" if ml3[n] > 0 { hint = " from ml3" } log.Print("reIndent: no parent ", hint) return } // Before children processing switch n.Type { case html.ElementNode: if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode { ind := strings.Repeat("\t", lvl-2) dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n" + ind}) } case html.CommentNode: dom.InsertBefore(n, &html.Node{Type: html.TextNode, Data: "\n"}) case html.TextNode: n.Data = strings.TrimSpace(n.Data) + " " if !strings.HasPrefix(n.Data, ",") && !strings.HasPrefix(n.Data, ".") { n.Data = " " + n.Data } // link texts without trailing space if n.Parent != nil && n.Parent.Data == "a" { n.Data = strings.TrimSpace(n.Data) } } // Children for c := n.FirstChild; c != nil; c = c.NextSibling { reIndent(c, lvl+1) } // After children processing switch n.Type { case html.ElementNode: // I dont know why, // but this needs to happend AFTER the children if lvl > cScaffoldLvls && n.Parent.Type == html.ElementNode { ind := strings.Repeat("\t", lvl-2) ind = "\n" + ind // link texts without new line if n.Data == "a" { ind = "" } if n.LastChild != nil { dom.InsertAfter(n.LastChild, &html.Node{Type: html.TextNode, Data: ind}) } } } }
func textifyNodeSubtree(n *html.Node) { if n.Type == html.ElementNode { nd := dom.Nd("text") nd.Data = textifySubtreeBruteForce(n, 0) nd.Data = stringspb.NormalizeInnerWhitespace(nd.Data) cc := []*html.Node{} for c := n.FirstChild; c != nil; c = c.NextSibling { cc = append(cc, c) } for _, c := range cc { n.RemoveChild(c) } n.AppendChild(nd) nd2 := dom.Nd("br") dom.InsertAfter(n, nd2) } }
// r is the request to the proxy // u is the url, that the proxy has called func ModifyHTML(r *http.Request, u *url.URL, s string) string { var nums int // counter // needed to get the current request into the // "static" recursive functions var PackageProxyHost = r.Host // port included! var PackageRemoteHost = fetch.HostFromUrl(u) fCondenseNode = func(n *html.Node, depth int) (ret string) { if n.Type == html.ElementNode && n.Data == "script" { ret += fmt.Sprintf(" var script%v = '[script]'; ", nums) nums++ return } if n.Type == html.ElementNode && n.Data == "style" { ret += fmt.Sprintf(" .xxx {margin:2px;} ") return } if n.Type == html.ElementNode && n.Data == "img" { ret += fmt.Sprintf(" [img] %v %v | ", getAttrVal(n.Attr, "alt"), getAttrVal(n.Attr, "src")) } if n.Type == html.ElementNode && n.Data == "a" { ret += "[a]" } if n.Type == html.TextNode { s := n.Data // s = replTabsNewline.Replace(s) // s = strings.TrimSpace(s) if len(s) < 4 { ret += s } else if s != "" { if depth > 0 { ret += fmt.Sprintf(" [txt%v] %v", depth, s) } else { ret += " [txt] " + s } } } for c := n.FirstChild; c != nil; c = c.NextSibling { ret += fCondenseNode(c, depth+1) } return } // -------------------------- // ---------------------- fRecurse = func(n *html.Node) { if n.Type == html.ElementNode && n.Data == "form" { hidFld := new(html.Node) hidFld.Type = html.ElementNode hidFld.Data = "input" hidFld.Attr = []html.Attribute{ html.Attribute{Key: "name", Val: "redirect-to"}, html.Attribute{Key: "value", Val: absolutize(getAttrVal(n.Attr, "action"), PackageRemoteHost)}, } n.AppendChild(hidFld) submt := new(html.Node) submt.Type = html.ElementNode submt.Data = "input" submt.Attr = []html.Attribute{ html.Attribute{Key: "type", Val: "submit"}, html.Attribute{Key: "value", Val: "subm"}, html.Attribute{Key: "accesskey", Val: "f"}, } n.AppendChild(submt) n.Attr = rewriteAttributes(n.Attr, PackageProxyHost, PackageRemoteHost) } if n.Type == html.ElementNode && n.Data == "script" { for i := 0; i < len(n.Attr); i++ { if n.Attr[i].Key == "src" { n.Attr[i].Val = emptySrc } } } if n.Type == html.ElementNode && (n.Data == "a" || n.Data == "img" || n.Data == "script" || n.Data == "style") { s := fCondenseNode(n, 0) //fmt.Printf("found %v\n", s) textReplacement := new(html.Node) textReplacement.Type = html.TextNode textReplacement.Data = s attrStore := []html.Attribute{} if n.Data == "a" || n.Data == "img" { attrStore = rewriteAttributes(n.Attr, PackageProxyHost, PackageRemoteHost) } if n.Data == "img" { n.Data = "a" } if n.Data == "a" { n.Attr = attrStore } // We want to remove all existing children. // Direct loop impossible, since "NextSibling" is set to nil by Remove(). // Therefore first assembling separately, then removing. children := make(map[*html.Node]struct{}) for c := n.FirstChild; c != nil; c = c.NextSibling { children[c] = struct{}{} } for k, _ := range children { n.RemoveChild(k) } // we can't put our replacement "under" an image, since img cannot have children if n.Type == html.ElementNode && n.Data == "img" { // n.Parent.InsertBefore(textReplacement,n) dom.InsertAfter(n, textReplacement) dom.RemoveNode(n) } else { n.AppendChild(textReplacement) } // Insert a || and a newline before every <a...> if n.Data == "a" { prev := n breaker0 := dom.Nd("text", "||") n.Parent.InsertBefore(breaker0, prev) breaker1 := dom.Nd("br") n.Parent.InsertBefore(breaker1, prev) breaker2 := dom.Nd("text", "\n") n.Parent.InsertBefore(breaker2, prev) } } for c := n.FirstChild; c != nil; c = c.NextSibling { fRecurse(c) } } // -------------------------- // ---------------------- var docRoot *html.Node var err error rdr := strings.NewReader(s) docRoot, err = html.Parse(rdr) if err != nil { panic(fmt.Sprintf("3 %v \n", err)) } fRecurse(docRoot) var b bytes.Buffer err = html.Render(&b, docRoot) if err != nil { panic(fmt.Sprintf("4 %v \n", err)) } // log.Printf("len is %v\n", b.Len()) return b.String() }