func (u *parserUtils) mergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node { prevText := prev != nil && prev.Type == html.TextNode nextText := next != nil && next.Type == html.TextNode delim := "" if addSeparator { delim = " " } if prevText && nextText { prev.Data = prev.Data + delim + next.Data parent.RemoveChild(next) return prev.NextSibling } if prevText { prev.Data = prev.Data + delim } else if nextText { next.Data = delim + next.Data } else if addSeparator { newNode := &html.Node{ Type: html.TextNode, Data: delim} parent.InsertBefore(newNode, next) } return next }
func (u *parserUtils) addChildTextNodeToBegining(node *html.Node, text string) { if node.FirstChild != nil && node.FirstChild.Type == html.TextNode { node.FirstChild.Data = text + node.FirstChild.Data } else { newNode := &html.Node{ Type: html.TextNode, Data: text} if node.FirstChild == nil { node.AppendChild(newNode) } else { node.InsertBefore(newNode, node.FirstChild) } } }
// Minifies node and returns a minification Result. func doMinify(node *html.Node, ctx *context) result { prevWasWhitespace := false var next *html.Node rv := result{} for child := node.FirstChild; child != nil; child = next { next = child.NextSibling script := getHTMLNodeAttr(child, "script", "src") if rv.IndexHTMLBase == "" { rv.IndexHTMLBase = getHTMLNodeAttr(child, "base", "href") } switch { case strings.Contains(script, "libs/") && strings.HasSuffix(script, ".js"): minFile := script[:len(script)-3] + ".min.js" if _, err := os.Stat(filepath.Join(ctx.BaseDir, minFile)); err == nil { replaceAttrValue(child, "src", minFile) } prevWasWhitespace = false case strings.HasSuffix(script, ".js"): if !ctx.FoundFirstAppScript { ctx.FoundFirstAppScript = true node.InsertBefore(makeAppMinJsNode(), child) node.InsertBefore(makeNewLine(), child) } rv.AppScripts = append(rv.AppScripts, script) node.RemoveChild(child) case isWhitespaceText(child) && node.Type == html.ElementNode && node.Data == "head": if !prevWasWhitespace { node.InsertBefore(makeNewLine(), child) } node.RemoveChild(child) prevWasWhitespace = true default: if isPluggableUIInjectionComment(child) { rv.PluggableInjectionCount++ } else { childResult := doMinify(child, ctx) rv.merge(childResult) } prevWasWhitespace = false } } return rv }
// Condense upwards builds a three-levels subtree // starting from param node l1 // l2 and l3 nodes need to comply by type // // Then l3 is moved under l1; l2 is eliminated // // For <a> or "text" l3 nodes, we could introduce wrappers // // l2Types so far always is "div". // Multiple l2Types are possible, but difficult to imagine. // // l1 type could be changed - from div to ul for instance, but I found no use for that // // Implementation yields similar result as condenseTopDown1 // but the "all-or-nothing" logic is clearer func topDownV2(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) { if l1.Type != html.ElementNode && l1.Type != html.DocumentNode { return // cannot assign to - do not unable to have children } if l1.Data == "span" || l1.Data == "a" { return // want not condense into } // dig two levels deeper // isolate l2 var l2s []*html.Node for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling { l2s = append(l2s, l2) // l2s = append([]*html.Node{l2}, l2s...) // order inversion } // measure types l2Div := true // note that *all* l3 must have l3Type, not just those those of one l2 element // otherwise we get only partial restructuring - and therefore sequence errors l3Div := true for _, l2 := range l2s { l2Div = l2Div && l2.Type == html.ElementNode && l2Types[l2.Data] // l2 is a div for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling { l3Div = l3Div && (l3.Type == html.ElementNode && l3Types[l3.Data]) // l3 is a div or ul or form } } // act if l2Div && l3Div { for _, l2 := range l2s { // isolate l3 var l3s []*html.Node for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling { l3s = append(l3s, l3) // l3s = append([]*html.Node{l3}, l3s...) // order inversion } // detach l3 from l2 for _, l3 := range l3s { l2.RemoveChild(l3) } l1.RemoveChild(l2) // detach l2 from l1 for _, l3 := range l3s { // attach l3 to l1, possible wrapper of <a> or <span> l1.InsertBefore(l3, nil) // insert at end // wrap := html.Node{Type: html.ElementNode, Data: "p", Attr: []html.Attribute{html.Attribute{Key: "cfrm", Val: "div"}}} // wrap.FirstChild = c1 // l1.InsertBefore(&wrap, nil) } } } }
// Now this third implementation finally condenses *selectively*. // Not all boats from each pond are lifted equally. // We achieve tremendous structural simplification. // It also starts from top, pulling lower levels up. // Unlike implementation #1, that started from the middle. func topDownV3(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) { if l1.Type != html.ElementNode && l1.Type != html.DocumentNode { return // cannot assign to - do not unable to have children } if l1.Data == "span" || l1.Data == "a" { return // want not condense into } // dig two levels deep // isolate l2,l3 l2s := []*html.Node{} l3s := map[*html.Node][]*html.Node{} for l2 := l1.FirstChild; l2 != nil; l2 = l2.NextSibling { l2s = append(l2s, l2) // l2s = append([]*html.Node{l2}, l2s...) // order inversion for l3 := l2.FirstChild; l3 != nil; l3 = l3.NextSibling { l3s[l2] = append(l3s[l2], l3) // l3s[l2] = append(map[*html.Node][]*html.Node{l2: []*html.Node{l3}}, l3s[l2]...) // order inversion } } postponedRemoval := map[*html.Node]bool{} // // // check types for each l2 subtree distinctively for _, l2 := range l2s { l2Match := l2.Type == html.ElementNode && l2Types[l2.Data] // l2 is a div l3Match := true for _, l3 := range l3s[l2] { l3Match = l3Match && (l3.Type == html.ElementNode && l3Types[l3.Data]) } // act if l2Match && l3Match { // detach l3 from l2 for _, l3 := range l3s[l2] { // if ml3[l3] > 0 { // fmt.Printf("rmd_%v_%v ", ml3[l3], l3.Data) // } l2.RemoveChild(l3) // ml3[l3]++ } // Since we still need l2 below // We have to postpone detaching l2 from l1 // to the bottom // NOT HERE: l1.RemoveChild(l2) postponedRemoval[l2] = true for _, l3 := range l3s[l2] { // attach l3 to l1 if l3.Data != "a" && l3.Data != "span" { l1.InsertBefore(l3, l2) } else { wrap := dom.Nd("p") wrap.Attr = []html.Attribute{html.Attribute{Key: "cfrm", Val: "noth"}} wrap.AppendChild(l3) // NOT wrap.FirstChild = l3 l1.InsertBefore(wrap, l2) } } } } for k, _ := range postponedRemoval { l1.RemoveChild(k) // detach l2 from l1 } }