Example #1
0
// mergeNodes joins the boundary between prev and next, two (possibly nil)
// children of parent, optionally placing a single space between them.
//
// When both are text nodes, next's text is folded into prev, next is removed
// from parent, and prev's new next sibling is returned. In every other case
// next is returned: the separator is appended to prev if prev is text,
// prepended to next if next is text, or — when neither is text and
// addSeparator is set — inserted as a fresh text node in front of next.
func (u *parserUtils) mergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	var sep string
	if addSeparator {
		sep = " "
	}

	leftIsText := prev != nil && prev.Type == html.TextNode
	rightIsText := next != nil && next.Type == html.TextNode

	switch {
	case leftIsText && rightIsText:
		// Collapse the two text nodes into the left one.
		prev.Data += sep + next.Data
		parent.RemoveChild(next)
		return prev.NextSibling
	case leftIsText:
		prev.Data += sep
	case rightIsText:
		next.Data = sep + next.Data
	case addSeparator:
		// No adjacent text node to extend, so the separator gets its own node.
		parent.InsertBefore(&html.Node{Type: html.TextNode, Data: sep}, next)
	}

	return next
}
Example #2
0
// addChildTextNodeToBegining puts text at the very start of node's content.
// If node already begins with a text node, text is prepended to that node's
// data; otherwise a new text node holding text becomes node's first child.
func (u *parserUtils) addChildTextNodeToBegining(node *html.Node, text string) {
	first := node.FirstChild
	if first != nil && first.Type == html.TextNode {
		first.Data = text + first.Data
		return
	}

	// InsertBefore appends when the reference child is nil, so this single
	// call also covers a node without any children.
	node.InsertBefore(&html.Node{Type: html.TextNode, Data: text}, first)
}
Example #3
0
// doMinify processes the direct children of node and returns the accumulated
// result. For each child it:
//
//   - records the first non-empty value of getHTMLNodeAttr(child, "base",
//     "href") in IndexHTMLBase;
//   - for a script source containing "libs/" and ending in ".js", switches the
//     src attribute to the ".min.js" variant when that file exists under
//     ctx.BaseDir;
//   - for any other ".js" script source, removes the child and appends the
//     source to AppScripts, inserting the app-min-js node plus a newline in
//     front of the first such script seen through ctx;
//   - inside a <head> element, replaces each run of whitespace text children
//     with a single newline node;
//   - otherwise counts pluggable-UI injection comments or recurses into the
//     child and merges its result.
func doMinify(node *html.Node, ctx *context) result {
	var (
		out        result
		afterBlank bool
	)
	// node itself is never retyped in the loop, so this can be computed once.
	inHead := node.Type == html.ElementNode && node.Data == "head"

	for cur := node.FirstChild; cur != nil; {
		// Remember the successor up front: cur may be unlinked below.
		following := cur.NextSibling

		src := getHTMLNodeAttr(cur, "script", "src")
		if out.IndexHTMLBase == "" {
			out.IndexHTMLBase = getHTMLNodeAttr(cur, "base", "href")
		}

		isJS := strings.HasSuffix(src, ".js")
		switch {
		case strings.Contains(src, "libs/") && isJS:
			minified := strings.TrimSuffix(src, ".js") + ".min.js"
			_, statErr := os.Stat(filepath.Join(ctx.BaseDir, minified))
			if statErr == nil {
				replaceAttrValue(cur, "src", minified)
			}
			afterBlank = false

		case isJS:
			if !ctx.FoundFirstAppScript {
				ctx.FoundFirstAppScript = true
				node.InsertBefore(makeAppMinJsNode(), cur)
				node.InsertBefore(makeNewLine(), cur)
			}
			out.AppScripts = append(out.AppScripts, src)
			node.RemoveChild(cur)
			// afterBlank is intentionally left as it was.

		case isWhitespaceText(cur) && inHead:
			// Only the first node of a whitespace run yields a newline.
			if !afterBlank {
				node.InsertBefore(makeNewLine(), cur)
			}
			node.RemoveChild(cur)
			afterBlank = true

		default:
			if isPluggableUIInjectionComment(cur) {
				out.PluggableInjectionCount++
			} else {
				out.merge(doMinify(cur, ctx))
			}
			afterBlank = false
		}

		cur = following
	}

	return out
}
Example #4
0
// topDownV2 condenses a three-level subtree rooted at l1: when the type
// checks pass, every grandchild (l3) is moved directly under l1 and the
// intermediate children (l2) are dropped.
//
// The check is all-or-nothing. Every l2 must be an element whose tag is in
// l2Types, and every l3 - across all l2 nodes, not just one - must be an
// element whose tag is in l3Types. Lifting only some of them would give a
// partial restructuring and scramble the node sequence.
//
// For <a> or text l3 nodes, wrappers could be introduced.
//
// l2Types has so far always been "div". Multiple l2 types are possible, but
// difficult to imagine. The type of l1 could be changed too - from div to ul,
// for instance - but no use for that has come up.
//
// The implementation yields a similar result as condenseTopDown1, but the
// all-or-nothing logic is clearer.
func topDownV2(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) {
	switch l1.Type {
	case html.ElementNode, html.DocumentNode:
		// these node types may hold children
	default:
		return
	}
	if l1.Data == "span" || l1.Data == "a" {
		return // never condense into inline elements
	}

	// Snapshot the middle level before anything is rearranged.
	var middle []*html.Node
	for c := l1.FirstChild; c != nil; c = c.NextSibling {
		middle = append(middle, c)
	}

	// Validate both levels; a single mismatch anywhere aborts the whole run.
	for _, mid := range middle {
		if mid.Type != html.ElementNode || !l2Types[mid.Data] {
			return
		}
		for leaf := mid.FirstChild; leaf != nil; leaf = leaf.NextSibling {
			if leaf.Type != html.ElementNode || !l3Types[leaf.Data] {
				return
			}
		}
	}

	// Unhook each middle node, then re-home its children at the end of l1.
	// Processing the middle nodes in order keeps the grandchildren in
	// document order.
	for _, mid := range middle {
		l1.RemoveChild(mid)
		for leaf := mid.FirstChild; leaf != nil; leaf = mid.FirstChild {
			mid.RemoveChild(leaf)
			l1.AppendChild(leaf)
		}
	}
}
Example #5
0
// topDownV3 condenses selectively: each child (l2) of l1 is judged on its
// own. An l2 qualifies when it is an element whose tag is in l2Types and all
// of its children (l3) are elements whose tags are in l3Types. The children
// of a qualifying l2 are moved up into l1 at the position of that l2, and the
// l2 itself is removed; non-qualifying l2 subtrees are left untouched.
//
// Lifted <a> and <span> nodes are wrapped in a <p cfrm="noth"> element
// instead of being attached to l1 directly.
//
// Like the other top-down variant it starts at the top and pulls lower
// levels up, unlike implementation #1, which started from the middle.
func topDownV3(l1 *html.Node, l2Types map[string]bool, l3Types map[string]bool) {
	if l1.Type != html.ElementNode && l1.Type != html.DocumentNode {
		return // this node type cannot hold children
	}
	switch l1.Data {
	case "span", "a":
		return // never condense into inline elements
	}

	// Snapshot two levels deep before any node is moved.
	var kids []*html.Node
	grandkids := make(map[*html.Node][]*html.Node)
	for kid := l1.FirstChild; kid != nil; kid = kid.NextSibling {
		kids = append(kids, kid)
		for gk := kid.FirstChild; gk != nil; gk = gk.NextSibling {
			grandkids[kid] = append(grandkids[kid], gk)
		}
	}

	var emptied []*html.Node
	for _, kid := range kids {
		if kid.Type != html.ElementNode || !l2Types[kid.Data] {
			continue
		}

		liftable := true
		for _, gk := range grandkids[kid] {
			if gk.Type != html.ElementNode || !l3Types[gk.Data] {
				liftable = false
				break
			}
		}
		if !liftable {
			continue
		}

		// kid must stay attached for now: it is the insertion anchor that
		// keeps the lifted nodes at its position inside l1.
		for _, gk := range grandkids[kid] {
			kid.RemoveChild(gk)

			switch gk.Data {
			case "a", "span":
				wrap := dom.Nd("p")
				wrap.Attr = []html.Attribute{{Key: "cfrm", Val: "noth"}}
				wrap.AppendChild(gk)
				l1.InsertBefore(wrap, kid)
			default:
				l1.InsertBefore(gk, kid)
			}
		}
		emptied = append(emptied, kid)
	}

	// Now that no anchor is needed any more, drop the emptied l2 nodes.
	for _, kid := range emptied {
		l1.RemoveChild(kid)
	}
}