示例#1
0
// wrapText collects consecutive nodes that are not flagged in isBlockElement
// into synthetic <p> elements, while elements that are flagged are passed
// through unchanged. Whitespace-only text nodes seen while no paragraph is
// open are passed through as well, so they never start a paragraph on their
// own.
//
// The input nodes are appended to the synthetic paragraph with AppendChild.
func wrapText(nodes []*html.Node) []*html.Node {
	out := make([]*html.Node, 0, len(nodes))
	var para *html.Node

	// flush emits the pending paragraph, if there is one. The paragraph is
	// rendered and parsed again so that p-inline-p expands.
	flush := func() {
		if para == nil {
			return
		}
		out = append(out, ParseDepth(Render(para), 0)...)
		para = nil
	}

	for _, node := range nodes {
		switch {
		case node.Type == html.ElementNode && isBlockElement[node.DataAtom]:
			// A block element terminates the current paragraph.
			flush()
			out = append(out, node)
		case para == nil && node.Type == html.TextNode && strings.TrimSpace(node.Data) == "":
			// Blank text between blocks stays where it is.
			out = append(out, node)
		default:
			if para == nil {
				para = &html.Node{Type: html.ElementNode, DataAtom: atom.P, Data: "p"}
			}
			para.AppendChild(node)
		}
	}
	flush()
	return out
}
示例#2
0
// img2Link rewrites a node whose Data is "img" into an "a" node in place.
//
// Every "src" attribute key is renamed to "href", a "cfrom"="img" attribute
// is set via attrSet, and a text child of the form "[img] <label> <url> | " is
// appended. The label is the node's title attribute, or the marker "[ctdr]"
// when closureTextNodeExists reports true for the node. Nodes with any other
// Data are left untouched.
func img2Link(img *html.Node) {
	if img.Data != "img" {
		return
	}

	img.Data = "a"
	for i := range img.Attr {
		if img.Attr[i].Key == "src" {
			img.Attr[i].Key = "href"
		}
	}

	double := closureTextNodeExists(img)
	title := attrX(img.Attr, "title")
	link := urlBeautify(attrX(img.Attr, "href"))

	var imgContent string
	if double {
		// "[ctdr]" stands for: content title double removed.
		imgContent = fmt.Sprintf("[img] %v %v | ", "[ctdr]", link)
	} else {
		imgContent = fmt.Sprintf("[img] %v %v | ", title, link)
	}

	img.Attr = attrSet(img.Attr, "cfrom", "img")
	img.AppendChild(dom.Nd("text", imgContent))
}
示例#3
0
// runMergeNodes appends prev and then next to parent, skipping whichever of
// them is nil, and returns the result of parserUtils.mergeNodes for the three
// nodes and the addSeparator flag.
func runMergeNodes(parent, prev, next *html.Node, addSeparator bool) *html.Node {
	for _, child := range []*html.Node{prev, next} {
		if child != nil {
			parent.AppendChild(child)
		}
	}

	var utils parserUtils
	return utils.mergeNodes(parent, prev, next, addSeparator)
}
示例#4
0
// setNodeText discards every child of node and leaves a single text node
// holding s in their place.
func setNodeText(node *html.Node, s string) {
	// Always re-read FirstChild: RemoveChild unlinks the removed node, so its
	// sibling pointers cannot be followed.
	for child := node.FirstChild; child != nil; child = node.FirstChild {
		node.RemoveChild(child)
	}

	text := &html.Node{Type: html.TextNode, Data: s}
	node.AppendChild(text)
}
示例#5
0
// CloneNode returns a deep copy of n. The copy carries n's Type, DataAtom,
// Data and its own copy of the attribute slice, and every descendant of n is
// cloned recursively and attached in the same order. The returned node has no
// parent or siblings.
func CloneNode(n *exphtml.Node) *exphtml.Node {
	dup := &exphtml.Node{
		Type:     n.Type,
		DataAtom: n.DataAtom,
		Data:     n.Data,
		// Fresh backing array so the clone never aliases n's attributes.
		Attr: append(make([]exphtml.Attribute, 0, len(n.Attr)), n.Attr...),
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		dup.AppendChild(CloneNode(child))
	}
	return dup
}
示例#6
0
// addChildTextNodeToBegining puts text at the very start of node's content.
// When node's first child is already a text node, text is prefixed to that
// child's data; otherwise a new text node is inserted as the first child.
func (u *parserUtils) addChildTextNodeToBegining(node *html.Node, text string) {
	first := node.FirstChild
	if first != nil && first.Type == html.TextNode {
		first.Data = text + first.Data
		return
	}

	// InsertBefore appends when the reference child is nil, so a single call
	// covers both the empty node and the node with a non-text first child.
	node.InsertBefore(&html.Node{Type: html.TextNode, Data: text}, first)
}
示例#7
0
// append1 actually appends to the merged HTML node tree.
//
// A copy of proto is created (with its Data replaced by text when proto is a
// text node). For any action other than '=', the copy is wrapped in a <span>
// whose attributes come from the configured InsertedSpan ('+'), DeletedSpan
// ('-') or ReplacedSpan ('~'). The result is then wrapped in copies of proto's
// ancestors, innermost first, up to but excluding the ancestor returned by
// lastMatchingLeaf, and finally attached to the append point returned by
// lastMatchingLeaf.
//
// Nothing is appended when proto is nil, when lastMatchingLeaf yields a nil
// append point or ancestor, or when their DataAtom values differ.
func (ap *appendContext) append1(action rune, text string, proto *html.Node, pos posT) {
	if proto == nil {
		return
	}
	appendPoint, protoAncestor := ap.lastMatchingLeaf(proto, action, pos)
	if appendPoint == nil || protoAncestor == nil || appendPoint.DataAtom != protoAncestor.DataAtom {
		return
	}

	leaf := new(html.Node)
	copyNode(leaf, proto)
	if proto.Type == html.TextNode {
		leaf.Data = text
	}

	if action != '=' {
		span := &html.Node{Type: html.ElementNode, DataAtom: atom.Span, Data: "span"}
		switch action {
		case '+':
			span.Attr = convertAttributes(ap.c.InsertedSpan)
		case '-':
			span.Attr = convertAttributes(ap.c.DeletedSpan)
		case '~':
			span.Attr = convertAttributes(ap.c.ReplacedSpan)
		}
		span.AppendChild(leaf)
		leaf = span
	}

	// Rebuild the chain of ancestors between proto and protoAncestor around
	// the new leaf, working from the inside out.
	for anc := proto.Parent; anc != nil && anc != protoAncestor; anc = anc.Parent {
		shell := new(html.Node)
		copyNode(shell, anc)
		shell.AppendChild(leaf)
		leaf = shell
	}
	appendPoint.AppendChild(leaf)
}
示例#8
0
文件: merge.go 项目: ssor/makeepub
// mergeHtml merges the HTML files named in names, read from folder, into a
// single document and returns its rendered bytes.
//
// The first file supplies the whole document (head included). For every
// following file, the children of its <body> are moved, in order, to the end
// of the first document's <body>.
//
// Any failure to open or parse a file, a file without a <body>, or a render
// failure is reported through logger.Fatalf. When names is empty there is
// nothing to render and nil is returned.
func mergeHtml(folder VirtualFolder, names []string) []byte {
	var result, body *html.Node

	for _, name := range names {
		f, e := folder.OpenFile(name)
		if e != nil {
			logger.Fatalf("error reading '%s': %v\n", name, e)
		}

		doc, e := html.Parse(f)
		f.Close()
		if e != nil {
			logger.Fatalf("error parsing '%s': %v\n", name, e)
		}

		b := findFirstChild(doc, atom.Body)
		if b == nil {
			logger.Fatalf("'%s' has no 'body' element.\n", name)
		}

		if body == nil {
			// The first document becomes the merge target.
			result = doc
			body = b
			continue
		}

		// Re-read FirstChild on every step: RemoveChild unlinks the node, so
		// its NextSibling cannot be followed.
		for n := b.FirstChild; n != nil; n = b.FirstChild {
			b.RemoveChild(n)
			body.AppendChild(n)
		}
	}

	if result == nil {
		// No input documents; html.Render would dereference a nil node.
		return nil
	}

	var buf bytes.Buffer
	if e := html.Render(&buf, result); e != nil {
		logger.Fatalf("failed render result for '%s': %v\n", folder.Name(), e)
	}

	return buf.Bytes()
}
示例#9
0
// cleanChildren detaches every child of parent, passes each one through
// filterNode, optionally wraps the results with wrapText, and re-attaches
// them to parent in order.
//
// Wrapping happens only when c.WrapText is set and parent is registered for
// wrapping: either its DataAtom is a key of c.wrap, or it has no atom
// (DataAtom == 0) and its Data is a key of c.wrapCustom.
func cleanChildren(c *Config, parent *html.Node) {
	var filtered []*html.Node
	for child := parent.FirstChild; child != nil; child = parent.FirstChild {
		parent.RemoveChild(child)
		filtered = append(filtered, filterNode(c, child))
	}

	// wantsWrap reports whether parent is registered for text wrapping.
	wantsWrap := func() bool {
		if _, ok := c.wrap[parent.DataAtom]; ok {
			return true
		}
		if parent.DataAtom != 0 {
			return false
		}
		_, ok := c.wrapCustom[parent.Data]
		return ok
	}
	if c.WrapText && wantsWrap() {
		filtered = wrapText(filtered)
	}

	for i := range filtered {
		parent.AppendChild(filtered[i])
	}
}
示例#10
0
// textifyNodeSubtree collapses the subtree below an element node into a
// single text child. The text is produced by textifySubtreeBruteForce and
// passed through stringspb.NormalizeInnerWhitespace; all previous children of
// n are removed, and a "br" node is inserted after n via dom.InsertAfter.
// Non-element nodes are left untouched.
func textifyNodeSubtree(n *html.Node) {
	if n.Type != html.ElementNode {
		return
	}

	// Compute the replacement text while the subtree is still intact.
	flat := dom.Nd("text")
	flat.Data = stringspb.NormalizeInnerWhitespace(textifySubtreeBruteForce(n, 0))

	// Always remove the current first child; RemoveChild unlinks the node, so
	// its sibling pointer cannot be followed.
	for n.FirstChild != nil {
		n.RemoveChild(n.FirstChild)
	}
	n.AppendChild(flat)

	dom.InsertAfter(n, dom.Nd("br"))
}
示例#11
0
// helperRemoveNode is a test helper: it attaches prev, a fresh empty node and
// next to parent (in that order), calls parserUtils.removeNode on the fresh
// node, and asserts that the call returns neither an error nor a result.
func helperRemoveNode(parent, prev, next *html.Node, isSeparator bool) {
	target := new(html.Node)
	for _, child := range []*html.Node{prev, target, next} {
		parent.AppendChild(child)
	}

	var utils parserUtils
	got, err := utils.removeNode(target, isSeparator)
	So(err, ShouldBeNil)
	So(got, ShouldEqual, nil)
}
示例#12
0
// CreationDate returns the time an HTML document was created.
//
// The file at path is parsed and searched for a meta element with
// name="created" whose "value" attribute parses with either format or
// format_no_tz. If several such elements parse, the last one wins. If none
// does, a new meta element carrying the current time is appended to the
// <head> of the parsed tree; this function does not write the file back.
//
// It returns a FileInfo for the document (path, parse tree, concatenated
// <title> text, creation time, and the file's modification time). The bool
// returned is true when the meta creation element has been added to the
// header. A non-nil error is returned when the file cannot be opened,
// stat'ed or parsed.
func CreationDate(path string) (*FileInfo, bool, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, false, err
	}
	defer f.Close()

	stat, err := f.Stat()
	if err != nil {
		return nil, false, err
	}

	doc, err := html.Parse(f)
	if err != nil {
		return nil, false, err
	}

	var (
		title   string
		head    *html.Node
		created time.Time
		hasMeta bool
	)

	var visit func(*html.Node)
	visit = func(n *html.Node) {
		if n.Type == html.ElementNode {
			switch n.Data {
			case "head":
				head = n
			case "title":
				for c := n.FirstChild; c != nil; c = c.NextSibling {
					if c.Type == html.TextNode {
						title += c.Data
					}
				}
			case "meta":
				name, err := getAttrByName(n, "name")
				if err != nil || name != "created" {
					break
				}
				value, err := getAttrByName(n, "value")
				if err != nil {
					break
				}
				// Parse into a scratch variable so that an unparsable
				// element cannot clobber a timestamp found earlier.
				t, err := time.Parse(format, value)
				if err != nil {
					t, err = time.Parse(format_no_tz, value)
				}
				if err == nil {
					created = t
					hasMeta = true
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			visit(c)
		}
	}
	visit(doc)

	if !hasMeta {
		now := time.Now()
		meta := &html.Node{
			Type: html.ElementNode,
			Data: "meta",
			Attr: []html.Attribute{
				{Key: "value", Val: now.Format(format)},
				{Key: "name", Val: "created"},
			},
		}
		// html.Parse builds a complete document, so a head element is
		// expected to have been found by the walk above.
		head.AppendChild(meta)
		created = now
	}

	fi := &FileInfo{
		Path:    path,
		Node:    doc,
		Title:   title,
		Created: created,
		Updated: stat.ModTime(),
	}
	return fi, !hasMeta, nil
}
// breakoutImagesFromAnchorTrees walks the tree below n depth-first and, for
// every <a> element in which searchImg finds an image, moves that image out
// of the anchor so that it becomes a following sibling of the anchor.
//
// Two cases are distinguished:
//
//   - The image is the anchor's only child at level 1: the image is moved
//     directly after the anchor, and the anchor receives a text child built
//     from its beautified href (prefixed with "[was img] " when that text is
//     shorter than six bytes).
//   - Otherwise the anchor is cloned, closureDeleter(true) is applied to the
//     original and closureDeleter(false) to the clone, and the image followed
//     by the clone are inserted directly after the original anchor.
//
// The anchor is expected to have a parent; n.Parent is dereferenced without a
// check. Turning the image itself into a link happens elsewhere, later.
func breakoutImagesFromAnchorTrees(n *html.Node) {
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		breakoutImagesFromAnchorTrees(c)
	}

	if n.Type != html.ElementNode || n.Data != "a" {
		return
	}

	img, lvl := searchImg(n, nil, 0)
	if img == nil {
		return
	}

	only1Child := n.FirstChild != nil && n.FirstChild == n.LastChild
	if lvl == 1 && only1Child {
		n.RemoveChild(img)
		// Insert after n; a nil NextSibling makes InsertBefore append.
		n.Parent.InsertBefore(img, n.NextSibling)
		contnt := urlBeautify(attrX(n.Attr, "href"))
		if len(contnt) < 6 {
			contnt = "[was img] " + contnt
		}
		n.AppendChild(dom.Nd("text", contnt))
		return
	}

	if debugBreakOut {
		log.Printf("\n%s\n", dom.PrintSubtree(n))
	}

	// The clone must be taken before the original is pruned.
	a1 := dom.CloneNodeWithSubtree(n)
	fc1 := closureDeleter(true)
	fc1(n, 0, false)
	if debugBreakOut {
		log.Printf("\n%s\n", dom.PrintSubtree(n))
	}

	fc2 := closureDeleter(false)
	fc2(a1, 0, false)
	if debugBreakOut {
		log.Printf("\n%s\n", dom.PrintSubtree(a1))
		log.Printf("--------------------\n")
	}

	// Insert right after n rather than appending to the parent: appending
	// corrupted the sequence whenever n had siblings to its right.
	n.Parent.InsertBefore(img, n.NextSibling)
	n.Parent.InsertBefore(a1, img.NextSibling)
}
示例#14
0
// condenseBottomUpV2 descends from n until the recursion level lvl reaches
// lvlDo and then, for element nodes whose tag name is enabled in types,
// flattens the node with flattenSubtreeV2 and moves flattened content back
// under the node's original parent.
//
// Parameters:
//   - n:     node currently visited.
//   - lvl:   depth of n relative to the initial call.
//   - lvlDo: depth at which the flattening is performed.
//   - types: set of element names (n.Data) that are to be flattened.
//
// NOTE(review): the two "for c := ...; c = c.NextSibling" loops below call
// RemoveChild on the loop variable. As the comment inside the disabled block
// further down states, removal resets NextSibling, so each of these loops
// looks like it stops after its first iteration. Confirm whether moving only
// the first child is intended before changing it.
func condenseBottomUpV2(n *html.Node, lvl, lvlDo int, types map[string]bool) {

	if lvl < lvlDo {

		// Snapshot the children first; the recursive calls may relink them.
		cs := []*html.Node{}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			cs = append(cs, c)
		}
		for _, c := range cs {
			condenseBottomUpV2(c, lvl+1, lvlDo, types)
		}

	} else {

		// log.Printf("action on %v %v\n", lvl, lvlDo)

		switch {

		case n.Type == html.ElementNode && types[n.Data]:

			// A node without a parent cannot be replaced in place.
			oldPar := n.Parent
			if oldPar == nil {
				return
			}

			// b collects the text written during flattening, newPar is the
			// node under which flattenSubtreeV2 gathered the flattened nodes.
			b, newPar := flattenSubtreeV2(n, nil, 0, nil)

			// placeholder := dom.Nd("div")
			// par := n.Parent
			// par.InsertBefore(placeholder, n.NextSibling)
			// par.RemoveChild(n)
			// par.InsertBefore(n2, placeholder)

			// NOTE(review): see the function comment; c.NextSibling is read
			// after c has been removed.
			for c := oldPar.FirstChild; c != nil; c = c.NextSibling {
				oldPar.RemoveChild(c)
			}

			// NOTE(review): same pattern; after AppendChild, c is the last
			// child of oldPar, so c.NextSibling no longer points into newPar.
			for c := newPar.FirstChild; c != nil; c = c.NextSibling {
				newPar.RemoveChild(c)
				oldPar.AppendChild(c)
			}

			// Debug dump, only active for deep flattening levels.
			if lvlDo > 4 {
				bx := dom.PrintSubtree(newPar)
				fmt.Printf("%s", bx)
			}

			// n = n2

			// nodeRepl is only consumed by the disabled block below.
			nodeRepl := dom.Nd("text", b.String())

			// Disabled alternative implementation, kept for reference.
			if false {

				// Remove all existing children.
				// Direct loop impossible, since "NextSibling" is set to nil by Remove().
				children := []*html.Node{}
				for c := n.FirstChild; c != nil; c = c.NextSibling {
					children = append(children, c) //  assembling separately, before removing.
				}
				for _, c := range children {
					log.Printf("c %4v rem from %4v ", c.Data, n.Data)
					n.RemoveChild(c)
				}

				// we can't put our replacement "under" an image, since img cannot have children
				if n.Type == html.ElementNode && n.Data == "img" {
					n.Parent.InsertBefore(nodeRepl, n.NextSibling) // if n.NextSibling==nil => insert at the end
					n.Parent.RemoveChild(n)
				} else {
					n.AppendChild(nodeRepl)
				}

				// Insert a  || and a newline before every <a...>
				// if n.Data == "a" {
				// 	n.Parent.InsertBefore(dom.Nd("text", " || "), n)
				// }
			}

		default:
		}

	}

}
示例#15
0
// makeHtml assembles a minimal HTML document around the given title and body
// nodes and returns the document root.
//
// The document skeleton is <html><head><meta charset="utf-8"></head></html>;
// title_n is appended to its <head> and body_n replaces the <body> that the
// parser adds automatically.
//
// A nil title_n is replaced by a <title> element containing "Empty title",
// and a nil body_n by a <body> element containing "Empty body". Nodes supplied
// by the caller have their attributes cleared and are detached from their
// current parent, if any, before being inserted.
func makeHtml(title_n *html.Node, body_n *html.Node) *html.Node {
	// newElement builds a detached element that holds a single text child.
	newElement := func(tag string, a atom.Atom, text string) *html.Node {
		el := &html.Node{
			Type:     html.ElementNode,
			DataAtom: a,
			Data:     tag,
			Attr:     []html.Attribute{},
		}
		el.AppendChild(&html.Node{
			Type: html.TextNode,
			Data: text,
			Attr: []html.Attribute{},
		})
		return el
	}

	// adopt clears the attributes of a caller-supplied node and unlinks it
	// from its tree. AppendChild refuses nodes that still have a parent, and
	// a node that is already detached must not be dereferenced via Parent.
	adopt := func(n *html.Node) {
		n.Attr = []html.Attribute{}
		if n.Parent != nil {
			n.Parent.RemoveChild(n)
		}
	}

	if title_n == nil {
		title_n = newElement("title", atom.Title, "Empty title")
	} else {
		adopt(title_n)
	}

	if body_n == nil {
		body_n = newElement("body", atom.Body, "Empty body")
	} else {
		adopt(body_n)
	}

	model := "<html><head><meta charset=\"utf-8\"></head></html>"
	// The error is ignored on purpose: the input is a constant, well-formed
	// string read from memory.
	output, _ := html.Parse(strings.NewReader(model))

	htmlnode := output.FirstChild
	headnode := htmlnode.FirstChild
	defbodynode := headnode.NextSibling
	htmlnode.RemoveChild(defbodynode) // delete the empty <body> added by the parser
	headnode.AppendChild(title_n)
	htmlnode.AppendChild(body_n)

	return output
}
示例#16
0
文件: node.go 项目: justintan/gox
// AppendChildNodes appends every node in children, in slice order, to the
// end of parent's child list.
func AppendChildNodes(parent *html.Node, children []*html.Node) {
	for i := range children {
		parent.AppendChild(children[i])
	}
}
示例#17
0
// flattenSubtreeV2 recursively detaches n and its descendants from their
// current position and re-attaches them as direct children of tpar, while
// writing text content into b.
//
// Parameters:
//   - n:     node to process; it is removed from n.Parent in every branch.
//   - b:     buffer receiving text via wpf; allocated when nil.
//   - depth: recursion depth, used in the "[em lN]" / "[strong lN]" markers.
//   - tpar:  target parent; when nil, a childless copy of n (type, atom,
//     data, attributes) is created and used.
//
// It returns the buffer and the target parent.
//
// NOTE(review): every branch dereferences n.Parent, so n is assumed to have
// a parent on entry - confirm for top-level callers.
func flattenSubtreeV2(n *html.Node, b *bytes.Buffer, depth int, tpar *html.Node) (*bytes.Buffer, *html.Node) {

	if b == nil {
		b = new(bytes.Buffer)
	}
	if tpar == nil {
		tpar = &html.Node{
			Type:     n.Type,
			DataAtom: n.DataAtom,
			Data:     n.Data,
			Attr:     make([]html.Attribute, len(n.Attr)),
		}
		copy(tpar.Attr, n.Attr)
	}

	// Move n itself under tpar, depending on what kind of node it is.
	switch {
	case n.Type == html.ElementNode && n.Data == "a":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
		// wpf(b, "[a] ")
	case n.Type == html.ElementNode && n.Data == "img":
		// img2Link(n)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "em" || n.Data == "strong":
		// Emphasis leaves a marker with its depth in the text buffer.
		wpf(b, "[%v l%v] ", n.Data, depth)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "label" || n.Data == "input" || n.Data == "textarea":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "p" || n.Data == "div" || n.Data == "li" || n.Data == "ol" || n.Data == "h1" || n.Data == "h2" || n.Data == "ul":
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	case n.Data == "span":
		// The span itself is dropped; its content is hoisted into tpar.
		// NOTE(review): c.NextSibling is read after c was moved to the end of
		// tpar, so this loop presumably handles only the first child; the
		// remaining children are picked up by the recursion below - verify.
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			n.RemoveChild(c)
			tpar.AppendChild(c)
		}
		n.Parent.RemoveChild(n)
	case n.Type == html.TextNode && n.Data != "":
		// Text is trimmed, given one trailing space, and mirrored into b.
		n.Data = strings.TrimSpace(n.Data)
		n.Data += " "
		// NOTE(review): n.Data is passed in the position where the other wpf
		// call passes a format string - check how wpf treats '%' in text.
		wpf(b, n.Data)
		n.Parent.RemoveChild(n)
		tpar.AppendChild(n)
	default:
		// Anything else is logged and dropped from the tree.
		log.Printf("unhandled %s %s\n", dom.NodeTypeStr(n.Type), n.Data)
		n.Parent.RemoveChild(n)
	}

	//
	//
	// Snapshot the children that are still attached to n before recursing,
	// because each recursive call detaches the child it is given.
	children := []*html.Node{}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		// fmt.Printf("still has children %v\n", c.Data)
		children = append(children, c) //  assembling separately, before removing.
	}
	for _, c := range children {
		flattenSubtreeV2(c, b, depth+1, tpar)
	}

	return b, tpar
}
示例#18
0
// flattenSubtreeV3Inner mirrors the children of n into nClone without
// modifying n, dissolving <span> elements along the way.
//
//   - Elements listed in standard, and <a> elements, are cloned, attached to
//     nClone, and processed recursively.
//   - <img> elements are cloned and attached without recursion.
//   - For a span, each of its children is cloned and attached directly to
//     nClone, followed by a " " text node, and then processed recursively.
//   - Non-empty text nodes are cloned with their data trimmed and a single
//     trailing space added.
//   - Everything else is logged as unhandled and omitted.
//
// lvl is the current recursion depth; it is only passed along.
func flattenSubtreeV3Inner(n, nClone *html.Node, lvl int) {
	for child := n.FirstChild; child != nil; child = child.NextSibling {

		childCopy := dom.CloneNode(child)
		isElement := child.Type == html.ElementNode

		switch {

		case isElement && (standard[child.Data] || child.Data == "a"):
			nClone.AppendChild(childCopy)
			flattenSubtreeV3Inner(child, childCopy, lvl+1)

		case isElement && child.Data == "img":
			nClone.AppendChild(childCopy)

		case child.Data == "span":
			// The span itself is not copied; its children move up one level.
			for grand := child.FirstChild; grand != nil; grand = grand.NextSibling {
				grandCopy := dom.CloneNode(grand)
				nClone.AppendChild(grandCopy)
				nClone.AppendChild(dom.Nd("text", " "))
				flattenSubtreeV3Inner(grand, grandCopy, lvl+1)
			}

		case child.Type == html.TextNode && child.Data != "":
			childCopy.Data = strings.TrimSpace(childCopy.Data) + " "
			nClone.AppendChild(childCopy)

		default:
			log.Printf("unhandled %s %s\n", dom.NodeTypeStr(child.Type), child.Data)

		}
	}
}