Пример #1
0
// getTitleNode looks up the "title" selector in document and renders the
// first matching node to a string.
//
// The returned string is empty when nothing matches. A non-nil error is
// returned only when the "title" selector itself cannot be built.
func getTitleNode(document *h5.Tree) (titleNode string, err error) {
	titleChain, err := selector.Selector("title")
	if err != nil {
		return "", err
	}

	found := titleChain.Find(document.Top())
	if len(found) == 0 {
		return "", nil
	}

	// Only the first match is rendered; any further matches are ignored.
	return h5.RenderNodesToString(found[:1]), nil
}
Пример #2
0
// ExtractReader parses an HTML document from buffer and then runs Extract on
// the top node of the parsed tree.
//
// It returns ErrNoBody when buffer is nil, and the parser's error when
// h5.New fails; res is nil in both cases.
func (m SelectorsMap) ExtractReader(buffer io.Reader) (res interface{}, err error) {
	// Without a reader there is nothing to parse.
	if buffer == nil {
		return nil, ErrNoBody
	}

	tree, parseErr := h5.New(buffer)
	if parseErr != nil {
		return nil, parseErr
	}

	// Extraction always starts from the top node of the parsed tree.
	return m.Extract(tree.Top()), nil
}
Пример #3
0
func rewriteBody(containerSelector string, dest io.Writer, body string) (err error) {
	if containerSelector == "" {
		dest.Write([]byte(body))
		return
	}

	var chain *selector.Chain
	var document *h5.Tree

	if document, err = h5.NewFromString(body); err != nil {
		err = fmt.Errorf("invalid html document: %v", err)
		return
	}

	var titleNode string
	if titleNode, err = getTitleNode(document); err != nil {
		return
	}

	if chain, err = selector.Selector(containerSelector); err != nil {
		err = fmt.Errorf("invalid css: %v", containerSelector)
		return
	}

	if matches := chain.Find(document.Top()); len(matches) > 0 {
		match := matches[0:1] // Take only the first match
		newBody := h5.RenderNodesToString(h5.Children(match[0]))

		fmt.Printf("data: %v", h5.Data(match[0]))

		dest.Write([]byte(titleNode))
		dest.Write([]byte(newBody))
		return
	}

	err = fmt.Errorf("container not found")
	return
}