func getTitleNode(document *h5.Tree) (titleNode string, err error) { var chain *selector.Chain if chain, err = selector.Selector("title"); err != nil { return } if matches := chain.Find(document.Top()); len(matches) > 0 { match := matches[0:1] titleNode = h5.RenderNodesToString(match) } return }
// ExtractReader - Acts like Extract but first parses html body from reader. func (m SelectorsMap) ExtractReader(buffer io.Reader) (res interface{}, err error) { // If no buffer return error if buffer == nil { err = ErrNoBody return } // Parse body var node *h5.Tree node, err = h5.New(buffer) if err != nil { return } // Extract from top node res = m.Extract(node.Top()) return }
func rewriteBody(containerSelector string, dest io.Writer, body string) (err error) { if containerSelector == "" { dest.Write([]byte(body)) return } var chain *selector.Chain var document *h5.Tree if document, err = h5.NewFromString(body); err != nil { err = fmt.Errorf("invalid html document: %v", err) return } var titleNode string if titleNode, err = getTitleNode(document); err != nil { return } if chain, err = selector.Selector(containerSelector); err != nil { err = fmt.Errorf("invalid css: %v", containerSelector) return } if matches := chain.Find(document.Top()); len(matches) > 0 { match := matches[0:1] // Take only the first match newBody := h5.RenderNodesToString(h5.Children(match[0])) fmt.Printf("data: %v", h5.Data(match[0])) dest.Write([]byte(titleNode)) dest.Write([]byte(newBody)) return } err = fmt.Errorf("container not found") return }