Exemple #1
0
// ParseEntry parses the HTML document read from r and builds an AmebloEntry
// from it.
//
// Content is the rendered markup of every node matched by the ".articleText"
// selector. Title is the rendered first child of the first node matched by
// the "title" selector, cut off at the first "|".
//
// It returns (nil, nil) when either selector matches nothing, so callers must
// check the returned entry for nil in addition to the error. A non-nil error
// is returned when the HTML cannot be parsed or a selector fails to compile.
func ParseEntry(r io.Reader) (*AmebloEntry, error) {
	root, err := html.Parse(r)
	if err != nil {
		return nil, err
	}

	articleSel, err := selector.Selector(".articleText")
	if err != nil {
		return nil, err
	}
	articleNodes := articleSel.Find(root)
	if len(articleNodes) == 0 {
		return nil, nil
	}
	content := h5.RenderNodesToString(articleNodes)

	titleSel, err := selector.Selector("title")
	if err != nil {
		return nil, err
	}
	titleNodes := titleSel.Find(root)
	if len(titleNodes) == 0 {
		return nil, nil
	}

	// A matched title with no children has a nil FirstChild; do not hand a
	// nil node to the renderer, treat it as an empty title instead.
	var title string
	if child := titleNodes[0].FirstChild; child != nil {
		title = extractText(child)
	}

	entry := &AmebloEntry{
		// Keep only the part before the first "|" separator.
		Title:   strings.Split(title, "|")[0],
		Content: content,
	}
	return entry, nil
}
// TestSelectorMatch walks the matchers table and, for each spec, checks that
// the head of the parsed selector chain matches spec.n and does not match
// spec.n2.
func TestSelectorMatch(t *testing.T) {
	for _, spec := range matchers {
		chn, err := Selector(spec.s)
		if err != nil {
			t.Errorf("Error parsing selector %q: %v", spec.s, err)
			// Nothing usable was parsed; skip the match checks for this
			// spec instead of dereferencing the failed chain.
			continue
		}
		if !chn.Head.Match(spec.n) {
			t.Errorf("spec %q didn't match %q when it should have",
				chn, h5.RenderNodesToString([]*html.Node{spec.n}))
		}
		if chn.Head.Match(spec.n2) {
			t.Errorf("spec %q matched %q when it shouldn't have",
				chn, h5.RenderNodesToString([]*html.Node{spec.n2}))
		}
	}
}
// TestSelectorFind walks the finders table and, for each spec, checks that
// the parsed selector finds at least one node under spec.n and that the
// rendered result equals the rendering of the expected nodes spec.ns.
func TestSelectorFind(t *testing.T) {
	for _, spec := range finders {
		chn, err := Selector(spec.s)
		if err != nil {
			t.Errorf("Error parsing selector %q: %v", spec.s, err)
			// Nothing usable was parsed; skip the find checks for this
			// spec instead of calling Find on the failed chain.
			continue
		}
		ns := chn.Find(spec.n)
		if len(ns) < 1 {
			t.Errorf("%q didn't find any nodes in %q",
				chn, h5.RenderNodesToString([]*html.Node{spec.n}))
		}
		if h5.RenderNodesToString(ns) != h5.RenderNodesToString(spec.ns) {
			t.Errorf("%q != %q",
				h5.RenderNodesToString(ns), h5.RenderNodesToString(spec.ns))
		}
	}
}
Exemple #4
0
// CompareNodes compares two node lists pairwise, in order.
//
// If the lists differ in length it returns an *Error describing the count
// mismatch together with both lists rendered to strings. Otherwise each pair
// is handed to CompareNode and the first non-nil result is returned. A nil
// return means every pair compared equal.
func CompareNodes(originalNodes, expectedNodes []*html.Node) *Error {
	gotCount, wantCount := len(originalNodes), len(expectedNodes)
	if gotCount != wantCount {
		description := fmt.Sprintf("Expected node to have %v elements, but got %v", wantCount, gotCount)
		return &Error{
			Description: description,
			Got:         h5.RenderNodesToString(originalNodes),
			Expected:    h5.RenderNodesToString(expectedNodes),
		}
	}

	for idx := range originalNodes {
		if mismatch := CompareNode(originalNodes[idx], expectedNodes[idx]); mismatch != nil {
			return mismatch
		}
	}
	return nil
}
Exemple #5
0
// CompareNode reports the first difference found between originalNode and
// expectedNode, or nil when they are considered equal.
//
// The checks run in this order: node Type, node Data, every attribute of
// expectedNode (it must exist on originalNode with the same value), the
// attribute count, and finally the children, recursively via CompareNodes.
// For the "class" and "style" attributes a differing value is still accepted
// when equalWithSeparator reports the two values equal using " " and ";" as
// the separator respectively.
//
// The returned *Error carries both nodes rendered to strings. Rendering is
// done only when a mismatch is actually reported, so a successful comparison
// does not re-render each subtree at every recursion level.
func CompareNode(originalNode, expectedNode *html.Node) *Error {
	// mismatch builds the error lazily, on the failure path only.
	mismatch := func(description string) *Error {
		return &Error{
			Description: description,
			Got:         h5.RenderNodesToString([]*html.Node{originalNode}),
			Expected:    h5.RenderNodesToString([]*html.Node{expectedNode}),
		}
	}

	if originalNode.Type != expectedNode.Type {
		return mismatch("Node type does not match")
	}
	if originalNode.Data != expectedNode.Data {
		return mismatch("Nodes data does not match")
	}

	for _, attr := range expectedNode.Attr {
		attrFound := false
		attrValueSame := false
		for _, originalAttr := range originalNode.Attr {
			if originalAttr.Key != attr.Key {
				continue
			}
			attrFound = true
			if originalAttr.Val == attr.Val {
				attrValueSame = true
				continue
			}
			// Values differ textually; class and style get a
			// separator-aware comparison before being rejected.
			switch attr.Key {
			case "class":
				attrValueSame = equalWithSeparator(originalAttr.Val, attr.Val, " ")
			case "style":
				attrValueSame = equalWithSeparator(originalAttr.Val, attr.Val, ";")
			}
		}
		if !attrFound {
			return mismatch(fmt.Sprintf("Attribute %v not found in node", attr.Key))
		}
		if !attrValueSame {
			return mismatch(fmt.Sprintf("Attribute %v value is different", attr.Key))
		}
	}

	// Every expected attribute is present; a count difference therefore means
	// originalNode carries attributes that expectedNode does not.
	if len(originalNode.Attr) != len(expectedNode.Attr) {
		return mismatch("Different number of node attributes")
	}
	return CompareNodes(h5.Children(originalNode), h5.Children(expectedNode))
}
Exemple #6
0
// Replace builds a TransformFunc that swaps the node it is applied to for the
// nodes in ns: each of ns is inserted before the target, in order, and the
// target is then removed from its parent.
//
// The returned function panics when the target node has no parent.
func Replace(ns ...*html.Node) TransformFunc {
	return func(n *html.Node) {
		parent := n.Parent
		if parent == nil {
			panic(fmt.Sprintf("Attempt to replace Root node: %s", h5.RenderNodesToString([]*html.Node{n})))
		}

		for i := range ns {
			parent.InsertBefore(ns[i], n)
		}
		parent.RemoveChild(n)
	}
}
Exemple #7
0
// getTitleNode renders the first node matched by the "title" selector under
// document.Top() and returns it as a string.
//
// It returns an empty string with a nil error when nothing matches, and the
// selector's error when the selector cannot be built.
func getTitleNode(document *h5.Tree) (titleNode string, err error) {
	titleChain, err := selector.Selector("title")
	if err != nil {
		return "", err
	}

	found := titleChain.Find(document.Top())
	if len(found) == 0 {
		return "", nil
	}

	// Only the first match is rendered.
	return h5.RenderNodesToString(found[:1]), nil
}
Exemple #8
0
func rewriteBody(containerSelector string, dest io.Writer, body string) (err error) {
	if containerSelector == "" {
		dest.Write([]byte(body))
		return
	}

	var chain *selector.Chain
	var document *h5.Tree

	if document, err = h5.NewFromString(body); err != nil {
		err = fmt.Errorf("invalid html document: %v", err)
		return
	}

	var titleNode string
	if titleNode, err = getTitleNode(document); err != nil {
		return
	}

	if chain, err = selector.Selector(containerSelector); err != nil {
		err = fmt.Errorf("invalid css: %v", containerSelector)
		return
	}

	if matches := chain.Find(document.Top()); len(matches) > 0 {
		match := matches[0:1] // Take only the first match
		newBody := h5.RenderNodesToString(h5.Children(match[0]))

		fmt.Printf("data: %v", h5.Data(match[0]))

		dest.Write([]byte(titleNode))
		dest.Write([]byte(newBody))
		return
	}

	err = fmt.Errorf("container not found")
	return
}
Exemple #9
0
// extractText returns the string produced by h5.RenderNodesToString for a
// one-element slice holding n.
func extractText(n *html.Node) string {
	single := []*html.Node{n}
	return h5.RenderNodesToString(single)
}