// NewDocumentFromReader returns a Document from a generic reader. // It returns an error as second value if the reader's data cannot be parsed // as html. It does *not* check if the reader is also an io.Closer, so the // provided reader is never closed by this call, it is the responsibility // of the caller to close it if required. func NewDocumentFromReader(r io.Reader) (*Document, error) { root, e := html.Parse(r) if e != nil { return nil, e } return newDocument(root, nil), nil }
func MustParseHTML(doc string) *html.Node { dom, err := html.Parse(strings.NewReader(doc)) if err != nil { panic(err) } return dom }
func ExampleParse() { s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>` doc, err := html.Parse(strings.NewReader(s)) if err != nil { log.Fatal(err) } var f func(*html.Node) f = func(n *html.Node) { if n.Type == html.ElementNode && n.Data == "a" { for _, a := range n.Attr { if a.Key == "href" { fmt.Println(a.Val) break } } } for c := n.FirstChild; c != nil; c = c.NextSibling { f(c) } } f(doc) // Output: // foo // /bar/baz }
func TestNewDocument(t *testing.T) { if f, e := os.Open("./testdata/page.html"); e != nil { t.Error(e.Error()) } else { defer f.Close() if node, e := html.Parse(f); e != nil { t.Error(e.Error()) } else { doc = NewDocumentFromNode(node) } } }
func loadDoc(page string) *Document { var f *os.File var e error if f, e = os.Open(fmt.Sprintf("./testdata/%s", page)); e != nil { panic(e.Error()) } defer f.Close() var node *html.Node if node, e = html.Parse(f); e != nil { panic(e.Error()) } return NewDocumentFromNode(node) }
// NewDocumentFromResponse is another Document constructor that takes an http response as argument. // It loads the specified response's document, parses it, and stores the root Document // node, ready to be manipulated. The response's body is closed on return. func NewDocumentFromResponse(res *http.Response) (*Document, error) { if res == nil { return nil, errors.New("Response is nil pointer") } defer res.Body.Close() // Parse the HTML into nodes root, e := html.Parse(res.Body) if e != nil { return nil, e } // Create and fill the document return newDocument(root, res.Request.URL), nil }
func TestSelectors(t *testing.T) { for _, test := range selectorTests { s, err := Compile(test.selector) if err != nil { t.Errorf("error compiling %q: %s", test.selector, err) continue } doc, err := html.Parse(strings.NewReader(test.HTML)) if err != nil { t.Errorf("error parsing %q: %s", test.HTML, err) continue } matches := s.MatchAll(doc) if len(matches) != len(test.results) { t.Errorf("wanted %d elements, got %d instead", len(test.results), len(matches)) continue } for i, m := range matches { got := nodeString(m) if got != test.results[i] { t.Errorf("wanted %s, got %s instead", test.results[i], got) } } firstMatch := s.MatchFirst(doc) if len(test.results) == 0 { if firstMatch != nil { t.Errorf("MatchFirst: want nil, got %s", nodeString(firstMatch)) } } else { got := nodeString(firstMatch) if got != test.results[0] { t.Errorf("MatchFirst: want %s, got %s", test.results[0], got) } } } }