func TestSelectors(t *testing.T) { for _, test := range selectorTests { s, err := Compile(test.selector) if err != nil { t.Errorf("error compiling %q: %s", test.selector, err) continue } doc, err := html.Parse(strings.NewReader(test.HTML)) if err != nil { t.Errorf("error parsing %q: %s", test.HTML, err) continue } matches := s.MatchAll(doc) if len(matches) != len(test.results) { t.Errorf("wanted %d elements, got %d instead", len(test.results), len(matches)) continue } for i, m := range matches { got := nodeString(m) if got != test.results[i] { t.Errorf("wanted %s, got %s instead", test.results[i], got) } } } }
func ExampleParse() { s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>` doc, err := html.Parse(strings.NewReader(s)) if err != nil { log.Fatal(err) } var f func(*html.Node) f = func(n *html.Node) { if n.Type == html.ElementNode && n.Data == "a" { for _, a := range n.Attr { if a.Key == "href" { fmt.Println(a.Val) break } } } for c := n.FirstChild; c != nil; c = c.NextSibling { f(c) } } f(doc) // Output: // foo // /bar/baz }
func TestNewDocument(t *testing.T) { if f, e := os.Open("./testdata/page.html"); e != nil { t.Error(e.Error()) } else { defer f.Close() if node, e := html.Parse(f); e != nil { t.Error(e.Error()) } else { doc = NewDocumentFromNode(node) } } }
func LoadDoc(page string) *Document { if f, e := os.Open(fmt.Sprintf("./testdata/%s", page)); e != nil { panic(e.Error()) } else { defer f.Close() if node, e := html.Parse(f); e != nil { panic(e.Error()) } else { return NewDocumentFromNode(node) } } return nil }
// NewDocument() is a Document constructor that takes a string URL as argument. // It loads the specified document, parses it, and stores the root Document // node, ready to be manipulated. func NewDocument(url string) (d *Document, e error) { // Load the URL res, e := http.Get(url) if e != nil { return } defer res.Body.Close() // Parse the HTML into nodes root, e := html.Parse(res.Body) if e != nil { return } // Create and fill the document d = newDocument(root, res.Request.URL) return }