func MustParseHTML(doc string) *html.Node { dom, err := html.Parse(strings.NewReader(doc)) if err != nil { panic(err) } return dom }
// NewDocumentFromReader returns a Document from a generic reader. // It returns an error as second value if the reader's data cannot be parsed // as html. It does *not* check if the reader is also an io.Closer, so the // provided reader is never closed by this call, it is the responsibility // of the caller to close it if required. func NewDocumentFromReader(r io.Reader) (*Document, error) { root, e := html.Parse(r) if e != nil { return nil, e } return newDocument(root, nil), nil }
func ExampleParse() { s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>` doc, err := html.Parse(strings.NewReader(s)) if err != nil { log.Fatal(err) } var f func(*html.Node) f = func(n *html.Node) { if n.Type == html.ElementNode && n.Data == "a" { for _, a := range n.Attr { if a.Key == "href" { fmt.Println(a.Val) break } } } for c := n.FirstChild; c != nil; c = c.NextSibling { f(c) } } f(doc) // Output: // foo // /bar/baz }
func TestNewDocument(t *testing.T) { if f, e := os.Open("./testdata/page.html"); e != nil { t.Error(e.Error()) } else { defer f.Close() if node, e := html.Parse(f); e != nil { t.Error(e.Error()) } else { doc = NewDocumentFromNode(node) } } }
func loadDoc(page string) *Document { var f *os.File var e error if f, e = os.Open(fmt.Sprintf("./testdata/%s", page)); e != nil { panic(e.Error()) } defer f.Close() var node *html.Node if node, e = html.Parse(f); e != nil { panic(e.Error()) } return NewDocumentFromNode(node) }
func TestApacheModInfoServerSimple(t *testing.T) { n, err := html.Parse(strings.NewReader(apacheModInfoServerHTML)) if err != nil { t.Errorf("unable to parse ?server status page") } c, k, err := extractTimeouts(n) if c != 60 || k != 15 || err != nil { t.Errorf("Expected 60,15,<nil> got %v,%v,%v", c, k, err) } mpm_d, mpm_t, mpm_f, err := extractMpmInfo(n) if (mpm_d != 64) || (mpm_t != false) || (mpm_f != true) || (err != nil) { t.Errorf("Expected 64,false,true,<nil> got %v,%v,%v,%v", mpm_d, mpm_t, mpm_f, err) } }
// NewDocumentFromResponse is another Document constructor that takes an http response as argument. // It loads the specified response's document, parses it, and stores the root Document // node, ready to be manipulated. The response's body is closed on return. func NewDocumentFromResponse(res *http.Response) (*Document, error) { if res == nil { return nil, errors.New("Response is nil") } defer res.Body.Close() if res.Request == nil { return nil, errors.New("Response.Request is nil") } // Parse the HTML into nodes root, e := html.Parse(res.Body) if e != nil { return nil, e } // Create and fill the document return newDocument(root, res.Request.URL), nil }
func TestApacheModInfoEmpty(t *testing.T) { n, err := html.Parse(strings.NewReader("")) if err != nil { t.Errorf("unable to parse ?server status page") } c, k, err := extractTimeouts(n) if c != 0 || k != 0 || err == nil { t.Errorf("Expected 0,0,ERROR got %v,%v,%v", c, k, err) } mpm_d, mpm_t, mpm_f, err := extractMpmInfo(n) if (mpm_d != 0) || (mpm_t != false) || (mpm_f != false) || (err == nil) { t.Errorf("Expected 0,false,false,<nil> got %v,%v,%v,%v", mpm_d, mpm_t, mpm_f, err) } }
func TestSelectors(t *testing.T) { for _, test := range selectorTests { s, err := Compile(test.selector) if err != nil { t.Errorf("error compiling %q: %s", test.selector, err) continue } doc, err := html.Parse(strings.NewReader(test.HTML)) if err != nil { t.Errorf("error parsing %q: %s", test.HTML, err) continue } matches := s.MatchAll(doc) if len(matches) != len(test.results) { t.Errorf("wanted %d elements, got %d instead", len(test.results), len(matches)) continue } for i, m := range matches { got := nodeString(m) if got != test.results[i] { t.Errorf("wanted %s, got %s instead", test.results[i], got) } } firstMatch := s.MatchFirst(doc) if len(test.results) == 0 { if firstMatch != nil { t.Errorf("MatchFirst: want nil, got %s", nodeString(firstMatch)) } } else { got := nodeString(firstMatch) if got != test.results[0] { t.Errorf("MatchFirst: want %s, got %s", test.results[0], got) } } } }
func c_apache_mod_info() (opentsdb.MultiDataPoint, error) { var md opentsdb.MultiDataPoint resp, err := http.Get(apacheModInfoBaseURL + "?server") if err != nil { return nil, err } defer resp.Body.Close() n, err := html.Parse(resp.Body) if err != nil { return nil, fmt.Errorf("unable to parse ?server status page") } connection_timeout, keepalive, err := extractTimeouts(n) if err != nil { return nil, err } Add(&md, "apache.server.timeout.connection", connection_timeout, nil, metadata.Gauge, metadata.Second, "") Add(&md, "apache.server.timeout.keepalive", keepalive, nil, metadata.Gauge, metadata.Second, "") max_daemons, is_threaded, is_forked, err := extractMpmInfo(n) if err != nil { return nil, err } is_threaded_num := 0 if is_threaded { is_threaded_num = 1 } is_forked_num := 0 if is_forked { is_forked_num = 1 } Add(&md, "apache.mpm.daemons_max", max_daemons, nil, metadata.Gauge, metadata.Bool, "") Add(&md, "apache.mpm.threaded", is_threaded_num, nil, metadata.Gauge, metadata.Bool, "") Add(&md, "apache.mpm.forked", is_forked_num, nil, metadata.Gauge, metadata.Bool, "") return md, nil }