Example #1
0
// TestContinueCrawling builds two crawlers over the same map-backed store
// that differ only in the last argument passed to queue.NewMessage, and
// checks that continueCrawling reports false for 1 and true for 0.
// NOTE(review): the last NewMessage argument looks like a crawl depth or
// counter; confirm against queue.NewMessage.
func TestContinueCrawling(t *testing.T) {
	const startURL = "http://example.com"

	store, _ := db.NewMapConn()

	// Message created with 1: crawling is expected to stop.
	atOne := newCrawler(store, queue.NewPoolConn(store), queue.NewMessage("test", startURL, 1))
	assert.False(t, atOne.continueCrawling())

	// Message created with 0: crawling is expected to go on.
	atZero := newCrawler(store, queue.NewPoolConn(store), queue.NewMessage("test", startURL, 0))
	assert.True(t, atZero.continueCrawling())
}
Example #2
0
// TestCrawlDocuments crawls a set of fixture pages and checks how many
// results end up stored under each message id.
//
// Each case runs as its own subtest with a fresh map-backed store, so a
// failure names the offending case and one case cannot leak results into
// another.
func TestCrawlDocuments(t *testing.T) {
	testCases := []struct {
		id     string // message id; also the key the results are read back under
		page   string // fixture file handed to loadPage
		images int    // expected number of stored results after the crawl
	}{
		{
			id:     "test1",
			page:   "simple_page.html",
			images: 1,
		},
		{
			id:     "test2",
			page:   "multi_images.html",
			images: 2,
		},
		{
			id:     "test3",
			page:   "empty_page.html",
			images: 0,
		},
	}

	for _, e := range testCases {
		t.Run(e.id, func(t *testing.T) {
			d, err := db.NewMapConn()
			if err != nil {
				t.Fatalf("creating map connection: %v", err)
			}
			c := newCrawler(d, queue.NewPoolConn(d), queue.NewMessage(e.id, "http://example.com", 1))

			doc := loadPage(t, e.page)
			x := loadContext(t, "http://example.com")
			c.crawlDocument(x, doc)

			r, err := d.Results(e.id)
			if err != nil {
				t.Fatalf("reading results for %q: %v", e.id, err)
			}
			assert.Equal(t, e.images, len(r))
		})
	}
}
Example #3
0
// TestCrawlHref crawls follow_index.html with a subscriber registered on the
// pool. The subscriber crawls simple_page.html and then signals on done; the
// test waits for that signal and expects exactly one stored result, the logo
// image URL.
// NOTE(review): presumably the pool invokes the subscriber when the crawl of
// follow_index.html enqueues a followed link — confirm against queue.
func TestCrawlHref(t *testing.T) {
	d, err := db.NewMapConn()
	if err != nil {
		t.Fatalf("creating map connection: %v", err)
	}
	p := queue.NewPoolConn(d)
	c := newCrawler(d, p, queue.NewMessage("test", "http://example.com", 0))
	x := loadContext(t, "http://example.com")

	done := make(chan bool)
	// The callback ignores its arguments and reuses the outer crawler and
	// context; the parameters are blanked so they cannot shadow the outer d.
	processor := func(_ queue.Connection, _ db.Connection, _ *queue.Message) {
		doc := loadPage(t, "simple_page.html")
		c.crawlDocument(x, doc)
		done <- true
	}

	p.Subscribe(processor)

	doc := loadPage(t, "follow_index.html")
	c.crawlDocument(x, doc)

	// Unbuffered receive: blocks until the subscriber has finished its crawl.
	<-done
	r, err := d.Results("test")
	if err != nil {
		t.Fatalf("reading results: %v", err)
	}
	// Stop here on a wrong count: indexing r[0] on an empty result set would
	// panic instead of reporting a test failure.
	if len(r) != 1 {
		t.Fatalf("expected exactly 1 result, got %d", len(r))
	}

	assert.Equal(t, "http://example.com/images/logo.jpg", string(r[0]))
}