func TestContinueCrawling(t *testing.T) {
	d, _ := db.NewMapConn()

	c := newCrawler(d, queue.NewPoolConn(d), queue.NewMessage("test", "http://example.com", 1))
	assert.False(t, c.continueCrawling())

	c = newCrawler(d, queue.NewPoolConn(d), queue.NewMessage("test", "http://example.com", 0))
	assert.True(t, c.continueCrawling())
}
func TestCrawlDocuments(t *testing.T) {
	testCases := []struct {
		id     string
		page   string
		images int
	}{
		{
			id:     "test1",
			page:   "simple_page.html",
			images: 1,
		},
		{
			id:     "test2",
			page:   "multi_images.html",
			images: 2,
		},
		{
			id:     "test3",
			page:   "empty_page.html",
			images: 0,
		},
	}

	for _, e := range testCases {
		d, _ := db.NewMapConn()
		c := newCrawler(d, queue.NewPoolConn(d), queue.NewMessage(e.id, "http://example.com", 1))

		doc := loadPage(t, e.page)
		x := loadContext(t, "http://example.com")
		c.crawlDocument(x, doc)

		r, _ := d.Results(e.id)
		assert.Equal(t, e.images, len(r))
	}
}
func TestCrawlHref(t *testing.T) {
	d, _ := db.NewMapConn()
	p := queue.NewPoolConn(d)
	c := newCrawler(d, p, queue.NewMessage("test", "http://example.com", 0))
	x := loadContext(t, "http://example.com")

	// The subscribed processor stands in for the second crawl step: once a
	// message for the linked page arrives on the queue, it crawls
	// simple_page.html and signals completion.
	done := make(chan bool)
	processor := func(q queue.Connection, d db.Connection, msg *queue.Message) {
		doc := loadPage(t, "simple_page.html")
		c.crawlDocument(x, doc)
		done <- true
	}
	p.Subscribe(processor)

	// Crawling follow_index.html is expected to publish a message for the
	// page it links to, which the processor above then handles.
	doc := loadPage(t, "follow_index.html")
	c.crawlDocument(x, doc)
	<-done

	r, _ := d.Results("test")
	assert.Equal(t, 1, len(r))
	assert.Equal(t, "http://example.com/images/logo.jpg", string(r[0]))
}
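// The tests above rely on two helpers, loadPage and loadContext, that are not
// shown in this section. The sketch below is one way they might look; it is an
// assumption, not the original implementation. It presumes the fixture pages
// live in a testdata directory, that the crawler consumes *goquery.Document
// values, and that the crawl context is a context.Context carrying the parsed
// base URL under a hypothetical baseURLKey. Assumed imports: context, net/url,
// os, path/filepath, and github.com/PuerkitoBio/goquery.

type ctxKey string

// baseURLKey is a hypothetical context key for the page's base URL.
const baseURLKey ctxKey = "baseURL"

// loadPage parses an HTML fixture from the testdata directory.
func loadPage(t *testing.T, name string) *goquery.Document {
	t.Helper()

	f, err := os.Open(filepath.Join("testdata", name))
	if err != nil {
		t.Fatalf("opening fixture %s: %v", name, err)
	}
	defer f.Close()

	doc, err := goquery.NewDocumentFromReader(f)
	if err != nil {
		t.Fatalf("parsing fixture %s: %v", name, err)
	}
	return doc
}

// loadContext builds a context carrying the parsed base URL for the crawl.
func loadContext(t *testing.T, rawURL string) context.Context {
	t.Helper()

	u, err := url.Parse(rawURL)
	if err != nil {
		t.Fatalf("parsing base URL %s: %v", rawURL, err)
	}
	return context.WithValue(context.Background(), baseURLKey, u)
}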