예제 #1
0
// TestContinueCrawling verifies that continueCrawling reports false for a
// message at depth 1 and true for a message still at depth 0.
func TestContinueCrawling(t *testing.T) {
	cases := []struct {
		depth    int
		expected bool
	}{
		{depth: 1, expected: false},
		{depth: 0, expected: true},
	}

	conn, _ := db.NewMapConn()
	for _, tc := range cases {
		cr := newCrawler(conn, queue.NewPoolConn(conn), queue.NewMessage("test", "http://example.com", tc.depth))
		assert.Equal(t, tc.expected, cr.continueCrawling())
	}
}
예제 #2
0
// connectQueue builds the queue connection for the service.
// When a cluster of Gnatsd servers is configured it connects to them,
// exiting the program if that connection fails. Otherwise it falls back
// to a channel pool backed by the given database connection.
func connectQueue(d db.Connection) queue.Connection {
	servers, ok := ParseNatsNodes()
	if !ok {
		// No nats servers configured: use the channel-pool fallback.
		return queue.NewPoolConn(d)
	}

	return ConnectNatsQueue(servers, d)
}
예제 #3
0
// TestCrawlDocuments runs crawlDocument against fixture pages and checks
// that the expected number of image results is stored under each crawl ID.
func TestCrawlDocuments(t *testing.T) {
	testCases := []struct {
		id     string // crawl/message ID the results are stored under
		page   string // HTML fixture loaded via loadPage
		images int    // expected number of stored results
	}{
		{id: "test1", page: "simple_page.html", images: 1},
		{id: "test2", page: "multi_images.html", images: 2},
		{id: "test3", page: "empty_page.html", images: 0},
	}

	for _, e := range testCases {
		e := e // shadow for the closure (pre-Go 1.22 loop-variable semantics)
		// Subtests isolate failures and name them after the crawl ID.
		t.Run(e.id, func(t *testing.T) {
			d, err := db.NewMapConn()
			assert.NoError(t, err)
			c := newCrawler(d, queue.NewPoolConn(d), queue.NewMessage(e.id, "http://example.com", 1))

			doc := loadPage(t, e.page)
			x := loadContext(t, "http://example.com")
			c.crawlDocument(x, doc)

			r, err := d.Results(e.id)
			assert.NoError(t, err)
			assert.Equal(t, e.images, len(r))
		})
	}
}
예제 #4
0
// TestCrawl issues a crawl request to the HTTP handler and verifies that it
// responds 201 Created, returns the new crawl ID in the body, and sets a
// matching /status/<id> Location header.
func TestCrawl(t *testing.T) {
	d, _ := db.NewMapConn()
	q := queue.NewPoolConn(d)

	// The pool needs a subscriber; this handler-level test does not inspect
	// the processor's work, so a no-op is enough. (The previous version
	// incremented a counter here that was never asserted — dead code.)
	q.Subscribe(func(q queue.Connection, d db.Connection, msg *queue.Message) {})

	x := context.Context{d, q}

	s := newServer(x)
	r, _ := http.NewRequest("GET", "http://example.com", strings.NewReader("http://example.com"))

	p := make(httprouter.Params, 0)
	w := httptest.NewRecorder()
	s.crawl(w, r, p)

	b, err := ioutil.ReadAll(w.Body)
	assert.NoError(t, err)
	assert.NotEmpty(t, b)

	// The response body is the crawl ID; the Location header must point at
	// its status endpoint.
	j := string(b)
	assert.Equal(t, fmt.Sprintf("/status/%s", j), w.Header().Get("Location"))

	assert.Equal(t, http.StatusCreated, w.Code)
}
예제 #5
0
// TestCrawlHref verifies that crawling a page containing a followable link
// results in the linked page being processed and its image recorded.
//
// The flow is asynchronous: crawling follow_index.html hands a message to
// the pool; the subscribed processor then crawls simple_page.html and
// signals completion on the done channel before the results are inspected.
func TestCrawlHref(t *testing.T) {
	d, _ := db.NewMapConn()
	p := queue.NewPoolConn(d)
	// Depth 0: continueCrawling returns true at depth 0 (see
	// TestContinueCrawling), so this crawl is allowed to proceed.
	c := newCrawler(d, p, queue.NewMessage("test", "http://example.com", 0))
	x := loadContext(t, "http://example.com")

	// done signals that the subscribed processor has finished its crawl.
	done := make(chan bool)
	processor := func(q queue.Connection, d db.Connection, msg *queue.Message) {
		doc := loadPage(t, "simple_page.html")
		c.crawlDocument(x, doc)
		done <- true
	}

	// Subscribe before crawling so the message published by the crawl below
	// has a consumer waiting for it.
	p.Subscribe(processor)

	doc := loadPage(t, "follow_index.html")
	c.crawlDocument(x, doc)

	// Block until the processor has run; asserting earlier would race with
	// the asynchronous crawl.
	<-done
	r, _ := d.Results("test")
	assert.Equal(t, 1, len(r))

	assert.Equal(t, "http://example.com/images/logo.jpg", string(r[0]))
}