Example #1
func TestFound(t *testing.T) {
	// Seed the in-memory store for job "test" before exercising the status and results handlers.
	d, _ := db.NewMapConn()
	d.ViewPage("test", "http://example.com")
	d.Processing("test")
	d.Save("test", "http://example.com/image.jpg")

	x := context.Context{d, nil}

	s := newServer(x)

	r, _ := http.NewRequest("GET", "http://example.com", nil)

	p := make(httprouter.Params, 1)
	p[0].Key = "jobUUID"
	p[0].Value = "test"

	w := httptest.NewRecorder()
	s.status(w, r, p)
	assert.Equal(t, 200, w.Code)

	b, err := ioutil.ReadAll(w.Body)
	assert.NoError(t, err)
	assert.Equal(t, "- Processing: 1 URLs\n- Done: 0 URLs\n- Page views:\n\t- http://example.com -> 1 hit", string(b))

	w = httptest.NewRecorder()
	s.results(w, r, p)
	assert.Equal(t, 200, w.Code)

	b, err = ioutil.ReadAll(w.Body)
	assert.NoError(t, err)
	assert.Equal(t, "http://example.com/image.jpg\n", string(b))
}
Example #2
func TestCrawlDocuments(t *testing.T) {
	testCases := []struct {
		id     string
		page   string
		images int
	}{
		{
			id:     "test1",
			page:   "simple_page.html",
			images: 1,
		},
		{
			id:     "test2",
			page:   "multi_images.html",
			images: 2,
		},
		{
			id:     "test3",
			page:   "empty_page.html",
			images: 0,
		},
	}

	for _, e := range testCases {
		d, _ := db.NewMapConn()
		c := newCrawler(d, queue.NewPoolConn(d), queue.NewMessage(e.id, "http://example.com", 1))

		doc := loadPage(t, e.page)
		x := loadContext(t, "http://example.com")
		c.crawlDocument(x, doc)

		r, _ := d.Results(e.id)
		assert.Equal(t, e.images, len(r))
	}
}
Example #3
func TestCrawl(t *testing.T) {
	d, _ := db.NewMapConn()
	q := queue.NewPoolConn(d)

	counter := 0
	processor := func(q queue.Connection, d db.Connection, msg *queue.Message) {
		counter++
	}
	q.Subscribe(processor)

	x := context.Context{d, q}

	s := newServer(x)
	r, _ := http.NewRequest("GET", "http://example.com", strings.NewReader("http://example.com"))

	p := make(httprouter.Params, 0)
	w := httptest.NewRecorder()
	s.crawl(w, r, p)

	b, err := ioutil.ReadAll(w.Body)
	assert.NoError(t, err)
	assert.NotEmpty(t, b)

	j := string(b)
	assert.Equal(t, fmt.Sprintf("/status/%s", j), w.Header().Get("Location"))

	assert.Equal(t, 201, w.Code)
}
Example #4
func TestContinueCrawling(t *testing.T) {
	d, _ := db.NewMapConn()
	c := newCrawler(d, queue.NewPoolConn(d), queue.NewMessage("test", "http://example.com", 1))
	assert.False(t, c.continueCrawling())

	c = newCrawler(d, queue.NewPoolConn(d), queue.NewMessage("test", "http://example.com", 0))
	assert.True(t, c.continueCrawling())
}
Example #5
// connectDb attempts to connect to a database.
// The preferred database engine is Riak, but it falls back to an in-memory map if Riak is not configured.
// It exits the program if the connection fails.
func connectDb() db.Connection {
	if h, ok := ParseRiakHost(); ok {
		return ConnectRiakDb(h)
	}

	m, _ := db.NewMapConn()
	return m
}
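
A minimal sketch of how connectDb could be wired up at startup, reusing queue.NewPoolConn, context.Context and newServer as they appear in the tests above. The route paths, the port and the httprouter registration are assumptions for illustration, not taken from the original code; imports are omitted, as in the surrounding examples.

func main() {
	// Assumed startup wiring: pick the database first, then build the queue
	// and the server, mirroring how the tests build their fixtures.
	d := connectDb()
	q := queue.NewPoolConn(d)

	x := context.Context{d, q}
	s := newServer(x)

	// Hypothetical routes; the handlers and the "jobUUID" parameter come from
	// the tests, the paths themselves are assumed.
	r := httprouter.New()
	r.POST("/", s.crawl)
	r.GET("/status/:jobUUID", s.status)
	r.GET("/results/:jobUUID", s.results)

	log.Fatal(http.ListenAndServe(":8080", r))
}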
Example #6
func TestGnatsdSuite(t *testing.T) {
	if h, ok := context.ParseNatsNodes(); ok {
		d, _ := db.NewMapConn()
		s := &GnatsdTestSuite{
			conn: context.ConnectNatsQueue(h, d),
		}
		suite.Run(t, s)
	}
}
Example #7
func TestCreateJob(t *testing.T) {
	d, _ := db.NewMapConn()
	x := context.Context{d, nil}

	s := newServer(x)
	j, err := s.createNewJob()
	assert.NoError(t, err)
	assert.NotNil(t, j)
}
Example #8
func TestPoolConn(t *testing.T) {
	d, _ := db.NewMapConn()
	q := NewPoolConn(d)

	done := make(chan bool)
	processor := func(q Connection, d db.Connection, m *Message) {
		d.Save(m.JobUUID, m.URL)
		done <- true
	}

	q.Subscribe(processor)

	q.Publish("test", "http://example.com", 0)
	<-done

	r, _ := d.Results("test")
	assert.Equal(t, 1, len(r))
}
Example #9
func TestNotFound(t *testing.T) {
	d, _ := db.NewMapConn()
	x := context.Context{d, nil}

	s := newServer(x)

	r, _ := http.NewRequest("GET", "http://example.com", nil)

	p := make(httprouter.Params, 1)
	p[0].Key = "jobUUID"
	p[0].Value = "test"

	w := httptest.NewRecorder()
	s.status(w, r, p)
	assert.Equal(t, 404, w.Code)

	w = httptest.NewRecorder()
	s.results(w, r, p)
	assert.Equal(t, 404, w.Code)
}
Example #10
func TestCrawlHref(t *testing.T) {
	d, _ := db.NewMapConn()
	p := queue.NewPoolConn(d)
	c := newCrawler(d, p, queue.NewMessage("test", "http://example.com", 0))
	x := loadContext(t, "http://example.com")

	done := make(chan bool)
	processor := func(q queue.Connection, d db.Connection, msg *queue.Message) {
		doc := loadPage(t, "simple_page.html")
		c.crawlDocument(x, doc)
		done <- true
	}

	p.Subscribe(processor)

	doc := loadPage(t, "follow_index.html")
	c.crawlDocument(x, doc)

	<-done
	r, _ := d.Results("test")
	assert.Equal(t, 1, len(r))

	assert.Equal(t, "http://example.com/images/logo.jpg", string(r[0]))
}