// TestFound seeds a job and checks that the status and results endpoints
// respond with 200 and the expected bodies.
func TestFound(t *testing.T) {
	d, _ := db.NewMapConn()
	d.ViewPage("test", "http://example.com")
	d.Processing("test")
	d.Save("test", "http://example.com/image.jpg")
	x := context.Context{d, nil}
	s := newServer(x)

	r, _ := http.NewRequest("GET", "http://example.com", nil)
	p := make(httprouter.Params, 1)
	p[0].Key = "jobUUID"
	p[0].Value = "test"

	w := httptest.NewRecorder()
	s.status(w, r, p)
	assert.Equal(t, 200, w.Code)
	b, err := ioutil.ReadAll(w.Body)
	assert.NoError(t, err)
	assert.Equal(t, "- Processing: 1 URLs\n- Done: 0 URLs\n- Page views:\n\t- http://example.com -> 1 hit", string(b))

	w = httptest.NewRecorder()
	s.results(w, r, p)
	assert.Equal(t, 200, w.Code)
	b, err = ioutil.ReadAll(w.Body)
	assert.NoError(t, err)
	assert.Equal(t, "http://example.com/image.jpg\n", string(b))
}

// TestCrawlDocuments runs crawlDocument against fixture pages and checks the
// number of image URLs stored for each job.
func TestCrawlDocuments(t *testing.T) {
	testCases := []struct {
		id     string
		page   string
		images int
	}{
		{id: "test1", page: "simple_page.html", images: 1},
		{id: "test2", page: "multi_images.html", images: 2},
		{id: "test3", page: "empty_page.html", images: 0},
	}
	for _, e := range testCases {
		d, _ := db.NewMapConn()
		c := newCrawler(d, queue.NewPoolConn(d), queue.NewMessage(e.id, "http://example.com", 1))
		doc := loadPage(t, e.page)
		x := loadContext(t, "http://example.com")
		c.crawlDocument(x, doc)
		r, _ := d.Results(e.id)
		assert.Equal(t, e.images, len(r))
	}
}

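// loadPage and loadContext are helpers referenced throughout these tests but
// not defined in this section. A minimal sketch of what they might look like,
// assuming the crawler parses pages with github.com/PuerkitoBio/goquery, keeps
// its HTML fixtures in a testdata directory, and uses a *url.URL as the crawl
// context for resolving relative links (all of these are assumptions inferred
// from the call sites, not confirmed by this file; the sketch also needs the
// net/url, os, and path/filepath imports):

func loadPage(t *testing.T, name string) *goquery.Document {
	// Open the HTML fixture from the assumed testdata directory.
	f, err := os.Open(filepath.Join("testdata", name))
	assert.NoError(t, err)
	defer f.Close()
	// Parse it into a goquery document, the type crawlDocument presumably consumes.
	doc, err := goquery.NewDocumentFromReader(f)
	assert.NoError(t, err)
	return doc
}

func loadContext(t *testing.T, rawURL string) *url.URL {
	// Parse the base URL the crawler resolves relative links against.
	u, err := url.Parse(rawURL)
	assert.NoError(t, err)
	return u
}
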
// TestCrawl posts a URL to the crawl endpoint and expects a 201 response whose
// body is the new job UUID, echoed back in the Location header.
func TestCrawl(t *testing.T) {
	d, _ := db.NewMapConn()
	q := queue.NewPoolConn(d)
	counter := 0
	processor := func(q queue.Connection, d db.Connection, msg *queue.Message) {
		counter++
	}
	q.Subscribe(processor)
	x := context.Context{d, q}
	s := newServer(x)

	r, _ := http.NewRequest("GET", "http://example.com", strings.NewReader("http://example.com"))
	p := make(httprouter.Params, 0)
	w := httptest.NewRecorder()
	s.crawl(w, r, p)

	b, err := ioutil.ReadAll(w.Body)
	assert.NoError(t, err)
	assert.NotEmpty(t, b)
	j := string(b)
	assert.Equal(t, fmt.Sprintf("/status/%s", j), w.Header().Get("Location"))
	assert.Equal(t, 201, w.Code)
}

// TestContinueCrawling checks the depth limit: a message already at depth 1
// must stop, while a message at depth 0 may continue.
func TestContinueCrawling(t *testing.T) {
	d, _ := db.NewMapConn()
	c := newCrawler(d, queue.NewPoolConn(d), queue.NewMessage("test", "http://example.com", 1))
	assert.False(t, c.continueCrawling())

	c = newCrawler(d, queue.NewPoolConn(d), queue.NewMessage("test", "http://example.com", 0))
	assert.True(t, c.continueCrawling())
}

// connectDb attempts to connect to a database.
// The preferred database engine is Riak, but it falls back to an in-memory
// map if Riak is not configured.
// It exits the program if the connection fails.
func connectDb() db.Connection {
	if h, ok := ParseRiakHost(); ok {
		return ConnectRiakDb(h)
	}
	m, _ := db.NewMapConn()
	return m
}

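// ParseRiakHost and ConnectRiakDb are defined elsewhere in the package. A
// hypothetical reading, consistent with the fallback above, is that the host
// comes from the environment, e.g. (the RIAK_HOST variable name is an
// assumption for illustration, not taken from this code):
//
//	func ParseRiakHost() (string, bool) {
//		h := os.Getenv("RIAK_HOST")
//		return h, h != ""
//	}
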
// TestGnatsdSuite runs the NATS-backed queue suite only when NATS nodes are
// configured; otherwise the suite is skipped entirely.
func TestGnatsdSuite(t *testing.T) {
	if h, ok := context.ParseNatsNodes(); ok {
		d, _ := db.NewMapConn()
		s := &GnatsdTestSuite{
			conn: context.ConnectNatsQueue(h, d),
		}
		suite.Run(t, s)
	}
}

// TestCreateJob verifies that the server can create a new job record.
func TestCreateJob(t *testing.T) {
	d, _ := db.NewMapConn()
	x := context.Context{d, nil}
	s := newServer(x)
	j, err := s.createNewJob()
	assert.NoError(t, err)
	assert.NotNil(t, j)
}

// TestPoolConn publishes a message, waits for the subscribed processor to
// store it, and then checks that the result landed in the database.
func TestPoolConn(t *testing.T) {
	d, _ := db.NewMapConn()
	q := NewPoolConn(d)

	done := make(chan bool)
	processor := func(q Connection, d db.Connection, m *Message) {
		d.Save(m.JobUUID, m.URL)
		done <- true
	}
	q.Subscribe(processor)
	q.Publish("test", "http://example.com", 0)
	<-done

	r, _ := d.Results("test")
	assert.Equal(t, 1, len(r))
}

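// Inferred from the usage above, the queue package's API looks roughly like
// this (a sketch reconstructed from the call sites in this file, not the
// package's actual declarations):
//
//	type Processor func(q Connection, d db.Connection, m *Message)
//
//	type Connection interface {
//		Subscribe(p Processor)
//		Publish(jobUUID, url string, depth int)
//	}
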
// TestNotFound checks that status and results return 404 for an unknown job.
func TestNotFound(t *testing.T) {
	d, _ := db.NewMapConn()
	x := context.Context{d, nil}
	s := newServer(x)

	r, _ := http.NewRequest("GET", "http://example.com", nil)
	p := make(httprouter.Params, 1)
	p[0].Key = "jobUUID"
	p[0].Value = "test"

	w := httptest.NewRecorder()
	s.status(w, r, p)
	assert.Equal(t, 404, w.Code)

	w = httptest.NewRecorder()
	s.results(w, r, p)
	assert.Equal(t, 404, w.Code)
}

// TestCrawlHref crawls a page that links to another page; the subscribed
// processor crawls the linked document, so its image ends up in the results.
func TestCrawlHref(t *testing.T) {
	d, _ := db.NewMapConn()
	p := queue.NewPoolConn(d)
	c := newCrawler(d, p, queue.NewMessage("test", "http://example.com", 0))
	x := loadContext(t, "http://example.com")

	done := make(chan bool)
	processor := func(q queue.Connection, d db.Connection, msg *queue.Message) {
		doc := loadPage(t, "simple_page.html")
		c.crawlDocument(x, doc)
		done <- true
	}
	p.Subscribe(processor)

	doc := loadPage(t, "follow_index.html")
	c.crawlDocument(x, doc)
	<-done

	r, _ := d.Results("test")
	assert.Equal(t, 1, len(r))
	assert.Equal(t, "http://example.com/images/logo.jpg", string(r[0]))
}
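
// The HTML fixtures are not included in this section. Plausible minimal
// contents, inferred from the assertions above and assuming relative URLs are
// resolved against the crawl context (guesses for illustration, not the
// actual files):
//
//	follow_index.html: <html><body><a href="/simple">follow me</a></body></html>
//	simple_page.html:  <html><body><img src="/images/logo.jpg"></body></html>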