예제 #1
0
// TestBasic stores the same payload under several URLs that overlap in host
// and path prefix (a document, its parent directory, and sibling hosts).
//
// The test makes no assertion on what Store does; it only exercises the calls.
// NOTE(review): the store root is a hard-coded, machine-specific directory —
// consider t.TempDir() once it is confirmed NewStore accepts any directory.
func TestBasic(t *testing.T) {
	s, err := NewStore("/Users/JLoup/LAB/tmp")
	if err != nil {
		t.Fatalf("NewStore: %v", err)
	}

	ctx := context.Background()
	resp := &http.Response{}

	payload := []byte("yyydsdsdsdsdsoooooooo")
	t.Log(len(payload))

	rawURLs := []string{
		"http://www.host2.com/dir/document/yo",
		"http://www.host3.com/dir/",
		"http://www.host2.com/dir/document/",
		"http://www.host4.com/dir/yo",
		"http://www.host4.com/dir/document/yo",
	}

	for _, raw := range rawURLs {
		u, err := url.Parse(raw)
		if err != nil {
			t.Fatalf("parsing %q: %v", raw, err)
		}

		// A bytes.Reader keeps a read position, so a reader shared between
		// calls would hand later calls only what earlier ones left unread.
		// Give every Store call its own reader over the full payload.
		resp.Body = &RdCloser{bytes.NewReader(payload)}
		s.Store(ctx, *u, resp)
	}
}
예제 #2
0
파일: store.go 프로젝트: jloup/fetchbot
// Get opens the stored file that corresponds to documentURL.
//
// The URL is parsed and its host is looked up in s.hosts. The URL path is then
// hashed with hashString and checked through s.documentExists. When both
// checks pass, the file named after that hash inside the directory returned by
// s.hostPath is opened with os.Open and returned.
//
// A nil file and a non-nil error are returned when the URL cannot be parsed,
// when the host or the path is unknown (errorStore), or when os.Open fails.
func (s *Store) Get(documentURL string) (*os.File, error) {
	parsed, err := url.Parse(documentURL)
	if err != nil {
		return nil, err
	}

	host, docPath := parsed.Host, parsed.Path

	if _, known := s.hosts[host]; !known {
		return nil, errorStore{What: fmt.Sprintf("no host '%v'", host)}
	}

	key := hashString(docPath)
	if _, found := s.documentExists(host, key); !found {
		return nil, errorStore{What: fmt.Sprintf("no path '%v' for '%v'", docPath, host)}
	}

	return os.Open(filepath.Join(s.hostPath(host), key))
}
예제 #3
0
// TestFrontierHostInOut feeds a single URL back into a host frontier several
// times from a consumer goroutine and then stops the frontier.
//
// The consumer runs at most five rounds: it takes the next URL, logs it, hands
// it back through Store with a nil third argument, and pushes the same URL
// again — except on the fourth round (index 3), where it calls Stop instead.
// The test body seeds the frontier with the first push and returns once a
// receive on the frontier's Done channel completes.
// timeoutTest is started alongside with a 21s budget; presumably it fails the
// test if that budget is exceeded — confirm against its definition.
func TestFrontierHostInOut(t *testing.T) {
	go timeoutTest(t, 21*time.Second)
	hf := NewHostFrontier()

	target, _ := url.Parse("http://www.google.fr/1")

	ctx := context.Background()

	// enqueue submits the one test URL with priority 1.
	enqueue := func() {
		hf.PushUrl(frontier.UrlRequest{*target, frontier.NoopCtxFunc}, 1)
	}

	go func() {
		for round := 0; round < 5; round++ {
			c, next, ok := hf.NextUrl(ctx)
			if !ok {
				return
			}

			t.Logf("GET %v\n", next.String())

			hf.Store(c, next, nil)

			if round == 3 {
				hf.Stop()
				continue
			}
			enqueue()
		}
	}()

	enqueue()

	<-hf.Done
}
예제 #4
0
파일: limit.go 프로젝트: jloup/fetchbot
// Get fetches URL through c.fetcher, bracketing the fetch with a two-step
// handshake over c.reqChannel.
//
// A request is created with newRequest and tagged with the parsed URL. A copy
// of it is sent on c.reqChannel and Get blocks on the request's ready channel
// before calling c.fetcher.Get. Afterwards the request state is set to done, a
// copy is sent again, and Get blocks on ready a second time before returning
// the fetcher's response and error unchanged.
//
// If URL cannot be parsed, the parse error is returned and nothing is sent on
// c.reqChannel.
func (c Limiter) Get(ctx context.Context, URL string) (*http.Response, error) {
	pending := newRequest()

	target, parseErr := url.Parse(URL)
	if parseErr != nil {
		return nil, parseErr
	}
	pending.url = *target

	// Announce the request, then block until access is granted.
	c.reqChannel <- *pending
	<-pending.ready

	// Access granted: perform the actual fetch.
	response, fetchErr := c.fetcher.Get(ctx, URL)

	// Announce completion, then block until the other side acknowledges it.
	pending.state = done
	c.reqChannel <- *pending
	<-pending.ready

	return response, fetchErr
}
예제 #5
0
// TestPriorityChannel queues six URLs (two each for three hosts) on a priority
// channel, all with priority 2, while a consumer goroutine drains the channel.
//
// For every request read from Out, the consumer strips the host with
// url.StripWWW, logs the request and the resulting host name, and reports the
// host through doneWithHost. When the range over Out ends, it signals on a
// local channel; the test calls Stop after the last addUrl and then waits for
// that signal.
// timeoutTest is started alongside with an 11s budget; presumably it fails the
// test if that budget is exceeded — confirm against its definition.
func TestPriorityChannel(t *testing.T) {
	go timeoutTest(t, 11*time.Second)
	pc := newPriorityChannel()
	finished := make(chan struct{})

	google1, _ := url.Parse("http://www.google.fr/1")
	google2, _ := url.Parse("http://www.google.fr/2")
	yahoo1, _ := url.Parse("http://www.yahoo.fr/1")
	yahoo2, _ := url.Parse("http://www.yahoo.fr/2")
	bing1, _ := url.Parse("http://www.bing.fr/1")
	bing2, _ := url.Parse("http://www.bing.fr/2")

	go func() {
		for req := range pc.Out {
			host, _ := url.StripWWW(req.Url.Host)
			t.Log(req, host)
			pc.doneWithHost(host)
		}
		finished <- struct{}{}
	}()

	// Queue the requests in the same order as they were parsed.
	for _, req := range []frontier.UrlRequest{
		{*google1, frontier.NoopCtxFunc},
		{*google2, frontier.NoopCtxFunc},
		{*yahoo1, frontier.NoopCtxFunc},
		{*yahoo2, frontier.NoopCtxFunc},
		{*bing1, frontier.NoopCtxFunc},
		{*bing2, frontier.NoopCtxFunc},
	} {
		pc.addUrl(req, 2)
	}

	pc.Stop()
	<-finished
}
예제 #6
0
// TestFrontier pushes seven requests (six distinct URLs across three hosts,
// one of them twice) into a host frontier with priority 1 while a consumer
// goroutine processes them.
//
// The consumer repeatedly takes the next URL, logs it, sleeps two seconds, and
// hands it back through Store with a nil third argument. Once NextUrl reports
// that nothing more is coming, it signals on a local channel. The test calls
// Stop after the last push and then waits for that signal.
// timeoutTest is started alongside with a 22s budget; presumably it fails the
// test if that budget is exceeded — confirm against its definition.
func TestFrontier(t *testing.T) {
	go timeoutTest(t, 22*time.Second)
	hf := NewHostFrontier()

	google1, _ := url.Parse("http://www.google.fr/1")
	google2, _ := url.Parse("http://www.google.fr/2")
	yahoo1, _ := url.Parse("http://www.yahoo.fr/1")
	yahoo2, _ := url.Parse("http://www.yahoo.fr/2")
	bing1, _ := url.Parse("http://www.bing.fr/1")
	bing2, _ := url.Parse("http://www.bing.fr/2")

	finished := make(chan struct{})
	ctx := context.Background()
	go func() {
		for c, next, ok := hf.NextUrl(ctx); ok; c, next, ok = hf.NextUrl(ctx) {
			t.Logf("GET %v\n", next.String())
			time.Sleep(2 * time.Second)

			hf.Store(c, next, nil)
		}
		finished <- struct{}{}
	}()

	// Push order is deliberate: the second google URL is submitted twice and
	// the hosts are interleaved.
	for _, req := range []frontier.UrlRequest{
		{*google1, frontier.NoopCtxFunc},
		{*google2, frontier.NoopCtxFunc},
		{*google2, frontier.NoopCtxFunc},
		{*bing2, frontier.NoopCtxFunc},
		{*bing1, frontier.NoopCtxFunc},
		{*yahoo1, frontier.NoopCtxFunc},
		{*yahoo2, frontier.NoopCtxFunc},
	} {
		hf.PushUrl(req, 1)
	}

	hf.Stop()
	<-finished
}