func TestBasic(t *testing.T) { s, _ := NewStore("/Users/JLoup/LAB/tmp") ctx := context.Background() resp := &http.Response{} b := []byte("yyydsdsdsdsdsoooooooo") r := bytes.NewReader(b) t.Log(r.Len()) u, _ := url.Parse("http://www.host2.com/dir/document/yo") resp.Body = &RdCloser{r} s.Store(ctx, *u, resp) u, _ = url.Parse("http://www.host3.com/dir/") resp.Body = &RdCloser{r} s.Store(ctx, *u, resp) u, _ = url.Parse("http://www.host2.com/dir/document/") resp.Body = &RdCloser{r} s.Store(ctx, *u, resp) u, _ = url.Parse("http://www.host4.com/dir/yo") resp.Body = &RdCloser{r} s.Store(ctx, *u, resp) u, _ = url.Parse("http://www.host4.com/dir/document/yo") resp.Body = &RdCloser{r} s.Store(ctx, *u, resp) }
func (s *Store) Get(documentURL string) (*os.File, error) { URL, err := url.Parse(documentURL) if err != nil { return nil, err } if _, exists := s.hosts[URL.Host]; !exists { return nil, errorStore{What: fmt.Sprintf("no host '%v'", URL.Host)} } hash := hashString(URL.Path) _, exists := s.documentExists(URL.Host, hash) if !exists { return nil, errorStore{What: fmt.Sprintf("no path '%v' for '%v'", URL.Path, URL.Host)} } file, err := os.Open(filepath.Join(s.hostPath(URL.Host), hash)) if err != nil { return nil, err } return file, err }
func TestFrontierHostInOut(t *testing.T) { go timeoutTest(t, 21*time.Second) p := NewHostFrontier() u1, _ := url.Parse("http://www.google.fr/1") ctx := context.Background() go func() { for i := 0; i < 5; i++ { c, u, ok := p.NextUrl(ctx) if !ok { return } t.Logf("GET %v\n", u.String()) p.Store(c, u, nil) if i != 3 { p.PushUrl(frontier.UrlRequest{*u1, frontier.NoopCtxFunc}, 1) } else { p.Stop() } } }() p.PushUrl(frontier.UrlRequest{*u1, frontier.NoopCtxFunc}, 1) <-p.Done }
func (c Limiter) Get(ctx context.Context, URL string) (*http.Response, error) { req := newRequest() urlObj, err := url.Parse(URL) if err != nil { return nil, err } req.url = *urlObj // notify main routine that we want to access to an url c.reqChannel <- *req // wait for the access to be granted <-req.ready // fetch the resource at the URL resp, err := c.fetcher.Get(ctx, URL) // notify main routine we are done req.state = done c.reqChannel <- *req // wait that everything is cleaned up <-req.ready return resp, err }
func TestPriorityChannel(t *testing.T) { go timeoutTest(t, 11*time.Second) p := newPriorityChannel() done := make(chan struct{}) u1, _ := url.Parse("http://www.google.fr/1") u2, _ := url.Parse("http://www.google.fr/2") u3, _ := url.Parse("http://www.yahoo.fr/1") u4, _ := url.Parse("http://www.yahoo.fr/2") u5, _ := url.Parse("http://www.bing.fr/1") u6, _ := url.Parse("http://www.bing.fr/2") go func() { for u := range p.Out { hostName, _ := url.StripWWW(u.Url.Host) t.Log(u, hostName) p.doneWithHost(hostName) } done <- struct{}{} }() p.addUrl(frontier.UrlRequest{*u1, frontier.NoopCtxFunc}, 2) p.addUrl(frontier.UrlRequest{*u2, frontier.NoopCtxFunc}, 2) p.addUrl(frontier.UrlRequest{*u3, frontier.NoopCtxFunc}, 2) p.addUrl(frontier.UrlRequest{*u4, frontier.NoopCtxFunc}, 2) p.addUrl(frontier.UrlRequest{*u5, frontier.NoopCtxFunc}, 2) p.addUrl(frontier.UrlRequest{*u6, frontier.NoopCtxFunc}, 2) p.Stop() <-done }
func TestFrontier(t *testing.T) { go timeoutTest(t, 22*time.Second) p := NewHostFrontier() u1, _ := url.Parse("http://www.google.fr/1") u2, _ := url.Parse("http://www.google.fr/2") u3, _ := url.Parse("http://www.yahoo.fr/1") u4, _ := url.Parse("http://www.yahoo.fr/2") u5, _ := url.Parse("http://www.bing.fr/1") u6, _ := url.Parse("http://www.bing.fr/2") done := make(chan struct{}) ctx := context.Background() go func() { for { c, u, ok := p.NextUrl(ctx) if !ok { done <- struct{}{} return } t.Logf("GET %v\n", u.String()) time.Sleep(2000 * time.Millisecond) p.Store(c, u, nil) } }() p.PushUrl(frontier.UrlRequest{*u1, frontier.NoopCtxFunc}, 1) p.PushUrl(frontier.UrlRequest{*u2, frontier.NoopCtxFunc}, 1) p.PushUrl(frontier.UrlRequest{*u2, frontier.NoopCtxFunc}, 1) p.PushUrl(frontier.UrlRequest{*u6, frontier.NoopCtxFunc}, 1) p.PushUrl(frontier.UrlRequest{*u5, frontier.NoopCtxFunc}, 1) p.PushUrl(frontier.UrlRequest{*u3, frontier.NoopCtxFunc}, 1) p.PushUrl(frontier.UrlRequest{*u4, frontier.NoopCtxFunc}, 1) p.Stop() <-done }