Exemple #1
0
// TestURLResolve runs urlx.Resolve over a table of hosts and checks, per
// entry, whether resolution is expected to fail and (when out is non-empty)
// that the resolved address formats to the expected string.
func TestURLResolve(t *testing.T) {
	tests := []struct {
		in  string // raw input handed to urlx.Parse
		out string // expected fmt.Sprint of the resolved IP; empty means "don't compare"
		err bool   // true when urlx.Resolve is expected to fail
	}{
		{in: "localhost", out: "127.0.0.1"},
		{in: "google.com"},
		{in: "some.weird.hostname.example.com", err: true},
	}

	for _, tt := range tests {
		// A parse failure is a broken fixture, not a Resolve result: report it
		// instead of handing an unusable URL to Resolve.
		u, err := urlx.Parse(tt.in)
		if err != nil {
			t.Errorf(`%v: unexpected parse error "%v"`, tt.in, err)
			continue
		}

		ip, err := urlx.Resolve(u)
		if !tt.err && err != nil {
			t.Errorf(`%v: unexpected error "%v"`, tt.in, err)
			continue
		}
		if tt.err && err == nil {
			t.Errorf(`%v: expected error`, tt.in)
			continue
		}
		if tt.out != "" && tt.out != fmt.Sprint(ip) {
			t.Errorf(`%v: got "%v", want "%v"`, tt.in, ip, tt.out)
		}
	}
}
Exemple #2
0
// GetAll fetches every URL in urls concurrently, one goroutine per URL, and
// blocks until all of them have finished.
//
// The returned slice has the same length and order as urls. Failures are
// reported per entry through the response's Err field; the error return value
// is always nil.
func (hf HttpFetcher) GetAll(urls []string) ([]*HttpFetcherResponse, error) {
	m := metrics.GetOrRegisterTimer("fn.FetchRemoteData", nil) // TODO: update metric name
	// time.Now() is evaluated when the defer statement runs, so the timer
	// covers the whole call.
	defer m.UpdateSince(time.Now())

	resps := make([]*HttpFetcherResponse, len(urls))

	var wg sync.WaitGroup
	wg.Add(len(urls))

	// TODO: add thruput here..

	for i, urlStr := range urls {
		resps[i] = &HttpFetcherResponse{}

		// urlStr is passed as an argument rather than captured: before Go 1.22
		// the loop variable is shared across iterations, so a captured urlStr
		// could already hold a later URL by the time the goroutine runs.
		go func(resp *HttpFetcherResponse, urlStr string) {
			defer wg.Done()

			url, err := urlx.Parse(urlStr)
			if err != nil {
				resp.Err = err
				return
			}
			resp.URL = url

			lg.Infof("Fetching %s", url.String())

			fetch, err := hf.client().Get(url.String())
			if err != nil {
				lg.Warnf("Error fetching %s because %s", url.String(), err)
				resp.Err = err
				return
			}
			defer fetch.Body.Close()

			resp.Status = fetch.StatusCode

			body, err := ioutil.ReadAll(fetch.Body)
			if err != nil {
				resp.Err = err
				return
			}
			resp.Data = body
			resp.Err = nil
		}(resps[i], urlStr)
	}

	wg.Wait()
	return resps, nil
}
Exemple #3
0
// GetAll fetches every URL in urls concurrently, one goroutine per URL, and
// blocks until all of them have finished. Each request is issued through
// ctxhttp.Get with the supplied ctx.
//
// The returned slice has the same length and order as urls. Failures are
// reported per entry through the response's Err field; the error return value
// is always nil.
func (f Fetcher) GetAll(ctx context.Context, urls []string) ([]*FetcherResponse, error) {
	// time.Now() is evaluated when the defer statement runs, so the
	// measurement covers the whole call.
	defer metrics.MeasureSince([]string{"fn.FetchRemoteData"}, time.Now())

	fetches := make([]*FetcherResponse, len(urls))

	var wg sync.WaitGroup
	wg.Add(len(urls))

	// TODO: add thruput here..

	for i, urlStr := range urls {
		fetches[i] = &FetcherResponse{}

		// urlStr is passed as an argument rather than captured: before Go 1.22
		// the loop variable is shared across iterations, so a captured urlStr
		// could already hold a later URL by the time the goroutine runs.
		go func(fetch *FetcherResponse, urlStr string) {
			defer wg.Done()

			url, err := urlx.Parse(urlStr)
			if err != nil {
				fetch.Err = err
				return
			}
			fetch.URL = url

			lg.Infof("Fetching %s", url.String())

			resp, err := ctxhttp.Get(ctx, f.client(), url.String())
			if err != nil {
				lg.Warnf("Error fetching %s because %s", url.String(), err)
				fetch.Err = err
				return
			}
			defer resp.Body.Close()

			fetch.Status = resp.StatusCode

			body, err := ioutil.ReadAll(resp.Body)
			if err != nil {
				fetch.Err = err
				return
			}
			fetch.Data = body
			fetch.Err = nil
		}(fetches[i], urlStr)
	}

	wg.Wait()
	return fetches, nil
}
Exemple #4
0
// BucketFetchItem handles a request for an image identified by the "url"
// query parameter within the bucket named by the "bucket" URL param.
//
// The URL is parsed with urlx, its string form is hashed with sha1Hash, and
// the hash is stored as the "key" URL param. If the bucket has no image under
// that key, the image is added from the URL first. The request is then handed
// to BucketGetItem. Every failure is answered with a 422 image error.
func BucketFetchItem(ctx context.Context, w http.ResponseWriter, r *http.Request) {
	bucket, err := NewBucket(chi.URLParams(ctx)["bucket"])
	if err != nil {
		respond.ImageError(w, 422, err)
		return
	}

	rawURL := r.URL.Query().Get("url")
	if rawURL == "" {
		respond.ImageError(w, 422, ErrInvalidURL)
		return
	}

	parsed, err := urlx.Parse(rawURL)
	if err != nil {
		respond.ImageError(w, 422, ErrInvalidURL)
		return
	}
	// From here on only the parsed URL's string form is used.
	target := parsed.String()

	// The hash of the URL is the key BucketGetItem will look up.
	key := sha1Hash(target)
	chi.URLParams(ctx)["key"] = key

	// Look for the original first; a little extra overhead, but acceptable.
	_, err = bucket.DbFindImage(ctx, key, nil)
	switch {
	case err == nil:
		// Already stored; nothing to fetch.
	case err == ErrImageNotFound:
		// Not stored yet: fetch on demand and add it to the bucket.
		// TODO: add image sizing throttler here....
		if _, err := bucket.AddImagesFromUrls(ctx, []string{target}); err != nil {
			lg.Errorf("Fetching failed for %s because %s", target, err)
			respond.ImageError(w, 422, err)
			return
		}
	default:
		respond.ImageError(w, 422, err)
		return
	}

	BucketGetItem(ctx, w, r)
}
Exemple #5
0
func TestParse(t *testing.T) {
	tests := []struct {
		in  string
		out string
		err bool
	}{
		// Error out on missing host:
		{in: "", err: true},
		{in: "/", err: true},
		{in: "//", err: true},

		// Test schemes:
		{in: "http://example.com", out: "http://example.com"},
		{in: "HTTP://x.example.com", out: "http://x.example.com"},
		{in: "http://localhost", out: "http://localhost"},
		{in: "http://user.local", out: "http://user.local"},
		{in: "https://example.com", out: "https://example.com"},
		{in: "HTTPS://example.com", out: "https://example.com"},
		{in: "ssh://example.com:22", out: "ssh://example.com:22"},
		{in: "jabber://example.com:5222", out: "jabber://example.com:5222"},

		// Leading double slashes (any scheme) defaults to http:
		{in: "//example.com", out: "http://example.com"},

		// Empty scheme defaults to http:
		{in: "localhost", out: "http://localhost"},
		{in: "LOCALHOST", out: "http://localhost"},
		{in: "localhost:80", out: "http://*****:*****@example.com", out: "http://[email protected]"},
		{in: "user:[email protected]", out: "http://*****:*****@example.com"},
		{in: "https://*****:*****@subsub.sub.example.com", out: "https://*****:*****@subsub.sub.example.com"},

		// Lowercase scheme and host by default. Let net/url normalize URL by default:
		{in: "hTTp://subSUB.sub.EXAMPLE.COM/x//////y///foo.mp3?c=z&a=x&b=y#t=20", out: "http://subsub.sub.example.com/x//////y///foo.mp3?c=z&a=x&b=y#t=20"},

		// IDNA Punycode domains.
		// TODO: net/url escapes all the fields in String() method. Should we fix it?
		{in: "http://www.žluťoučký-kůň.cz/úpěl-ďábelské-ódy", out: "http://www.%C5%BElu%C5%A5ou%C4%8Dk%C3%BD-k%C5%AF%C5%88.cz/%C3%BAp%C4%9Bl-%C4%8F%C3%A1belsk%C3%A9-%C3%B3dy"},
		{in: "http://www.xn--luouk-k-z2a6lsyxjlexh.cz/úpěl-ďábelské-ódy", out: "http://www.xn--luouk-k-z2a6lsyxjlexh.cz/%C3%BAp%C4%9Bl-%C4%8F%C3%A1belsk%C3%A9-%C3%B3dy"},
		{in: "http://żółć.pl/żółć.html", out: "http://%C5%BC%C3%B3%C5%82%C4%87.pl/%C5%BC%C3%B3%C5%82%C4%87.html"},
		{in: "http://xn--kda4b0koi.pl/żółć.html", out: "http://xn--kda4b0koi.pl/%C5%BC%C3%B3%C5%82%C4%87.html"},

		// IANA TLDs.
		// TODO: net/url escapes all the fields in String() method. Should we fix it?
		{in: "https://pressly.餐厅", out: "https://pressly.%E9%A4%90%E5%8E%85"},
		{in: "https://pressly.组织机构", out: "https://pressly.%E7%BB%84%E7%BB%87%E6%9C%BA%E6%9E%84"},

		// Some obviously wrong data:
		{in: "", err: true},
		{in: "javascript:evilFunction()", err: true},
		{in: "otherscheme:garbage", err: true},
		{in: "<funnnytag>", err: true},
	}

	for _, tt := range tests {
		url, err := urlx.Parse(tt.in)
		if err != nil {
			if !tt.err {
				t.Errorf(`"%s": unexpected error "%v"`, tt.in, err)
			}
			continue
		}
		if tt.err && err == nil {
			t.Errorf(`"%s": expected error`, tt.in)
			continue
		}
		if url.String() != tt.out {
			t.Errorf(`"%s": got "%s", want "%v"`, tt.in, url, tt.out)
		}
	}
}
Exemple #6
0
// TestURLNormalize parses each table input with urlx.Parse, passes the result
// to urlx.Normalize, and checks that the normalized string equals out (or
// that Normalize fails when err is set).
func TestURLNormalize(t *testing.T) {
	tests := []struct {
		in  string // raw input handed to urlx.Parse
		out string // expected result of urlx.Normalize
		err bool   // true when urlx.Normalize is expected to fail
	}{
		// Remove unnecessary host dots:
		// Purell bug? They claim the following works..
		//{in: "http://..example..com../index.html", out: "http://example.com/index.html"},

		// Remove default port:
		{in: "http://example.com:80/index.html", out: "http://example.com/index.html"},
		{in: "localhost:80", out: "http://localhost"},
		{in: "127.0.0.1:80", out: "http://127.0.0.1"},
		{in: "[2001:db8:a0b:12f0::1]:80", out: "http://[2001:db8:a0b:12f0::1]"},

		// Empty scheme defaults to http:
		{in: "localhost", out: "http://localhost"},
		{in: "LOCALHOST", out: "http://localhost"},
		{in: "localhost:80", out: "http://localhost"},
		{in: "localhost:8080", out: "http://localhost:8080"},
		{in: "user.local", out: "http://user.local"},
		{in: "user.local:80", out: "http://user.local"},
		{in: "user.local:8080", out: "http://user.local:8080"},
		{in: "127.0.0.1", out: "http://127.0.0.1"},
		{in: "127.0.0.1:80", out: "http://127.0.0.1"},
		{in: "127.0.0.1:8080", out: "http://127.0.0.1:8080"},
		{in: "[2001:db8:a0b:12f0::1]", out: "http://[2001:db8:a0b:12f0::1]"},
		{in: "[2001:db8:a0b:12f0::1]:80", out: "http://[2001:db8:a0b:12f0::1]"},
		{in: "[2001:db8:a0b:12f0::1]:8080", out: "http://[2001:db8:a0b:12f0::1]:8080"},
		{in: "[2001:db8:a0b:12f0::80]", out: "http://[2001:db8:a0b:12f0::80]"},
		{in: "[2001:db8:a0b:12f0::80]:80", out: "http://[2001:db8:a0b:12f0::80]"},
		{in: "[2001:db8:a0b:12f0::80]:8080", out: "http://[2001:db8:a0b:12f0::80]:8080"},
		{in: "http://localhost:8080", out: "http://localhost:8080"},
		{in: "http://x.example.io:8080", out: "http://x.example.io:8080"},

		// Remove duplicate slashes.
		{in: "http://example.com///x//////y///index.html", out: "http://example.com/x/y/index.html"},

		// Remove unnecessary dots from path:
		{in: "http://example.com/./x/y/z/../index.html", out: "http://example.com/x/y/index.html"},

		// Sort query:
		{in: "http://example.com/index.html?c=z&a=x&b=y", out: "http://example.com/index.html?a=x&b=y&c=z"},

		// Leave fragment as is:
		{in: "http://example.com/index.html#t=20", out: "http://example.com/index.html#t=20"},

		// README example:
		{in: "localhost:80///x///y/z/../././index.html?b=y&a=x#t=20", out: "http://localhost/x/y/index.html?a=x&b=y#t=20"},

		// Decode Punycode into UTF8.
		{in: "http://www.xn--luouk-k-z2a6lsyxjlexh.cz/úpěl-ďábelské-ódy", out: "http://www.žluťoučký-kůň.cz/%C3%BAp%C4%9Bl-%C4%8F%C3%A1belsk%C3%A9-%C3%B3dy"},
		{in: "http://xn--kda4b0koi.pl/żółć.html", out: "http://żółć.pl/%C5%BC%C3%B3%C5%82%C4%87.html"},

		// ..more robust test cases covered by Purell
	}

	for _, tt := range tests {
		// A parse failure is a broken fixture, not a Normalize result: report
		// it instead of handing an unusable URL to Normalize.
		u, err := urlx.Parse(tt.in)
		if err != nil {
			t.Errorf(`%v: unexpected parse error "%v"`, tt.in, err)
			continue
		}

		url, err := urlx.Normalize(u)
		if err != nil {
			if !tt.err {
				t.Errorf(`%v: unexpected error "%v"`, tt.in, err)
			}
			continue
		}
		// Normalization succeeded from here on.
		if tt.err {
			t.Errorf(`%v: expected error`, tt.in)
			continue
		}
		if url != tt.out {
			t.Errorf(`%v: got "%v", want "%v"`, tt.in, url, tt.out)
		}
	}
}