func TestURLResolve(t *testing.T) { tests := []struct { in string out string err bool }{ {in: "localhost", out: "127.0.0.1"}, {in: "google.com"}, {in: "some.weird.hostname.example.com", err: true}, } for _, tt := range tests { u, _ := urlx.Parse(tt.in) ip, err := urlx.Resolve(u) if !tt.err && err != nil { t.Errorf(`%v: unexpected error \"%v\"`, tt.in, err) continue } if tt.err && err == nil { t.Errorf(`%v: expected error`, tt.in) } if tt.out != "" && tt.out != fmt.Sprint(ip) { t.Errorf(`%v: got "%v", want "%v"`, tt.in, ip, tt.out) } } }
func (hf HttpFetcher) GetAll(urls []string) ([]*HttpFetcherResponse, error) { m := metrics.GetOrRegisterTimer("fn.FetchRemoteData", nil) // TODO: update metric name defer m.UpdateSince(time.Now()) resps := make([]*HttpFetcherResponse, len(urls)) var wg sync.WaitGroup wg.Add(len(urls)) // TODO: add thruput here.. for i, urlStr := range urls { resps[i] = &HttpFetcherResponse{} go func(resp *HttpFetcherResponse) { defer wg.Done() url, err := urlx.Parse(urlStr) if err != nil { resp.Err = err return } resp.URL = url lg.Infof("Fetching %s", url.String()) fetch, err := hf.client().Get(url.String()) if err != nil { lg.Warnf("Error fetching %s because %s", url.String(), err) resp.Err = err return } defer fetch.Body.Close() resp.Status = fetch.StatusCode body, err := ioutil.ReadAll(fetch.Body) if err != nil { resp.Err = err return } resp.Data = body resp.Err = nil }(resps[i]) } wg.Wait() return resps, nil }
func (f Fetcher) GetAll(ctx context.Context, urls []string) ([]*FetcherResponse, error) { defer metrics.MeasureSince([]string{"fn.FetchRemoteData"}, time.Now()) fetches := make([]*FetcherResponse, len(urls)) var wg sync.WaitGroup wg.Add(len(urls)) // TODO: add thruput here.. for i, urlStr := range urls { fetches[i] = &FetcherResponse{} go func(fetch *FetcherResponse) { defer wg.Done() url, err := urlx.Parse(urlStr) if err != nil { fetch.Err = err return } fetch.URL = url lg.Infof("Fetching %s", url.String()) resp, err := ctxhttp.Get(ctx, f.client(), url.String()) if err != nil { lg.Warnf("Error fetching %s because %s", url.String(), err) fetch.Err = err return } defer resp.Body.Close() fetch.Status = resp.StatusCode body, err := ioutil.ReadAll(resp.Body) if err != nil { fetch.Err = err return } fetch.Data = body fetch.Err = nil }(fetches[i]) } wg.Wait() return fetches, nil }
func BucketFetchItem(ctx context.Context, w http.ResponseWriter, r *http.Request) { bucket, err := NewBucket(chi.URLParams(ctx)["bucket"]) if err != nil { respond.ImageError(w, 422, err) return } fetchUrl := r.URL.Query().Get("url") if fetchUrl == "" { respond.ImageError(w, 422, ErrInvalidURL) return } u, err := urlx.Parse(fetchUrl) if err != nil { respond.ImageError(w, 422, ErrInvalidURL) return } fetchUrl = u.String() imKey := sha1Hash(fetchUrl) // transform to what is expected.. chi.URLParams(ctx)["key"] = imKey // First check if we have the original.. a bit of extra overhead, but its okay _, err = bucket.DbFindImage(ctx, imKey, nil) if err != nil && err != ErrImageNotFound { respond.ImageError(w, 422, err) return } // Fetch the image on-demand and add to bucket if we dont have it if err == ErrImageNotFound { // TODO: add image sizing throttler here.... _, err := bucket.AddImagesFromUrls(ctx, []string{fetchUrl}) if err != nil { lg.Errorf("Fetching failed for %s because %s", fetchUrl, err) respond.ImageError(w, 422, err) return } } BucketGetItem(ctx, w, r) }
func TestParse(t *testing.T) { tests := []struct { in string out string err bool }{ // Error out on missing host: {in: "", err: true}, {in: "/", err: true}, {in: "//", err: true}, // Test schemes: {in: "http://example.com", out: "http://example.com"}, {in: "HTTP://x.example.com", out: "http://x.example.com"}, {in: "http://localhost", out: "http://localhost"}, {in: "http://user.local", out: "http://user.local"}, {in: "https://example.com", out: "https://example.com"}, {in: "HTTPS://example.com", out: "https://example.com"}, {in: "ssh://example.com:22", out: "ssh://example.com:22"}, {in: "jabber://example.com:5222", out: "jabber://example.com:5222"}, // Leading double slashes (any scheme) defaults to http: {in: "//example.com", out: "http://example.com"}, // Empty scheme defaults to http: {in: "localhost", out: "http://localhost"}, {in: "LOCALHOST", out: "http://localhost"}, {in: "localhost:80", out: "http://*****:*****@example.com", out: "http://[email protected]"}, {in: "user:[email protected]", out: "http://*****:*****@example.com"}, {in: "https://*****:*****@subsub.sub.example.com", out: "https://*****:*****@subsub.sub.example.com"}, // Lowercase scheme and host by default. Let net/url normalize URL by default: {in: "hTTp://subSUB.sub.EXAMPLE.COM/x//////y///foo.mp3?c=z&a=x&b=y#t=20", out: "http://subsub.sub.example.com/x//////y///foo.mp3?c=z&a=x&b=y#t=20"}, // IDNA Punycode domains. // TODO: net/url escapes all the fields in String() method. Should we fix it? {in: "http://www.žluťoučký-kůň.cz/úpěl-ďábelské-ódy", out: "http://www.%C5%BElu%C5%A5ou%C4%8Dk%C3%BD-k%C5%AF%C5%88.cz/%C3%BAp%C4%9Bl-%C4%8F%C3%A1belsk%C3%A9-%C3%B3dy"}, {in: "http://www.xn--luouk-k-z2a6lsyxjlexh.cz/úpěl-ďábelské-ódy", out: "http://www.xn--luouk-k-z2a6lsyxjlexh.cz/%C3%BAp%C4%9Bl-%C4%8F%C3%A1belsk%C3%A9-%C3%B3dy"}, {in: "http://żółć.pl/żółć.html", out: "http://%C5%BC%C3%B3%C5%82%C4%87.pl/%C5%BC%C3%B3%C5%82%C4%87.html"}, {in: "http://xn--kda4b0koi.pl/żółć.html", out: "http://xn--kda4b0koi.pl/%C5%BC%C3%B3%C5%82%C4%87.html"}, // IANA TLDs. // TODO: net/url escapes all the fields in String() method. Should we fix it? {in: "https://pressly.餐厅", out: "https://pressly.%E9%A4%90%E5%8E%85"}, {in: "https://pressly.组织机构", out: "https://pressly.%E7%BB%84%E7%BB%87%E6%9C%BA%E6%9E%84"}, // Some obviously wrong data: {in: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==", err: true}, {in: "javascript:evilFunction()", err: true}, {in: "otherscheme:garbage", err: true}, {in: "<funnnytag>", err: true}, } for _, tt := range tests { url, err := urlx.Parse(tt.in) if err != nil { if !tt.err { t.Errorf(`"%s": unexpected error "%v"`, tt.in, err) } continue } if tt.err && err == nil { t.Errorf(`"%s": expected error`, tt.in) continue } if url.String() != tt.out { t.Errorf(`"%s": got "%s", want "%v"`, tt.in, url, tt.out) } } }
func TestURLNormalize(t *testing.T) { tests := []struct { in string out string err bool }{ // Remove unnecessary host dots: // Purell bug? They claim the following works.. //{in: "http://..example..com../index.html", out: "http://example.com/index.html"}, // Remove default port: {in: "http://example.com:80/index.html", out: "http://example.com/index.html"}, {in: "localhost:80", out: "http://localhost"}, {in: "127.0.0.1:80", out: "http://127.0.0.1"}, {in: "[2001:db8:a0b:12f0::1]:80", out: "http://[2001:db8:a0b:12f0::1]"}, // Empty scheme defaults to http: {in: "localhost", out: "http://localhost"}, {in: "LOCALHOST", out: "http://localhost"}, {in: "localhost:80", out: "http://localhost"}, {in: "localhost:8080", out: "http://localhost:8080"}, {in: "user.local", out: "http://user.local"}, {in: "user.local:80", out: "http://user.local"}, {in: "user.local:8080", out: "http://user.local:8080"}, {in: "127.0.0.1", out: "http://127.0.0.1"}, {in: "127.0.0.1:80", out: "http://127.0.0.1"}, {in: "127.0.0.1:8080", out: "http://127.0.0.1:8080"}, {in: "[2001:db8:a0b:12f0::1]", out: "http://[2001:db8:a0b:12f0::1]"}, {in: "[2001:db8:a0b:12f0::1]:80", out: "http://[2001:db8:a0b:12f0::1]"}, {in: "[2001:db8:a0b:12f0::1]:8080", out: "http://[2001:db8:a0b:12f0::1]:8080"}, {in: "[2001:db8:a0b:12f0::80]", out: "http://[2001:db8:a0b:12f0::80]"}, {in: "[2001:db8:a0b:12f0::80]:80", out: "http://[2001:db8:a0b:12f0::80]"}, {in: "[2001:db8:a0b:12f0::80]:8080", out: "http://[2001:db8:a0b:12f0::80]:8080"}, {in: "http://localhost:8080", out: "http://localhost:8080"}, {in: "http://x.example.io:8080", out: "http://x.example.io:8080"}, // Remove duplicate slashes. {in: "http://example.com///x//////y///index.html", out: "http://example.com/x/y/index.html"}, // Remove unnecesary dots from path: {in: "http://example.com/./x/y/z/../index.html", out: "http://example.com/x/y/index.html"}, // Sort query: {in: "http://example.com/index.html?c=z&a=x&b=y", out: "http://example.com/index.html?a=x&b=y&c=z"}, // Leave fragment as is: {in: "http://example.com/index.html#t=20", out: "http://example.com/index.html#t=20"}, // README example: {in: "localhost:80///x///y/z/../././index.html?b=y&a=x#t=20", out: "http://localhost/x/y/index.html?a=x&b=y#t=20"}, // Decode Punycode into UTF8. {in: "http://www.xn--luouk-k-z2a6lsyxjlexh.cz/úpěl-ďábelské-ódy", out: "http://www.žluťoučký-kůň.cz/%C3%BAp%C4%9Bl-%C4%8F%C3%A1belsk%C3%A9-%C3%B3dy"}, {in: "http://xn--kda4b0koi.pl/żółć.html", out: "http://żółć.pl/%C5%BC%C3%B3%C5%82%C4%87.html"}, // ..more robust test cases covered by Purell } for _, tt := range tests { u, _ := urlx.Parse(tt.in) url, err := urlx.Normalize(u) if err != nil { if !tt.err { t.Errorf(`%v: unexpected error \"%v\"`, tt.in, err) } continue } if tt.err && err == nil { t.Errorf(`%v: expected error`, tt.in) continue } if url != tt.out { t.Errorf(`%v: got "%v", want "%v"`, tt.in, url, tt.out) } } }