func fiddler2_enable(client *http.Client, proxys string) { transport := &http.Transport{} http_proxy := url.URL{} proxy, _ := http_proxy.Parse(proxys) transport.Proxy = http.ProxyURL(proxy) client.Transport = transport }
// newHostFetcher creates and starts the per-host fetch machinery for u: an
// infinite command queue plus a HostFetcher running in its own goroutine.
// Returns ErrEmptyHost when u has no host component.
func (f *Fetcher) newHostFetcher(u *url.URL) (*hostFetcherInfiniteQ, error) {
	if u.Host == "" {
		// The URL must be rooted with a host.
		return nil, ErrEmptyHost
	}
	// Root of the host ("scheme://host/"), used as the fetcher's base URL.
	baseurl, err := u.Parse("/")
	if err != nil {
		return nil, err
	}
	// Create the infinite queue: the in channel to send on, and the out channel
	// to read from in the host's goroutine, and add to the hosts map.
	// NOTE(review): the `var out` declaration is redundant — `:=` below
	// re-declares it because `in` is new in this scope.
	var out chan Command
	in, out := make(chan Command, 1), make(chan Command, 1)
	// Adapter that forwards each fetch result to the user-supplied handler,
	// wrapping the command in a Context that carries the shared queue.
	chand := CmdHandlerFunc(func(cmd Command, res *http.Response, err error) {
		f.Handler.Handle(&Context{Cmd: cmd, Q: f.q}, res, err)
	})
	hf := NewHostFetcher(f.CrawlConfig, baseurl, chand, out)
	// Start the infinite queue goroutine for this host: sliceIQ presumably
	// buffers commands from `in` and feeds them to `out` without blocking
	// senders — confirm against its definition elsewhere in the package.
	go sliceIQ(in, out)
	// Start the working goroutine for this host; the queue's WaitGroup
	// tracks its lifetime.
	f.q.wg.Add(1)
	go func() {
		hf.Run()
		f.q.wg.Done()
	}()
	return &hostFetcherInfiniteQ{*hf, in}, nil
}
func (o *OpenTsdbOutput) Init(config interface{}) (err error) { o.OpenTsdbOutputConfig = config.(*OpenTsdbOutputConfig) //todo: check address validity // if o.url, err = url.Parse(o.Address); err != nil { // return fmt.Errorf("Can't parse URL '%s': %s", o.Address, err.Error()) // } // o.client = &http.Client{ Transport: &timeoutTransport{Transport: new(http.Transport)}, Timeout: time.Minute, } var u *url.URL if u, err = url.Parse(o.Url); err == nil { } o.logMsgChan = make(chan []byte, o.LogMsgChSize) u, err = u.Parse("/api/put") if err != nil { return err } if strings.HasPrefix(u.Host, ":") { u.Host = "localhost" + u.Host } o.Url = u.String() if err != nil { log.Printf("initialize OpenTsdbOutput failed, %s", err.Error()) return err } return }
func dockerRepo(RepoEndpoint string, myProxy string) { repo := http.NewServeMux() if myProxy != "" { url_i := url.URL{} url_proxy, _ := url_i.Parse(myProxy) tr := &http.Transport{ DisableCompression: false, DisableKeepAlives: false, Proxy: http.ProxyURL(url_proxy), TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, } clientRepo := &http.Client{Transport: tr} repo.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { sendHttps(w, r, clientRepo) }) log.Fatal(http.ListenAndServe(RepoEndpoint, repo)) } else { tr := &http.Transport{ DisableCompression: false, DisableKeepAlives: false, TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, } clientRepo := &http.Client{Transport: tr} repo.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { sendHttps(w, r, clientRepo) }) log.Fatal(http.ListenAndServe(RepoEndpoint, repo)) } }
func (this *ReqQiushiModule) Init(global_conf *context.GlobalContext) (err error) { this.qiushi_url = global_conf.Qiushi.Location this.timeout = global_conf.Qiushi.Timeout /*********设置传输层参数****************/ transport := &http.Transport{} transport.Dial = func(netw, addr string) (net.Conn, error) { c, err := net.DialTimeout(netw, addr, time.Millisecond*time.Duration(this.timeout)) if err != nil { utils.WarningLog.Write("dail timeout [%s]", err.Error()) return nil, err } return c, nil } transport.MaxIdleConnsPerHost = 10 transport.ResponseHeaderTimeout = time.Millisecond * time.Duration(this.timeout) if global_conf.Proxy.Open { url_i := url.URL{} url_proxy, _ := url_i.Parse(global_conf.Proxy.Location) transport.Proxy = http.ProxyURL(url_proxy) utils.DebugLog.Write("open http proxy , proxy location [%s]", global_conf.Proxy.Location) } /**********************************/ this.client = &http.Client{} this.client.Transport = transport utils.DebugLog.Write("req qiushi url [%s]", this.qiushi_url) return }
func batchRequests(requests []*Request, endPoint *url.URL) []*Response { responses := make([]*Response, len(requests)) var wg sync.WaitGroup // TODO: change to use go rutine for i, request := range requests { wg.Add(1) go func(i int, request *Request) { client := &http.Client{ Timeout: 10 * time.Second, } log.Println("Resuest:", request.Method, request.RelativeURL) url, err := endPoint.Parse(request.RelativeURL) showError(err) req, err := http.NewRequest(request.Method, url.String(), strings.NewReader(request.Body)) showError(err) resp, err := client.Do(req) showError(err) log.Println(resp) responses[i] = NewResponse(resp) wg.Done() }(i, request) } wg.Wait() return responses }
func (disc *Discoverer) CookArticleURL(baseURL *url.URL, artLink string) (*url.URL, error) { // parse, extending to absolute u, err := baseURL.Parse(artLink) if err != nil { return nil, err } // on a host we accept? if !disc.isHostGood(u.Host) { return nil, fmt.Errorf("host rejected (%s)", u.Host) } // matches one of our url forms foo := u.RequestURI() accept := false for _, pat := range disc.ArtPats { if pat.MatchString(foo) { accept = true break } } if !accept { return nil, fmt.Errorf("url rejected") } // apply our sanitising rules for this site if disc.StripFragments { u.Fragment = "" } if disc.StripQuery { u.RawQuery = "" } return u, nil }
func StartServerMultiplesBotsHostPort(uri string, pathl string, host string, port string, newrelic *RelicConfig, bots ...*TgBot) { var puri *url.URL if uri != "" { tmpuri, err := url.Parse(uri) if err != nil { fmt.Printf("Bad URL %s", uri) return } puri = tmpuri } botsmap := make(map[string]*TgBot) for _, bot := range bots { tokendiv := strings.Split(bot.Token, ":") if len(tokendiv) != 2 { return } tokenpath := fmt.Sprintf("%s%s", tokendiv[0], tokendiv[1]) botpathl := path.Join(pathl, tokenpath) nuri, _ := puri.Parse(botpathl) remoteuri := nuri.String() res, error := bot.SetWebhook(remoteuri) if error != nil { ec := res.ErrorCode fmt.Printf("Error setting the webhook: \nError code: %d\nDescription: %s\n", &ec, res.Description) continue } if bot.MainListener == nil { bot.StartMainListener() } botsmap[tokenpath] = bot } pathtolisten := path.Join(pathl, "(?P<token>[a-zA-Z0-9-_]+)") m := martini.Classic() m.Post(pathtolisten, binding.Json(MessageWithUpdateID{}), func(params martini.Params, msg MessageWithUpdateID) { bot, ok := botsmap[params["token"]] if ok && msg.UpdateID > 0 && msg.Msg.ID > 0 { bot.MainListener <- msg } else { fmt.Println("Someone tried with: ", params["token"], msg) } }) if newrelic != nil { gorelic.InitNewrelicAgent(newrelic.Token, newrelic.Name, false) m.Use(gorelic.Handler) } if host == "" || port == "" { m.Run() } else { m.RunOnAddr(host + ":" + port) } }
func init() { h := backend.NewHandler() h.CommandName = "get" h.CommandPattern = "(get)( )(.*)" h.Usage = "get url" h.HandlerFunc = func(cmd *backend.Command) (string, bool) { //queryHost := cmd.Tokens[1] //queryPort := cmd.Tokens[2] //queryArgs := cmd.Tokens[3] arg := cmd.Tokens[1] u := new(url.URL) newUrl, err := u.Parse(arg) if err != nil { return "Unable to parse url: " + err.Error(), false } fmt.Println("Request: get", newUrl) res, err := backend.GetHttpResource(newUrl.String()) if err != nil { return err.Error(), false } return res, true } backend.HandlerRegistry[h.CommandName] = h }
func maybeResolvedLink(root *url.URL, other string) string { parsed, err := root.Parse(other) if err == nil { return parsed.String() } return other }
// Init initializes the metadata send queue. func Init(u *url.URL, debug bool) error { mh, err := u.Parse("/api/metadata/put") if err != nil { return err } metahost = mh.String() metadebug = debug go collectMetadata() return nil }
func urlParse(u *url.URL) lua.Function { return func(l *lua.State) int { newU, err := u.Parse(lua.CheckString(l, 1)) if err != nil { lua.Errorf(l, err.Error()) panic("unreachable") } pushURL(l, newU) return 1 } }
// Add ?format=txt to the given URL func addTXTFormatString(u *url.URL) string { ret, err := u.Parse("") if err != nil { fmt.Println(err) return u.String() } val := ret.Query() val.Add("format", "txt") ret.RawQuery = val.Encode() return ret.String() }
// parse scans the HTML document s and returns, sorted and de-duplicated:
//   - ln: request URIs of same-host <a> links (excluding base's own URI)
//   - as: raw attribute values of asset tags (the stag/itag/ltag cases)
func parse(s string, base url.URL) (ln []string, as []string) {
	z := html.NewTokenizer(strings.NewReader(s))
	lnm := make(map[string]struct{}) // link set (same-host request URIs)
	asm := make(map[string]struct{}) // asset set (raw attribute values)
	// anonymous func used to get attribute values: returns the value of
	// attribute a on the current tag, or "" when absent.
	attr := func(a string) string {
		var av string
		for {
			if k, v, ha := z.TagAttr(); string(k) == a {
				av = string(v)
				break
			} else if ha == false {
				// no more attributes on this tag
				break
			}
		}
		return av
	}
	// convert map to slice, sorted for deterministic output
	slc := func(m map[string]struct{}) []string {
		var v []string
		for k := range m {
			v = append(v, k)
		}
		sort.Strings(v)
		return v
	}
	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			// EOF or malformed input: return whatever was collected.
			return slc(lnm), slc(asm)
		case html.StartTagToken, html.EndTagToken:
			if tn, ha := z.TagName(); ha {
				tg := string(tn)
				// attrs presumably maps tag name -> interesting attribute
				// (e.g. "a" -> "href"); defined elsewhere in this package.
				if av := attr(attrs[tg]); av != "" {
					switch tg {
					case atag:
						// Keep only same-host links that point somewhere
						// other than the page being parsed.
						if url, err := base.Parse(av); err == nil {
							if url.Host == base.Host && url.RequestURI() != base.RequestURI() {
								lnm[url.RequestURI()] = struct{}{}
							}
						}
					case stag, itag, ltag:
						asm[av] = struct{}{}
					}
				}
			}
		}
	}
}
func sanitizeLink(u *url.URL, v string) string { p, err := u.Parse(v) if err != nil { return "" } if !acceptableUriSchemes[p.Scheme] { return "" } return p.String() }
func ParseRef(base *url.URL, s string, f ...func(*url.URL) error) (*url.URL, error) { u, err := base.Parse(s) if err != nil { return nil, err } for _, ff := range f { if err = ff(u); err != nil { return nil, err } } return u, nil }
// This function has been taken from https://github.com/aybabtme/crawler/blob/master/util.go func cleanFromURLString(from *url.URL, link string) (*url.URL, error) { u, err := url.Parse(link) if u.Host == "" { u.Scheme = from.Scheme u.Host = from.Host } uStr := purell.NormalizeURL(u, purell.FlagsUsuallySafeGreedy) clean, err := from.Parse(uStr) return clean, err }
// Init initializes the metadata send queue. func Init(u *url.URL, debug bool) error { mh, err := u.Parse("/api/metadata/put") if err != nil { return err } if strings.HasPrefix(mh.Host, ":") { mh.Host = "localhost" + mh.Host } metahost = mh.String() metadebug = debug go collectMetadata() return nil }
func sanitiseURL(link string, baseURL *url.URL) (string, error) { u, err := baseURL.Parse(link) if err != nil { return "", err } // we're only interested in articles, so reject obviously-not-article urls if (u.Path == "/" || u.Path == "") && len(u.RawQuery) == 0 { return "", fmt.Errorf("obviously not article") } return purell.NormalizeURL(u, purell.FlagsSafe), nil }
func sendLinks(job *Job, ctxURL *url.URL, hrefs <-chan Href, links chan<- Link) { for href := range hrefs { parsed, err := ctxURL.Parse(string(href)) if err != nil || !(parsed.Scheme == "http" || parsed.Scheme == "https") { continue } // Ignore the part of the URL after the "#" parsed.Fragment = "" links <- Link{URL: parsed, Job: job} } close(links) }
// TestResourcePath exercises ResourcePath navigation (First/Last/Prev/Next,
// indices, entity and id extraction) for a one-item and a two-item path.
func TestResourcePath(t *testing.T) {
	// NOTE(review): the local `url` shadows the net/url package; calling
	// Parse on the nil *url.URL only works because the parsed strings are
	// absolute URLs (ResolveReference never touches a nil base for those).
	var url *url.URL
	var req Request
	var err error
	var rp ResourcePath
	var prev ResourcePathItem
	var next ResourcePathItem
	var firstItem ResourcePathItem

	// Single-entity path: /v1.0/Things — one item, no id, no query options.
	url, _ = url.Parse("http://localhost:8000/v1.0/Things")
	req, err = CreateIncomingRequest(url, HTTP)
	assert.Nil(t, err)
	assert.NotNil(t, req)
	rp = req.GetResourcePath()
	assert.NotNil(t, rp)
	assert.Equal(t, 1, len(rp.All()))
	assert.Equal(t, -1, rp.CurrentIndex())
	firstItem = rp.First()
	assert.NotNil(t, firstItem)
	assert.Equal(t, ENTITY_THINGS, firstItem.GetEntity())
	assert.Equal(t, "", firstItem.GetId())
	assert.Nil(t, firstItem.GetQueryOptions())
	// http://localhost:8000/v1.0/Things(12345)
	// http://localhost:8000/v1.0/Things(12345)/Locations
	// http://localhost:8000/v1.0/Things(12345)/Locations(67890)
	// Two-entity path: first item carries an id, second does not.
	url, _ = url.Parse("http://localhost:8000/v1.0/Datastreams(12345)/Sensor")
	req, err = CreateIncomingRequest(url, HTTP)
	assert.Nil(t, err)
	assert.NotNil(t, req)
	rp = req.GetResourcePath()
	assert.NotNil(t, rp)
	assert.Equal(t, 2, len(rp.All()))
	assert.Equal(t, -1, rp.CurrentIndex())
	assert.Equal(t, "12345", rp.First().GetId())
	assert.Equal(t, ENTITY_DATASTREAMS, rp.First().GetEntity())
	assert.True(t, rp.IsFirst())
	assert.NotNil(t, rp.Last())
	assert.True(t, rp.IsLast())
	assert.Equal(t, "", rp.Last().GetId())
	assert.Equal(t, ENTITY_SENSOR, rp.Last().GetEntity())
	// Prev/Next appear to move an internal cursor through the items.
	prev = rp.Prev()
	assert.Equal(t, "12345", prev.GetId())
	assert.Equal(t, ENTITY_DATASTREAMS, prev.GetEntity())
	next = rp.Next()
	assert.Equal(t, "", next.GetId())
	assert.Equal(t, ENTITY_SENSOR, next.GetEntity())
	assert.False(t, rp.HasNext())
	// Out-of-range access yields nil.
	assert.Nil(t, rp.At(3))
}
func normalizeURL(u1 *url.URL) (u *url.URL, mimetype string) { u, _ = u1.Parse("") // normalize // the file extension overrides the Accept: header if ext := path.Ext(u.Path); ext != "" { mimetype = mime.TypeByExtension(ext) u.Path = strings.TrimSuffix(u.Path, ext) } // add a trailing slash if there isn't one (so that relative // child URLs don't go to the parent) if !strings.HasSuffix(u.Path, "/") { u.Path = u.Path + "/" } return }
func linkMaker(curr *url.URL, l string) (*url.URL, error) { log.Println("Original link", l) if !strings.HasSuffix(curr.Path, "/") { curr.Path += "/" } if u, err := curr.Parse(l); err == nil { u.Fragment = "" return u, nil } else { return nil, err } }
// performRequestUrl issues an HTTP request with the client's standard
// headers, following up to redirectsRemaining 307 redirects by replaying the
// request body (captured through a TeeReader) against the Location URL —
// but only when the redirect stays on the same scheme and host.
func (c *simpleClient) performRequestUrl(method string, url *url.URL, body io.Reader, configure func(*http.Request), redirectsRemaining int) (res *simpleResponse, err error) {
	req, err := http.NewRequest(method, url.String(), body)
	if err != nil {
		return
	}
	req.Header.Set("Authorization", "token "+c.accessToken)
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", apiPayloadVersion)
	if configure != nil {
		configure(req)
	}
	// Mirror the outgoing body into a buffer so it can be re-sent if the
	// server answers with a 307, which requires repeating the original body.
	var bodyBackup io.ReadWriter
	if req.Body != nil {
		bodyBackup = &bytes.Buffer{}
		req.Body = ioutil.NopCloser(io.TeeReader(req.Body, bodyBackup))
	}
	httpResponse, err := c.httpClient.Do(req)
	if err != nil {
		return
	}
	res = &simpleResponse{httpResponse}
	if res.StatusCode == 307 && redirectsRemaining > 0 {
		// Resolve Location relative to the request URL. Cross-origin
		// redirects are not followed — presumably to avoid replaying the
		// Authorization header to another host; the 307 response itself is
		// returned in that case.
		url, err = url.Parse(res.Header.Get("Location"))
		if err != nil || url.Host != req.URL.Host || url.Scheme != req.URL.Scheme {
			return
		}
		res, err = c.performRequestUrl(method, url, bodyBackup, configure, redirectsRemaining-1)
	}
	return
}
func mimetypes2net(u *url.URL, mimetypes []string) NetEntity { u, _ = u.Parse("") // dup u.Path = strings.TrimSuffix(u.Path, "/") locations := make([]*url.URL, len(mimetypes)) for i, mimetype := range mimetypes { u2, _ := u.Parse("") exts, _ := mime.ExtensionsByType(mimetype) if exts == nil || len(exts) == 0 { u2.Path += ".httpentity_mimetypes2net_no_extension_should_never_happen?" + mimetype } else { u2.Path += exts[0] } locations[i] = u2 } return NetLocations(locations) }
// ListObjects returns an array of child object names of the object at the given URL. func (d *PostgresqlDriver) ListObjects(url *url.URL) ([]string, error) { var parentID uint64 err := d.db.QueryRow("SELECT id FROM data WHERE uri = $1", url.String()).Scan(&parentID) if err != nil { psqlLog.Error("Error while fetching object %s :: %s", url, err) return nil, InternalServerError } var rows *sql.Rows rows, err = d.db.Query("SELECT uri FROM data WHERE parent_id = $1", parentID) defer rows.Close() if err != nil { psqlLog.Error("Error while fetching children of %s :: %s", url, err) return nil, InternalServerError } uris := make([]string, 0, 25) for rows.Next() { var uri string if err := rows.Scan(&uri); err != nil { psqlLog.Error("Error when reading row :: %s", err) return nil, InternalServerError } u, err := url.Parse(uri) if err != nil { psqlLog.Error("Error while parsing URL %s :: %s", uri, err) return nil, InternalServerError } uris = append(uris, path.Base(u.Path)) } return uris, nil }
func getURL(src *url.URL, token html.Token) (*url.URL, bool) { if !contains([]string{"a", "img", "script", "link"}, token.Data) { return nil, false } for _, attr := range token.Attr { if contains([]string{"href", "src"}, attr.Key) { if u, err := src.Parse(attr.Val); err == nil && ensureCanonical(u) { return u, true } } } return nil, false }
func (w *Worker) AskRobots(url *url.URL) (bool, *heroshi.FetchResult) { robots_url_str := fmt.Sprintf("%s://%s/robots.txt", url.Scheme, url.Host) robots_url, err := url.Parse(robots_url_str) if err != nil { return false, heroshi.ErrorResult(url, err.Error()) } fetch_result := w.Fetch(robots_url) if !fetch_result.Success { fetch_result.Status = "Robots download error: " + fetch_result.Status return false, fetch_result } var robots *robotstxt.RobotsData robots, err = robotstxt.FromStatusAndBytes(fetch_result.StatusCode, fetch_result.Body) if err != nil { fetch_result.Status = "Robots parse error: " + err.Error() return false, fetch_result } allow := robots.TestAgent(url.Path, w.UserAgent) if !allow { return allow, heroshi.ErrorResult(url, "Robots disallow") } return allow, nil }
func processRun( parentCtx *common.Context, client *http.Client, baseURL *url.URL, ) error { requestURL, err := baseURL.Parse("run/request/") if err != nil { panic(err) } resp, err := client.Get(requestURL.String()) if err != nil { return err } defer resp.Body.Close() ctx := parentCtx.DebugContext() syncID, err := strconv.ParseUint(resp.Header.Get("Sync-ID"), 10, 64) if err != nil { return err } ctx.EventCollector.Add(ctx.EventFactory.NewReceiverClockSyncEvent(syncID)) decoder := json.NewDecoder(resp.Body) var run common.Run if err := decoder.Decode(&run); err != nil { return err } uploadURL, err := baseURL.Parse(fmt.Sprintf("run/%d/results/", run.AttemptID)) if err != nil { return err } finished := make(chan error, 1) if err = gradeAndUploadResults( ctx, client, uploadURL.String(), &run, finished, ); err != nil { return err } return <-finished }
// getTransportFieldURL builds an HTTP transport that routes requests through
// the proxy at *proxy_addr ("specify proxy IP"). Dialed connections get a 20s
// dial timeout and an absolute deadline 6s after dialing begins.
// The previous version silently discarded the URL parse error; a malformed
// address now explicitly falls back to a direct (no-proxy) transport, which
// matches the old effective behavior but makes the decision visible.
func getTransportFieldURL(proxy_addr *string) (transport *http.Transport) {
	proxyURL, err := url.Parse(*proxy_addr)
	if err != nil {
		proxyURL = nil // direct connection on a malformed proxy address
	}
	transport = &http.Transport{
		Proxy: http.ProxyURL(proxyURL),
		Dial: func(netw, addr string) (net.Conn, error) {
			deadline := time.Now().Add(6 * time.Second)
			c, err := net.DialTimeout(netw, addr, time.Second*20)
			if err != nil {
				return nil, err
			}
			// NOTE(review): the 6s deadline is shorter than the 20s dial
			// timeout — looks inconsistent; confirm intended values.
			c.SetDeadline(deadline)
			return c, nil
		},
	}
	return
}