Exemple #1
0
// fiddler2_enable makes client send its requests through the HTTP proxy
// located at proxys by installing a fresh http.Transport on the client.
//
// If proxys cannot be parsed as a URL the client is left untouched, instead
// of having its transport replaced by one that silently uses no proxy.
func fiddler2_enable(client *http.Client, proxys string) {
	// Resolve against an empty base URL, i.e. the same parsing rules the
	// zero-value url.URL receiver provided before.
	proxy, err := new(url.URL).Parse(proxys)
	if err != nil {
		return
	}
	client.Transport = &http.Transport{Proxy: http.ProxyURL(proxy)}
}
Exemple #2
0
// newHostFetcher builds and starts the per-host fetcher for the host of u.
//
// It returns ErrEmptyHost when u has no host, or the parse error when the
// host's root URL ("/") cannot be derived from u. On success two goroutines
// are running: sliceIQ, which connects the returned send channel to the
// channel the host fetcher reads from, and the host fetcher's Run loop, which
// is tracked by f.q.wg.
func (f *Fetcher) newHostFetcher(u *url.URL) (*hostFetcherInfiniteQ, error) {
	// A URL without a host cannot identify a per-host fetcher.
	if u.Host == "" {
		return nil, ErrEmptyHost
	}
	root, err := u.Parse("/")
	if err != nil {
		return nil, err
	}

	// enqueue is handed back to the caller; dequeue is what the host's
	// goroutine reads from. sliceIQ sits between the two.
	var (
		enqueue = make(chan Command, 1)
		dequeue = make(chan Command, 1)
	)
	dispatch := CmdHandlerFunc(func(cmd Command, res *http.Response, err error) {
		f.Handler.Handle(&Context{Cmd: cmd, Q: f.q}, res, err)
	})
	worker := NewHostFetcher(f.CrawlConfig, root, dispatch, dequeue)

	// Queue goroutine for this host.
	go sliceIQ(enqueue, dequeue)

	// Working goroutine for this host, accounted for in the queue's wait group.
	f.q.wg.Add(1)
	go func() {
		worker.Run()
		f.q.wg.Done()
	}()

	return &hostFetcherInfiniteQ{*worker, enqueue}, nil
}
// Init configures the output from config, which must hold an
// *OpenTsdbOutputConfig (any other dynamic type makes the type assertion
// panic). It creates the HTTP client, allocates the log message channel and
// rewrites o.Url so that it points at the "/api/put" path of the configured
// address; a host of the form ":port" is expanded to "localhost:port".
//
// A non-nil error is returned, and logged, when o.Url cannot be parsed.
func (o *OpenTsdbOutput) Init(config interface{}) (err error) {
	o.OpenTsdbOutputConfig = config.(*OpenTsdbOutputConfig)

	o.client = &http.Client{
		Transport: &timeoutTransport{Transport: new(http.Transport)},
		Timeout:   time.Minute,
	}

	// A failed parse yields a nil *url.URL; stop here rather than
	// dereferencing it when resolving the API path below.
	base, err := url.Parse(o.Url)
	if err != nil {
		log.Printf("initialize OpenTsdbOutput failed, %s", err.Error())
		return err
	}

	o.logMsgChan = make(chan []byte, o.LogMsgChSize)

	put, err := base.Parse("/api/put")
	if err != nil {
		log.Printf("initialize OpenTsdbOutput failed, %s", err.Error())
		return err
	}
	if strings.HasPrefix(put.Host, ":") {
		put.Host = "localhost" + put.Host
	}
	o.Url = put.String()

	return nil
}
Exemple #4
0
// dockerRepo listens on RepoEndpoint and hands every incoming request to
// sendHttps together with one shared outbound HTTP client. When myProxy is
// non-empty the client sends its requests through that proxy.
//
// The call blocks in http.ListenAndServe and ends the process through
// log.Fatal once the server returns. An unparsable myProxy is logged and the
// client then connects directly, which is what the previously ignored parse
// error resulted in.
//
// NOTE(review): certificate verification is disabled on the outbound client
// (InsecureSkipVerify) — confirm this is intended.
func dockerRepo(RepoEndpoint string, myProxy string) {
	tr := &http.Transport{
		DisableCompression: false,
		DisableKeepAlives:  false,
		TLSClientConfig:    &tls.Config{InsecureSkipVerify: true},
	}
	if myProxy != "" {
		proxyURL, err := new(url.URL).Parse(myProxy)
		if err != nil {
			log.Printf("invalid proxy URL %q, continuing without proxy: %v", myProxy, err)
		} else {
			tr.Proxy = http.ProxyURL(proxyURL)
		}
	}
	clientRepo := &http.Client{Transport: tr}

	repo := http.NewServeMux()
	repo.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		sendHttps(w, r, clientRepo)
	})
	log.Fatal(http.ListenAndServe(RepoEndpoint, repo))
}
Exemple #5
0
// Init prepares the module's HTTP client from global_conf.
//
// The Qiushi location and timeout are copied onto the module. The timeout is
// interpreted in milliseconds and is applied both to dialing and to waiting
// for response headers. When the proxy is switched on in the configuration,
// requests are routed through global_conf.Proxy.Location.
//
// It returns an error when the proxy location cannot be parsed; in that case
// no client is installed on the module.
func (this *ReqQiushiModule) Init(global_conf *context.GlobalContext) (err error) {
	this.qiushi_url = global_conf.Qiushi.Location
	this.timeout = global_conf.Qiushi.Timeout

	// Transport-layer settings.
	transport := &http.Transport{}
	transport.Dial = func(netw, addr string) (net.Conn, error) {
		// this.timeout is read at dial time, not captured when Init runs.
		c, err := net.DialTimeout(netw, addr, time.Millisecond*time.Duration(this.timeout))
		if err != nil {
			utils.WarningLog.Write("dail timeout [%s]", err.Error())
			return nil, err
		}
		return c, nil
	}
	transport.MaxIdleConnsPerHost = 10
	transport.ResponseHeaderTimeout = time.Millisecond * time.Duration(this.timeout)
	if global_conf.Proxy.Open {
		url_proxy, err := new(url.URL).Parse(global_conf.Proxy.Location)
		if err != nil {
			// Report the bad proxy setting instead of silently running
			// without a proxy.
			return err
		}
		transport.Proxy = http.ProxyURL(url_proxy)
		utils.DebugLog.Write("open http proxy , proxy location [%s]", global_conf.Proxy.Location)
	}

	this.client = &http.Client{}
	this.client.Transport = transport
	utils.DebugLog.Write("req qiushi url [%s]", this.qiushi_url)

	return nil
}
// batchRequests issues all requests concurrently, one goroutine per request,
// resolving each request's RelativeURL against endPoint. It waits for every
// goroutine to finish and returns the responses in the same order as
// requests.
//
// Every error is passed to showError. When the URL cannot be resolved or the
// HTTP request cannot be built, that request is abandoned and its slot in the
// result stays nil.
func batchRequests(requests []*Request, endPoint *url.URL) []*Response {
	responses := make([]*Response, len(requests))

	// One client serves every goroutine; http.Client is safe for concurrent
	// use, so there is no need to build one per request.
	client := &http.Client{
		Timeout: 10 * time.Second,
	}

	var wg sync.WaitGroup
	for i, request := range requests {
		wg.Add(1)
		go func(i int, request *Request) {
			// Deferred so the early returns below cannot leave Wait hanging.
			defer wg.Done()

			log.Println("Resuest:", request.Method, request.RelativeURL)
			target, err := endPoint.Parse(request.RelativeURL)
			showError(err)
			if err != nil {
				// target is nil here; using it would panic.
				return
			}
			req, err := http.NewRequest(request.Method, target.String(), strings.NewReader(request.Body))
			showError(err)
			if err != nil {
				return
			}
			resp, err := client.Do(req)
			showError(err)
			log.Println(resp)
			// Each goroutine writes only its own index, so no locking is needed.
			responses[i] = NewResponse(resp)
		}(i, request)
	}
	wg.Wait()
	return responses
}
Exemple #7
0
// CookArticleURL turns artLink, as found on the page at baseURL, into an
// absolute article URL.
//
// The link is rejected with an error when it cannot be parsed, when its host
// fails disc.isHostGood, or when its request URI matches none of the patterns
// in disc.ArtPats. Accepted URLs have their fragment and/or query removed
// according to disc.StripFragments and disc.StripQuery.
func (disc *Discoverer) CookArticleURL(baseURL *url.URL, artLink string) (*url.URL, error) {
	// Resolve to an absolute URL.
	cooked, err := baseURL.Parse(artLink)
	if err != nil {
		return nil, err
	}

	// Only hosts we accept.
	if host := cooked.Host; !disc.isHostGood(host) {
		return nil, fmt.Errorf("host rejected (%s)", host)
	}

	// The request URI has to match at least one article pattern.
	reqURI := cooked.RequestURI()
	matched := false
	for _, pat := range disc.ArtPats {
		if matched = pat.MatchString(reqURI); matched {
			break
		}
	}
	if !matched {
		return nil, fmt.Errorf("url rejected")
	}

	// Site-specific sanitising.
	if disc.StripFragments {
		cooked.Fragment = ""
	}
	if disc.StripQuery {
		cooked.RawQuery = ""
	}
	return cooked, nil
}
Exemple #8
0
// StartServerMultiplesBotsHostPort registers a webhook for every bot and then
// runs a martini server that forwards incoming updates to the matching bot.
//
// uri is the public base URL and pathl the path prefix; each bot's webhook is
// pathl joined with the bot token with its ":" removed, resolved against uri.
// Bots whose webhook cannot be built or set are reported on stdout and
// skipped. The function returns without serving when uri is not a valid URL
// or when a bot token does not consist of exactly two ":"-separated parts.
//
// When newrelic is non-nil the gorelic agent and handler are installed. The
// server listens on host:port when both are non-empty, otherwise m.Run is
// used.
func StartServerMultiplesBotsHostPort(uri string, pathl string, host string, port string, newrelic *RelicConfig, bots ...*TgBot) {
	// Start from an empty base so resolving webhook paths below never
	// dereferences a nil URL when uri is empty.
	puri := &url.URL{}
	if uri != "" {
		parsed, err := url.Parse(uri)
		if err != nil {
			fmt.Printf("Bad URL %s", uri)
			return
		}
		puri = parsed
	}

	botsmap := make(map[string]*TgBot)
	for _, bot := range bots {
		tokendiv := strings.Split(bot.Token, ":")
		if len(tokendiv) != 2 {
			return
		}

		tokenpath := fmt.Sprintf("%s%s", tokendiv[0], tokendiv[1])
		botpathl := path.Join(pathl, tokenpath)

		nuri, err := puri.Parse(botpathl)
		if err != nil {
			// nuri is nil on failure; skip this bot instead of panicking.
			fmt.Printf("Bad webhook path %s: %s\n", botpathl, err)
			continue
		}
		remoteuri := nuri.String()
		res, err := bot.SetWebhook(remoteuri)

		if err != nil {
			// Print the code itself rather than the address of a local copy.
			// NOTE(review): if ErrorCode/Description are pointer fields they
			// still need dereferencing here — confirm against the result type.
			fmt.Printf("Error setting the webhook: \nError code: %d\nDescription: %s\n", res.ErrorCode, res.Description)
			continue
		}
		if bot.MainListener == nil {
			bot.StartMainListener()
		}
		botsmap[tokenpath] = bot
	}

	pathtolisten := path.Join(pathl, "(?P<token>[a-zA-Z0-9-_]+)")

	m := martini.Classic()
	m.Post(pathtolisten, binding.Json(MessageWithUpdateID{}), func(params martini.Params, msg MessageWithUpdateID) {
		bot, ok := botsmap[params["token"]]

		if ok && msg.UpdateID > 0 && msg.Msg.ID > 0 {
			bot.MainListener <- msg
		} else {
			fmt.Println("Someone tried with: ", params["token"], msg)
		}
	})

	if newrelic != nil {
		gorelic.InitNewrelicAgent(newrelic.Token, newrelic.Name, false)
		m.Use(gorelic.Handler)
	}

	if host == "" || port == "" {
		m.Run()
	} else {
		m.RunOnAddr(host + ":" + port)
	}
}
Exemple #9
0
// init registers the "get" command with the backend handler registry. The
// command parses its argument (cmd.Tokens[1]) as a URL, fetches it through
// backend.GetHttpResource and returns the fetched content; on failure it
// returns the error text together with false.
func init() {
	fetch := func(cmd *backend.Command) (string, bool) {
		target, err := new(url.URL).Parse(cmd.Tokens[1])
		if err != nil {
			return "Unable to parse url: " + err.Error(), false
		}
		fmt.Println("Request: get", target)

		body, err := backend.GetHttpResource(target.String())
		if err != nil {
			return err.Error(), false
		}
		return body, true
	}

	h := backend.NewHandler()
	h.CommandName = "get"
	h.CommandPattern = "(get)( )(.*)"
	h.Usage = "get url"
	h.HandlerFunc = fetch
	backend.HandlerRegistry[h.CommandName] = h
}
Exemple #10
0
// maybeResolvedLink resolves other relative to root and returns the resulting
// URL in string form. When other cannot be parsed it is returned unchanged.
func maybeResolvedLink(root *url.URL, other string) string {
	resolved, err := root.Parse(other)
	if err != nil {
		return other
	}
	return resolved.String()
}
Exemple #11
0
// Init initializes the metadata send queue: it records the
// "/api/metadata/put" endpoint resolved against u and the debug flag, then
// starts collectMetadata in its own goroutine. If the endpoint cannot be
// resolved the error is returned and nothing is started.
func Init(u *url.URL, debug bool) error {
	endpoint, err := u.Parse("/api/metadata/put")
	if err == nil {
		metahost, metadebug = endpoint.String(), debug
		go collectMetadata()
	}
	return err
}
Exemple #12
0
// urlParse returns a Lua function that resolves its first argument (a string)
// relative to u and pushes the resulting URL with pushURL, returning one
// value. A parse failure is raised as a Lua error.
func urlParse(u *url.URL) lua.Function {
	return func(l *lua.State) int {
		newU, err := u.Parse(lua.CheckString(l, 1))
		if err != nil {
			// Pass the message as an argument, never as the format string:
			// URL parse errors quote the offending input, which frequently
			// contains '%' and would otherwise be treated as format verbs.
			lua.Errorf(l, "%s", err.Error())
			panic("unreachable")
		}
		pushURL(l, newU)
		return 1
	}
}
Exemple #13
0
// addTXTFormatString returns the string form of u with a format=txt query
// parameter added. It works on a copy, so u itself is not modified. If the
// copy cannot be made the error is printed and u is returned as is.
func addTXTFormatString(u *url.URL) string {
	dup, err := u.Parse("")
	if err != nil {
		fmt.Println(err)
		return u.String()
	}

	query := dup.Query()
	// Append rather than replace, so an existing "format" value is kept.
	query["format"] = append(query["format"], "txt")
	dup.RawQuery = query.Encode()

	return dup.String()
}
Exemple #14
0
// parse tokenizes the HTML document s and collects two de-duplicated, sorted
// lists:
//
//   - ln: the request URIs of atag elements whose attribute value, resolved
//     relative to base, points at the same host as base but at a different
//     request URI;
//   - as: the raw attribute values found on stag, itag and ltag elements.
//
// Which attribute is inspected for a given tag name is looked up in attrs.
// attrs and the *tag names are defined elsewhere in the package.
func parse(s string, base url.URL) (ln []string, as []string) {
	z := html.NewTokenizer(strings.NewReader(s))
	// Maps are used as sets so that each link/asset is recorded only once.
	lnm := make(map[string]struct{})
	asm := make(map[string]struct{})

	// attr scans the attributes of the tokenizer's current tag and returns
	// the value of the first one whose key equals a, or "" when the tag has
	// no such attribute. It consumes the tokenizer's attribute iterator.
	attr := func(a string) string {
		var av string
		for {
			if k, v, ha := z.TagAttr(); string(k) == a {
				av = string(v)
				break
			} else if ha == false {
				// No more attributes on this tag.
				break
			}
		}
		return av
	}

	// slc converts a set into a sorted slice of its keys.
	slc := func(m map[string]struct{}) []string {
		var v []string

		for k := range m {
			v = append(v, k)
		}
		sort.Strings(v)
		return v
	}

	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			// The tokenizer stopped (end of input or an error): return what
			// has been collected so far.
			return slc(lnm), slc(asm)
		case html.StartTagToken, html.EndTagToken:
			// ha reports whether the tag carries attributes at all.
			if tn, ha := z.TagName(); ha {
				tg := string(tn)
				if av := attr(attrs[tg]); av != "" {
					switch tg {
					case atag:
						// Keep only same-host links that do not point back at
						// the base document itself; unparsable links are
						// silently skipped.
						if url, err := base.Parse(av); err == nil {
							if url.Host == base.Host && url.RequestURI() != base.RequestURI() {
								lnm[url.RequestURI()] = struct{}{}
							}
						}
					case stag, itag, ltag:
						// Asset references are stored verbatim, unresolved.
						asm[av] = struct{}{}
					}
				}
			}
		}
	}
}
Exemple #15
0
// sanitizeLink resolves v relative to u and returns the resulting URL as a
// string. It returns "" when v cannot be parsed or when the resolved URL's
// scheme is not listed in acceptableUriSchemes.
func sanitizeLink(u *url.URL, v string) string {
	resolved, err := u.Parse(v)
	if err == nil && acceptableUriSchemes[resolved.Scheme] {
		return resolved.String()
	}
	return ""
}
Exemple #16
0
// ParseRef resolves s relative to base and then applies every function in f,
// in order, to the resolved URL. The first error, whether from parsing or
// from one of the functions, stops processing and is returned together with
// a nil URL.
func ParseRef(base *url.URL, s string, f ...func(*url.URL) error) (*url.URL, error) {
	resolved, err := base.Parse(s)
	if err != nil {
		return nil, err
	}
	for i := range f {
		if checkErr := f[i](resolved); checkErr != nil {
			return nil, checkErr
		}
	}
	return resolved, nil
}
Exemple #17
0
// cleanFromURLString parses link, fills in the scheme and host of from when
// link has no host of its own, normalizes the result with purell
// (FlagsUsuallySafeGreedy) and finally resolves it against from.
//
// It returns an error when link cannot be parsed or when the normalized form
// cannot be resolved.
//
// This function has been taken from https://github.com/aybabtme/crawler/blob/master/util.go
func cleanFromURLString(from *url.URL, link string) (*url.URL, error) {
	u, err := url.Parse(link)
	if err != nil {
		// u is nil on failure; reading u.Host below would panic.
		return nil, err
	}
	if u.Host == "" {
		u.Scheme = from.Scheme
		u.Host = from.Host
	}
	uStr := purell.NormalizeURL(u, purell.FlagsUsuallySafeGreedy)

	return from.Parse(uStr)
}
Exemple #18
0
// Init initializes the metadata send queue. It resolves "/api/metadata/put"
// against u, stores the resulting address and the debug flag in the package
// state and launches collectMetadata in a goroutine. A host consisting only
// of ":port" is rewritten to "localhost:port". The resolve error, if any, is
// returned and nothing is started.
func Init(u *url.URL, debug bool) error {
	target, err := u.Parse("/api/metadata/put")
	if err != nil {
		return err
	}
	if host := target.Host; strings.HasPrefix(host, ":") {
		target.Host = "localhost" + host
	}

	metahost = target.String()
	metadebug = debug

	go collectMetadata()
	return nil
}
Exemple #19
0
// sanitiseURL resolves link against baseURL and returns it normalized with
// purell (FlagsSafe). Links that cannot be parsed, and links that point at
// the bare site root without any query string, are rejected with an error.
func sanitiseURL(link string, baseURL *url.URL) (string, error) {
	resolved, err := baseURL.Parse(link)
	if err != nil {
		return "", err
	}

	// Only articles are of interest; a root path with no query is obviously
	// not one.
	atRoot := resolved.Path == "" || resolved.Path == "/"
	if atRoot && resolved.RawQuery == "" {
		return "", fmt.Errorf("obviously not article")
	}

	normalized := purell.NormalizeURL(resolved, purell.FlagsSafe)
	return normalized, nil
}
// sendLinks reads hrefs until the channel is closed, resolves each one
// against ctxURL and forwards every http/https result on links, tagged with
// job and with its fragment removed. Hrefs that fail to parse or use another
// scheme are dropped. links is closed once hrefs is exhausted.
func sendLinks(job *Job, ctxURL *url.URL, hrefs <-chan Href, links chan<- Link) {
	for href := range hrefs {
		target, err := ctxURL.Parse(string(href))
		if err != nil {
			continue
		}
		switch target.Scheme {
		case "http", "https":
			// Everything after "#" is irrelevant for fetching.
			target.Fragment = ""
			links <- Link{URL: target, Job: job}
		}
	}
	close(links)
}
// TestResourcePath checks the resource path built by CreateIncomingRequest
// for a single-entity URL and for a two-segment URL, including navigation
// with First/Last/Prev/Next/At.
func TestResourcePath(t *testing.T) {
	// The URL variable is deliberately not called "url": that would shadow
	// the net/url package and turn url.Parse into a method call on a nil
	// *url.URL.
	var (
		u         *url.URL
		req       Request
		err       error
		rp        ResourcePath
		prev      ResourcePathItem
		next      ResourcePathItem
		firstItem ResourcePathItem
	)

	u, err = url.Parse("http://localhost:8000/v1.0/Things")
	assert.Nil(t, err)
	req, err = CreateIncomingRequest(u, HTTP)
	assert.Nil(t, err)
	assert.NotNil(t, req)
	rp = req.GetResourcePath()
	assert.NotNil(t, rp)
	assert.Equal(t, 1, len(rp.All()))
	assert.Equal(t, -1, rp.CurrentIndex())
	firstItem = rp.First()
	assert.NotNil(t, firstItem)
	assert.Equal(t, ENTITY_THINGS, firstItem.GetEntity())
	assert.Equal(t, "", firstItem.GetId())
	assert.Nil(t, firstItem.GetQueryOptions())

	// Further URL shapes, not exercised yet:
	// http://localhost:8000/v1.0/Things(12345)
	// http://localhost:8000/v1.0/Things(12345)/Locations
	// http://localhost:8000/v1.0/Things(12345)/Locations(67890)

	u, err = url.Parse("http://localhost:8000/v1.0/Datastreams(12345)/Sensor")
	assert.Nil(t, err)
	req, err = CreateIncomingRequest(u, HTTP)
	assert.Nil(t, err)
	assert.NotNil(t, req)
	rp = req.GetResourcePath()
	assert.NotNil(t, rp)
	assert.Equal(t, 2, len(rp.All()))
	assert.Equal(t, -1, rp.CurrentIndex())
	assert.Equal(t, "12345", rp.First().GetId())
	assert.Equal(t, ENTITY_DATASTREAMS, rp.First().GetEntity())
	assert.True(t, rp.IsFirst())
	assert.NotNil(t, rp.Last())
	assert.True(t, rp.IsLast())
	assert.Equal(t, "", rp.Last().GetId())
	assert.Equal(t, ENTITY_SENSOR, rp.Last().GetEntity())
	prev = rp.Prev()
	assert.Equal(t, "12345", prev.GetId())
	assert.Equal(t, ENTITY_DATASTREAMS, prev.GetEntity())
	next = rp.Next()
	assert.Equal(t, "", next.GetId())
	assert.Equal(t, ENTITY_SENSOR, next.GetEntity())
	assert.False(t, rp.HasNext())
	assert.Nil(t, rp.At(3))
}
Exemple #22
0
// normalizeURL returns a normalized copy of u1 whose path has any file
// extension removed and always ends in "/", together with the MIME type
// registered for that extension ("" when there is no extension or it is
// unknown). u1 itself is left unchanged.
func normalizeURL(u1 *url.URL) (u *url.URL, mimetype string) {
	// Resolving the empty reference yields a normalized copy.
	u, _ = u1.Parse("")

	// A file extension takes precedence over the Accept: header.
	ext := path.Ext(u.Path)
	if ext != "" {
		mimetype = mime.TypeByExtension(ext)
		u.Path = strings.TrimSuffix(u.Path, ext)
	}

	// Without a trailing slash, relative child URLs would resolve against
	// the parent instead of this resource.
	if hasSlash := strings.HasSuffix(u.Path, "/"); !hasSlash {
		u.Path += "/"
	}
	return u, mimetype
}
Exemple #23
0
// linkMaker resolves the link l found on the page curr and returns it as an
// absolute URL without fragment. curr is treated as a directory: if its path
// does not end in "/", one is appended before resolving.
//
// The adjustment is made on a copy, so curr itself is never modified. An
// error is returned when l cannot be parsed.
func linkMaker(curr *url.URL, l string) (*url.URL, error) {
	log.Println("Original link", l)

	// Shallow copy: only Path is changed, so sharing the remaining fields
	// with the caller's URL is safe.
	base := *curr
	if !strings.HasSuffix(base.Path, "/") {
		base.Path += "/"
	}

	u, err := base.Parse(l)
	if err != nil {
		return nil, err
	}
	u.Fragment = ""
	return u, nil
}
Exemple #24
0
// performRequestUrl sends a request with the given method and body to url,
// adding the Authorization, User-Agent and Accept headers and then letting
// configure (when non-nil) adjust the request.
//
// A 307 response is followed, at most redirectsRemaining times, as long as
// the Location header resolves to the same scheme and host as the original
// request; the request body is replayed from a copy captured while it was
// being sent. When the redirect is not followed the 307 response itself is
// returned (together with the parse error, if Location was unparsable).
//
// NOTE(review): the replay copy only contains what was actually read from
// the body during the first attempt — confirm that is always the full body.
func (c *simpleClient) performRequestUrl(method string, url *url.URL, body io.Reader, configure func(*http.Request), redirectsRemaining int) (res *simpleResponse, err error) {
	req, err := http.NewRequest(method, url.String(), body)
	if err != nil {
		return
	}
	req.Header.Set("Authorization", "token "+c.accessToken)
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", apiPayloadVersion)

	if configure != nil {
		configure(req)
	}

	// Tee the outgoing body into a buffer so it can be sent again on redirect.
	var bodyBackup io.ReadWriter
	if req.Body != nil {
		bodyBackup = &bytes.Buffer{}
		req.Body = ioutil.NopCloser(io.TeeReader(req.Body, bodyBackup))
	}

	httpResponse, err := c.httpClient.Do(req)
	if err != nil {
		return
	}

	res = &simpleResponse{httpResponse}
	if res.StatusCode == 307 && redirectsRemaining > 0 {
		// Location is resolved relative to the URL that was just requested.
		url, err = url.Parse(res.Header.Get("Location"))
		if err != nil || url.Host != req.URL.Host || url.Scheme != req.URL.Scheme {
			return
		}
		// The redirect response is discarded from here on; close its body so
		// the underlying connection is released instead of leaked.
		httpResponse.Body.Close()
		res, err = c.performRequestUrl(method, url, bodyBackup, configure, redirectsRemaining-1)
	}

	return
}
Exemple #25
0
// mimetypes2net builds one location per entry of mimetypes by appending the
// first file extension registered for that MIME type to the path of u (with
// any trailing "/" removed), and wraps the locations with NetLocations. A
// MIME type without a registered extension gets a conspicuous placeholder
// suffix instead. u itself is not modified.
func mimetypes2net(u *url.URL, mimetypes []string) NetEntity {
	base, _ := u.Parse("") // private copy of u
	base.Path = strings.TrimSuffix(base.Path, "/")

	locations := make([]*url.URL, 0, len(mimetypes))
	for _, mimetype := range mimetypes {
		loc, _ := base.Parse("")

		suffix := ".httpentity_mimetypes2net_no_extension_should_never_happen?" + mimetype
		if exts, _ := mime.ExtensionsByType(mimetype); len(exts) > 0 {
			suffix = exts[0]
		}
		loc.Path += suffix

		locations = append(locations, loc)
	}
	return NetLocations(locations)
}
// ListObjects returns an array of child object names of the object at the given URL.
//
// The object's id is looked up by its URI, then the URIs of all rows whose
// parent_id equals that id are read. Each child URI is resolved relative to
// url and the last element of its path is used as the name. Any database or
// parse failure is logged and reported as InternalServerError.
func (d *PostgresqlDriver) ListObjects(url *url.URL) ([]string, error) {
	var parentID uint64
	err := d.db.QueryRow("SELECT id FROM data WHERE uri = $1", url.String()).Scan(&parentID)
	if err != nil {
		psqlLog.Error("Error while fetching object %s :: %s", url, err)
		return nil, InternalServerError
	}
	var rows *sql.Rows
	rows, err = d.db.Query("SELECT uri FROM data WHERE parent_id = $1", parentID)
	if err != nil {
		psqlLog.Error("Error while fetching children of %s :: %s", url, err)
		return nil, InternalServerError
	}
	// Only defer Close once the query is known to have succeeded: rows is nil
	// on error and closing it would panic.
	defer rows.Close()

	uris := make([]string, 0, 25)
	for rows.Next() {
		var uri string
		if err := rows.Scan(&uri); err != nil {
			psqlLog.Error("Error when reading row :: %s", err)
			return nil, InternalServerError
		}
		u, err := url.Parse(uri)
		if err != nil {
			psqlLog.Error("Error while parsing URL %s :: %s", uri, err)
			return nil, InternalServerError
		}
		uris = append(uris, path.Base(u.Path))
	}
	// Next returns false both at the end of the result set and on failure;
	// Err tells the two apart.
	if err := rows.Err(); err != nil {
		psqlLog.Error("Error while iterating children of %s :: %s", url, err)
		return nil, InternalServerError
	}
	return uris, nil
}
Exemple #27
0
// getURL extracts the link carried by an HTML token. Only "a", "img",
// "script" and "link" tokens are considered; their "href" and "src"
// attributes are resolved against src in order, and the first one that
// parses and passes ensureCanonical is returned with true. Otherwise the
// result is nil, false.
func getURL(src *url.URL, token html.Token) (*url.URL, bool) {
	linkTags := []string{"a", "img", "script", "link"}
	if !contains(linkTags, token.Data) {
		return nil, false
	}

	linkAttrs := []string{"href", "src"}
	for _, attr := range token.Attr {
		if !contains(linkAttrs, attr.Key) {
			continue
		}
		candidate, err := src.Parse(attr.Val)
		if err != nil {
			continue
		}
		if ensureCanonical(candidate) {
			return candidate, true
		}
	}

	return nil, false
}
Exemple #28
0
// AskRobots fetches /robots.txt from the scheme and host of url and reports
// whether w.UserAgent may access url.Path.
//
// It returns true with a nil result when access is allowed. Otherwise it
// returns false together with a result describing why: an unparsable robots
// URL, a failed download or an unparsable robots.txt (both reported on the
// fetch result, with its Status prefixed accordingly), or a disallow rule.
func (w *Worker) AskRobots(url *url.URL) (bool, *heroshi.FetchResult) {
	robotsURL, err := url.Parse(fmt.Sprintf("%s://%s/robots.txt", url.Scheme, url.Host))
	if err != nil {
		return false, heroshi.ErrorResult(url, err.Error())
	}

	fetched := w.Fetch(robotsURL)
	if !fetched.Success {
		fetched.Status = "Robots download error: " + fetched.Status
		return false, fetched
	}

	rules, err := robotstxt.FromStatusAndBytes(fetched.StatusCode, fetched.Body)
	if err != nil {
		fetched.Status = "Robots parse error: " + err.Error()
		return false, fetched
	}

	if rules.TestAgent(url.Path, w.UserAgent) {
		return true, nil
	}
	return false, heroshi.ErrorResult(url, "Robots disallow")
}
Exemple #29
0
// processRun requests one run from "run/request/" (relative to baseURL),
// grades it and uploads the results to "run/<AttemptID>/results/".
//
// The response must carry a numeric Sync-ID header, which is recorded as a
// receiver clock sync event on a debug context derived from parentCtx, and a
// JSON-encoded run in its body. The function blocks until
// gradeAndUploadResults reports completion on its channel and returns that
// result. Every failure, including a URL that cannot be resolved, is
// returned as an error.
func processRun(
	parentCtx *common.Context,
	client *http.Client,
	baseURL *url.URL,
) error {
	requestURL, err := baseURL.Parse("run/request/")
	if err != nil {
		// Reported like every other failure in this function rather than
		// bringing the process down.
		return err
	}
	resp, err := client.Get(requestURL.String())
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	ctx := parentCtx.DebugContext()
	syncID, err := strconv.ParseUint(resp.Header.Get("Sync-ID"), 10, 64)
	if err != nil {
		return err
	}
	ctx.EventCollector.Add(ctx.EventFactory.NewReceiverClockSyncEvent(syncID))

	decoder := json.NewDecoder(resp.Body)
	var run common.Run
	if err := decoder.Decode(&run); err != nil {
		return err
	}
	uploadURL, err := baseURL.Parse(fmt.Sprintf("run/%d/results/", run.AttemptID))
	if err != nil {
		return err
	}

	// Buffered so the sender never blocks on delivering its result.
	finished := make(chan error, 1)

	if err = gradeAndUploadResults(
		ctx,
		client,
		uploadURL.String(),
		&run,
		finished,
	); err != nil {
		return err
	}

	return <-finished
}
Exemple #30
0
// getTransportFieldURL builds an http.Transport that sends its requests
// through the proxy whose address is stored in *proxy_addr.
//
// Connections are dialed with a 20-second timeout and receive an absolute
// deadline of 6 seconds measured from the moment dialing starts. A parse
// error on the proxy address is ignored.
func getTransportFieldURL(proxy_addr *string) (transport *http.Transport) {
	proxyURL, _ := new(url.URL).Parse(*proxy_addr)

	dial := func(netw, addr string) (net.Conn, error) {
		// Fixed before dialing, so connect time counts against the deadline.
		deadline := time.Now().Add(6 * time.Second)
		conn, err := net.DialTimeout(netw, addr, time.Second*20)
		if err != nil {
			return nil, err
		}
		conn.SetDeadline(deadline)
		return conn, nil
	}

	transport = &http.Transport{}
	transport.Proxy = http.ProxyURL(proxyURL)
	transport.Dial = dial
	return transport
}