Example #1
// fiddler2_enable installs a fresh http.Transport on client that sends every
// request through the HTTP proxy located at proxys.
//
// If proxys cannot be parsed, the installed transport reports that parse error
// for every request instead of silently connecting without a proxy.
func fiddler2_enable(client *http.Client, proxys string) {
	transport := &http.Transport{}
	// Resolve against an empty base URL, as the original code did.
	proxy, err := (&url.URL{}).Parse(proxys)
	if err != nil {
		// Surface the configuration error at request time; the signature
		// offers no other way to report it.
		transport.Proxy = func(*http.Request) (*url.URL, error) {
			return nil, err
		}
	} else {
		transport.Proxy = http.ProxyURL(proxy)
	}
	client.Transport = transport
}
Example #2
// newHostFetcher builds the per-host fetcher for the host of u. It returns
// ErrEmptyHost when u has no host, or the error from resolving "/" against u.
// On success it returns a hostFetcherInfiniteQ wrapping the new host fetcher
// and the channel used to send commands to it.
//
// NOTE(review): closing braces and the body of the final goroutine appear to
// be missing from this excerpt; confirm against the upstream source.
func (f *Fetcher) newHostFetcher(u *url.URL) (*hostFetcherInfiniteQ, error) {
	if u.Host == "" {
		// The URL must be rooted with a host.
		return nil, ErrEmptyHost
	// Resolve the root path of the host; it is passed to NewHostFetcher below.
	baseurl, err := u.Parse("/")
	if err != nil {
		return nil, err

	// Create the infinite queue: the in channel to send on, and the out channel
	// to read from in the host's goroutine, and add to the hosts map
	var out chan Command
	in, out := make(chan Command, 1), make(chan Command, 1)
	// Adapt the fetcher's handler so each call receives a Context carrying the
	// command and the fetcher's queue.
	chand := CmdHandlerFunc(func(cmd Command, res *http.Response, err error) {
		f.Handler.Handle(&Context{Cmd: cmd, Q: f.q}, res, err)
	hf := NewHostFetcher(f.CrawlConfig, baseurl, chand, out)
	// Start the infinite queue goroutine for this host
	go sliceIQ(in, out)
	// Start the working goroutine for this host
	go func() {
	return &hostFetcherInfiniteQ{*hf, in}, nil
Example #3
// Init stores config on the output, creates its HTTP client and log message
// channel, and rewrites o.Url so that it points at the "/api/put" path.
//
// NOTE(review): several closing braces (and possibly other short lines) appear
// to be missing from this excerpt, so the exact nesting of the statements
// below cannot be confirmed from here.
func (o *OpenTsdbOutput) Init(config interface{}) (err error) {
	// Unchecked type assertion: panics if config is not *OpenTsdbOutputConfig.
	o.OpenTsdbOutputConfig = config.(*OpenTsdbOutputConfig)
	//todo: check address validity
	// if o.url, err = url.Parse(o.Address); err != nil {
	//     return fmt.Errorf("Can't parse URL '%s': %s", o.Address, err.Error())
	// }
	// Client with a one-minute overall timeout on top of the wrapped transport.
	o.client = &http.Client{
		Transport: &timeoutTransport{Transport: new(http.Transport)},
		Timeout:   time.Minute,

	var u *url.URL
	if u, err = url.Parse(o.Url); err == nil {

	// Channel capacity comes from the LogMsgChSize setting.
	o.logMsgChan = make(chan []byte, o.LogMsgChSize)

	// Replace whatever path the configured URL had with "/api/put".
	u, err = u.Parse("/api/put")
	if err != nil {
		return err
	// A host of the form ":port" is completed with "localhost".
	if strings.HasPrefix(u.Host, ":") {
		u.Host = "localhost" + u.Host
	o.Url = u.String()

	if err != nil {
		log.Printf("initialize OpenTsdbOutput failed, %s", err.Error())
		return err
Example #4
func dockerRepo(RepoEndpoint string, myProxy string) {
	repo := http.NewServeMux()
	if myProxy != "" {
		url_i := url.URL{}
		url_proxy, _ := url_i.Parse(myProxy)

		tr := &http.Transport{
			DisableCompression: false,
			DisableKeepAlives:  false,
			Proxy:              http.ProxyURL(url_proxy),
			TLSClientConfig:    &tls.Config{InsecureSkipVerify: true},
		clientRepo := &http.Client{Transport: tr}

		repo.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
			sendHttps(w, r, clientRepo)
		log.Fatal(http.ListenAndServe(RepoEndpoint, repo))
	} else {
		tr := &http.Transport{
			DisableCompression: false,
			DisableKeepAlives:  false,
			TLSClientConfig:    &tls.Config{InsecureSkipVerify: true},
		clientRepo := &http.Client{Transport: tr}

		repo.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
			sendHttps(w, r, clientRepo)
		log.Fatal(http.ListenAndServe(RepoEndpoint, repo))
Example #5
// Init copies the Qiushi location and timeout from global_conf and builds the
// module's HTTP client, optionally routed through the configured proxy.
//
// NOTE(review): closing braces and the final return appear to be missing from
// this excerpt.
func (this *ReqQiushiModule) Init(global_conf *context.GlobalContext) (err error) {
	this.qiushi_url = global_conf.Qiushi.Location
	this.timeout = global_conf.Qiushi.Timeout
	transport := &http.Transport{}
	// Custom dialer: this.timeout is interpreted as milliseconds.
	transport.Dial = func(netw, addr string) (net.Conn, error) {
		c, err := net.DialTimeout(netw, addr, time.Millisecond*time.Duration(this.timeout))
		if err != nil {
			utils.WarningLog.Write("dail timeout [%s]", err.Error())
			return nil, err
		return c, nil
	transport.MaxIdleConnsPerHost = 10
	// The same millisecond timeout bounds the wait for response headers.
	transport.ResponseHeaderTimeout = time.Millisecond * time.Duration(this.timeout)
	if global_conf.Proxy.Open {
		// NOTE(review): the parse error is discarded; a malformed proxy
		// location yields a nil proxy URL.
		url_i := url.URL{}
		url_proxy, _ := url_i.Parse(global_conf.Proxy.Location)
		transport.Proxy = http.ProxyURL(url_proxy)
		utils.DebugLog.Write("open http proxy , proxy location [%s]", global_conf.Proxy.Location)
	this.client = &http.Client{}
	this.client.Transport = transport
	utils.DebugLog.Write("req qiushi url [%s]", this.qiushi_url)

Example #6
func batchRequests(requests []*Request, endPoint *url.URL) []*Response {
	responses := make([]*Response, len(requests))
	var wg sync.WaitGroup
	// TODO: change to use go rutine
	for i, request := range requests {
		go func(i int, request *Request) {
			client := &http.Client{
				Timeout: 10 * time.Second,
			log.Println("Resuest:", request.Method, request.RelativeURL)
			url, err := endPoint.Parse(request.RelativeURL)
			req, err := http.NewRequest(request.Method, url.String(), strings.NewReader(request.Body))
			resp, err := client.Do(req)
			responses[i] = NewResponse(resp)
		}(i, request)
	return responses
Example #7
func (disc *Discoverer) CookArticleURL(baseURL *url.URL, artLink string) (*url.URL, error) {
	// parse, extending to absolute
	u, err := baseURL.Parse(artLink)
	if err != nil {
		return nil, err

	// on a host we accept?
	if !disc.isHostGood(u.Host) {
		return nil, fmt.Errorf("host rejected (%s)", u.Host)

	// matches one of our url forms
	foo := u.RequestURI()
	accept := false
	for _, pat := range disc.ArtPats {
		if pat.MatchString(foo) {
			accept = true
	if !accept {
		return nil, fmt.Errorf("url rejected")

	// apply our sanitising rules for this site
	if disc.StripFragments {
		u.Fragment = ""
	if disc.StripQuery {
		u.RawQuery = ""
	return u, nil
Example #8
// StartServerMultiplesBotsHostPort registers a webhook for each bot under uri
// and serves incoming updates, dispatching each one to the MainListener of the
// bot whose token path matches the request.
//
// NOTE(review): closing braces and the bodies of several if statements appear
// to be missing from this excerpt; the comments below describe only what is
// visible.
func StartServerMultiplesBotsHostPort(uri string, pathl string, host string, port string, newrelic *RelicConfig, bots ...*TgBot) {
	var puri *url.URL
	if uri != "" {
		tmpuri, err := url.Parse(uri)
		if err != nil {
			fmt.Printf("Bad URL %s", uri)
		puri = tmpuri

	// Maps "<id><secret>" (the token with its colon removed) to the bot.
	botsmap := make(map[string]*TgBot)
	for _, bot := range bots {
		tokendiv := strings.Split(bot.Token, ":")
		if len(tokendiv) != 2 {

		tokenpath := fmt.Sprintf("%s%s", tokendiv[0], tokendiv[1])
		botpathl := path.Join(pathl, tokenpath)

		// NOTE(review): the parse error is discarded, and puri is still nil
		// here when uri was empty.
		nuri, _ := puri.Parse(botpathl)
		remoteuri := nuri.String()
		res, error := bot.SetWebhook(remoteuri)

		if error != nil {
			ec := res.ErrorCode
			// NOTE(review): &ec passes a pointer to the %d verb; the value ec
			// was probably intended.
			fmt.Printf("Error setting the webhook: \nError code: %d\nDescription: %s\n", &ec, res.Description)
		if bot.MainListener == nil {
		botsmap[tokenpath] = bot

	// Route pattern with a named "token" capture group.
	pathtolisten := path.Join(pathl, "(?P<token>[a-zA-Z0-9-_]+)")

	m := martini.Classic()
	m.Post(pathtolisten, binding.Json(MessageWithUpdateID{}), func(params martini.Params, msg MessageWithUpdateID) {
		bot, ok := botsmap[params["token"]]

		// Forward only updates for a known bot that carry positive IDs.
		if ok && msg.UpdateID > 0 && msg.Msg.ID > 0 {
			bot.MainListener <- msg
		} else {
			fmt.Println("Someone tried with: ", params["token"], msg)

	if newrelic != nil {
		gorelic.InitNewrelicAgent(newrelic.Token, newrelic.Name, false)

	if host == "" || port == "" {
	} else {
		m.RunOnAddr(host + ":" + port)
Example #9
func init() {
	h := backend.NewHandler()
	h.CommandName = "get"
	h.CommandPattern = "(get)( )(.*)"
	h.Usage = "get url"
	h.HandlerFunc = func(cmd *backend.Command) (string, bool) {
		//queryHost := cmd.Tokens[1]
		//queryPort := cmd.Tokens[2]
		//queryArgs := cmd.Tokens[3]

		arg := cmd.Tokens[1]
		u := new(url.URL)
		newUrl, err := u.Parse(arg)
		if err != nil {
			return "Unable to parse url: " + err.Error(), false
		fmt.Println("Request: get", newUrl)
		res, err := backend.GetHttpResource(newUrl.String())
		if err != nil {
			return err.Error(), false
		return res, true
	backend.HandlerRegistry[h.CommandName] = h
Example #10
// maybeResolvedLink resolves other relative to root and returns the resulting
// URL as a string. When other cannot be parsed it is returned unchanged.
func maybeResolvedLink(root *url.URL, other string) string {
	resolved, err := root.Parse(other)
	if err != nil {
		return other
	}
	return resolved.String()
}
Example #11
// Init initializes the metadata send queue.
func Init(u *url.URL, debug bool) error {
	mh, err := u.Parse("/api/metadata/put")
	if err != nil {
		return err
	metahost = mh.String()
	metadebug = debug
	go collectMetadata()
	return nil
Example #12
func urlParse(u *url.URL) lua.Function {
	return func(l *lua.State) int {
		newU, err := u.Parse(lua.CheckString(l, 1))
		if err != nil {
			lua.Errorf(l, err.Error())
		pushURL(l, newU)
		return 1
Example #13
// addTXTFormatString returns u as a string with a "format=txt" query
// parameter added. u itself is left untouched: the parameter is appended to a
// copy. If the copy cannot be made, u is returned as-is in string form.
func addTXTFormatString(u *url.URL) string {
	// Resolving the empty reference yields an independent copy of u.
	dup, err := u.Parse("")
	if err != nil {
		return u.String()
	}

	query := dup.Query()
	query.Add("format", "txt")
	dup.RawQuery = query.Encode()
	return dup.String()
}
Example #14
// parse tokenizes the HTML document s and collects two de-duplicated lists:
// ln holds the request URIs of same-host links found on atag elements, and as
// holds the raw attribute values found on stag, itag and ltag elements.
// Which attribute is read for a tag comes from the attrs table.
//
// NOTE(review): closing braces and the loop exits appear to be missing from
// this excerpt; the comments describe only what is visible.
func parse(s string, base url.URL) (ln []string, as []string) {
	z := html.NewTokenizer(strings.NewReader(s))
	// Maps used as sets so that every link/asset is reported once.
	lnm := make(map[string]struct{})
	asm := make(map[string]struct{})

	// anonymous func used to get attribute values
	attr := func(a string) string {
		var av string
		for {
			if k, v, ha := z.TagAttr(); string(k) == a {
				av = string(v)
			} else if ha == false {
		return av

	// convert map to slice
	slc := func(m map[string]struct{}) []string {
		var v []string

		for k := range m {
			v = append(v, k)
		return v

	for {
		tt := z.Next()
		switch tt {
		case html.ErrorToken:
			// The tokenizer is done (end of input or error): return what was
			// collected so far.
			return slc(lnm), slc(asm)
		case html.StartTagToken, html.EndTagToken:
			if tn, ha := z.TagName(); ha {
				tg := string(tn)
				if av := attr(attrs[tg]); av != "" {
					switch tg {
					case atag:
						// Keep links on the same host that point somewhere
						// other than the base page itself.
						if url, err := base.Parse(av); err == nil {
							if url.Host == base.Host && url.RequestURI() != base.RequestURI() {
								lnm[url.RequestURI()] = struct{}{}
					case stag, itag, ltag:
						asm[av] = struct{}{}
Example #15
func sanitizeLink(u *url.URL, v string) string {
	p, err := u.Parse(v)
	if err != nil {
		return ""
	if !acceptableUriSchemes[p.Scheme] {
		return ""

	return p.String()
Example #16
// ParseRef resolves s against base and then runs each function in f, in
// order, on the resolved URL. The first error, whether from parsing or from
// one of the functions, stops processing and is returned with a nil URL.
func ParseRef(base *url.URL, s string, f ...func(*url.URL) error) (*url.URL, error) {
	resolved, err := base.Parse(s)
	if err != nil {
		return nil, err
	}
	for i := range f {
		if hookErr := f[i](resolved); hookErr != nil {
			return nil, hookErr
		}
	}
	return resolved, nil
}
Example #17
// This function has been taken from https://github.com/aybabtme/crawler/blob/master/util.go
func cleanFromURLString(from *url.URL, link string) (*url.URL, error) {
	u, err := url.Parse(link)
	if u.Host == "" {
		u.Scheme = from.Scheme
		u.Host = from.Host
	uStr := purell.NormalizeURL(u, purell.FlagsUsuallySafeGreedy)

	clean, err := from.Parse(uStr)

	return clean, err
Example #18
// Init initializes the metadata send queue.
func Init(u *url.URL, debug bool) error {
	mh, err := u.Parse("/api/metadata/put")
	if err != nil {
		return err
	if strings.HasPrefix(mh.Host, ":") {
		mh.Host = "localhost" + mh.Host
	metahost = mh.String()
	metadebug = debug
	go collectMetadata()
	return nil
Example #19
func sanitiseURL(link string, baseURL *url.URL) (string, error) {
	u, err := baseURL.Parse(link)
	if err != nil {
		return "", err

	// we're only interested in articles, so reject obviously-not-article urls
	if (u.Path == "/" || u.Path == "") && len(u.RawQuery) == 0 {
		return "", fmt.Errorf("obviously not article")

	return purell.NormalizeURL(u, purell.FlagsSafe), nil
Example #20
func sendLinks(job *Job, ctxURL *url.URL, hrefs <-chan Href, links chan<- Link) {
	for href := range hrefs {
		parsed, err := ctxURL.Parse(string(href))
		if err != nil || !(parsed.Scheme == "http" || parsed.Scheme == "https") {
		// Ignore the part of the URL after the "#"
		parsed.Fragment = ""

		links <- Link{URL: parsed, Job: job}
Example #21
func TestResourcePath(t *testing.T) {
	var url *url.URL
	var req Request
	var err error
	var rp ResourcePath
	var prev ResourcePathItem
	var next ResourcePathItem
	var firstItem ResourcePathItem

	url, _ = url.Parse("http://localhost:8000/v1.0/Things")
	req, err = CreateIncomingRequest(url, HTTP)
	assert.Nil(t, err)
	assert.NotNil(t, req)
	rp = req.GetResourcePath()
	assert.NotNil(t, rp)
	assert.Equal(t, 1, len(rp.All()))
	assert.Equal(t, -1, rp.CurrentIndex())
	firstItem = rp.First()
	assert.NotNil(t, firstItem)
	assert.Equal(t, ENTITY_THINGS, firstItem.GetEntity())
	assert.Equal(t, "", firstItem.GetId())
	assert.Nil(t, firstItem.GetQueryOptions())

	// http://localhost:8000/v1.0/Things(12345)
	// http://localhost:8000/v1.0/Things(12345)/Locations
	// http://localhost:8000/v1.0/Things(12345)/Locations(67890)

	url, _ = url.Parse("http://localhost:8000/v1.0/Datastreams(12345)/Sensor")
	req, err = CreateIncomingRequest(url, HTTP)
	assert.Nil(t, err)
	assert.NotNil(t, req)
	rp = req.GetResourcePath()
	assert.NotNil(t, rp)
	assert.Equal(t, 2, len(rp.All()))
	assert.Equal(t, -1, rp.CurrentIndex())
	assert.Equal(t, "12345", rp.First().GetId())
	assert.Equal(t, ENTITY_DATASTREAMS, rp.First().GetEntity())
	assert.True(t, rp.IsFirst())
	assert.NotNil(t, rp.Last())
	assert.True(t, rp.IsLast())
	assert.Equal(t, "", rp.Last().GetId())
	assert.Equal(t, ENTITY_SENSOR, rp.Last().GetEntity())
	prev = rp.Prev()
	assert.Equal(t, "12345", prev.GetId())
	assert.Equal(t, ENTITY_DATASTREAMS, prev.GetEntity())
	next = rp.Next()
	assert.Equal(t, "", next.GetId())
	assert.Equal(t, ENTITY_SENSOR, next.GetEntity())
	assert.False(t, rp.HasNext())
	assert.Nil(t, rp.At(3))
Example #22
// normalizeURL returns a normalized copy of u1 together with the MIME type
// implied by the path's file extension ("" when there is no extension or it
// is unknown). The extension is removed from the copy's path, and the path is
// given a trailing slash so that relative child URLs resolve below it rather
// than next to it. u1 itself is not modified.
func normalizeURL(u1 *url.URL) (u *url.URL, mimetype string) {
	// Resolving the empty reference produces a normalized copy.
	normalized, _ := u1.Parse("")

	// A file extension takes precedence over the Accept: header.
	extension := path.Ext(normalized.Path)
	if extension != "" {
		mimetype = mime.TypeByExtension(extension)
		normalized.Path = strings.TrimSuffix(normalized.Path, extension)
	}

	if !strings.HasSuffix(normalized.Path, "/") {
		normalized.Path += "/"
	}
	return normalized, mimetype
}
Example #23
// linkMaker logs l, resolves it against curr and returns the absolute URL
// with its fragment removed. The path of curr is treated as a directory: when
// it lacks a trailing slash one is appended to curr itself before resolving,
// so the caller's URL is modified.
func linkMaker(curr *url.URL, l string) (*url.URL, error) {
	log.Println("Original link", l)

	if !strings.HasSuffix(curr.Path, "/") {
		curr.Path = curr.Path + "/"
	}

	resolved, err := curr.Parse(l)
	if err != nil {
		return nil, err
	}
	resolved.Fragment = ""
	return resolved, nil
}
Example #24
File: http.go Project: github/hub
// performRequestUrl sends a request with the given method and body to url,
// adding the client's Authorization, User-Agent and Accept headers. On a 307
// response it re-issues the request to the Location header's URL, re-sending
// the body that was captured on the first attempt, for as long as
// redirectsRemaining is positive.
//
// NOTE(review): closing braces, the early returns and the call to configure
// appear to be missing from this excerpt; confirm against the upstream file.
func (c *simpleClient) performRequestUrl(method string, url *url.URL, body io.Reader, configure func(*http.Request), redirectsRemaining int) (res *simpleResponse, err error) {
	req, err := http.NewRequest(method, url.String(), body)
	if err != nil {
	req.Header.Set("Authorization", "token "+c.accessToken)
	req.Header.Set("User-Agent", UserAgent)
	req.Header.Set("Accept", apiPayloadVersion)

	if configure != nil {

	// Tee the request body into a buffer while it is being sent, so that the
	// same bytes can be replayed if the server answers with a redirect.
	var bodyBackup io.ReadWriter
	if req.Body != nil {
		bodyBackup = &bytes.Buffer{}
		req.Body = ioutil.NopCloser(io.TeeReader(req.Body, bodyBackup))

	httpResponse, err := c.httpClient.Do(req)
	if err != nil {

	res = &simpleResponse{httpResponse}
	if res.StatusCode == 307 && redirectsRemaining > 0 {
		// url here is the parameter, so Parse resolves the Location header
		// relative to the URL that was just requested.
		url, err = url.Parse(res.Header.Get("Location"))
		// The comparison below involves the redirect target's host and
		// scheme versus those of the original request.
		if err != nil || url.Host != req.URL.Host || url.Scheme != req.URL.Scheme {
		res, err = c.performRequestUrl(method, url, bodyBackup, configure, redirectsRemaining-1)

Example #25
func mimetypes2net(u *url.URL, mimetypes []string) NetEntity {
	u, _ = u.Parse("") // dup
	u.Path = strings.TrimSuffix(u.Path, "/")
	locations := make([]*url.URL, len(mimetypes))
	for i, mimetype := range mimetypes {
		u2, _ := u.Parse("")
		exts, _ := mime.ExtensionsByType(mimetype)
		if exts == nil || len(exts) == 0 {
			u2.Path += ".httpentity_mimetypes2net_no_extension_should_never_happen?" + mimetype
		} else {
			u2.Path += exts[0]
		locations[i] = u2
	return NetLocations(locations)
// ListObjects returns an array of child object names of the object at the given URL.
func (d *PostgresqlDriver) ListObjects(url *url.URL) ([]string, error) {
	var parentID uint64
	err := d.db.QueryRow("SELECT id FROM data WHERE uri = $1", url.String()).Scan(&parentID)
	if err != nil {
		psqlLog.Error("Error while fetching object %s :: %s", url, err)
		return nil, InternalServerError
	var rows *sql.Rows
	rows, err = d.db.Query("SELECT uri FROM data WHERE parent_id = $1", parentID)
	defer rows.Close()
	if err != nil {
		psqlLog.Error("Error while fetching children of %s :: %s", url, err)
		return nil, InternalServerError
	uris := make([]string, 0, 25)
	for rows.Next() {
		var uri string
		if err := rows.Scan(&uri); err != nil {
			psqlLog.Error("Error when reading row :: %s", err)
			return nil, InternalServerError
		u, err := url.Parse(uri)
		if err != nil {
			psqlLog.Error("Error while parsing URL %s :: %s", uri, err)
			return nil, InternalServerError
		uris = append(uris, path.Base(u.Path))
	return uris, nil
Example #27
func getURL(src *url.URL, token html.Token) (*url.URL, bool) {

	if !contains([]string{"a", "img", "script", "link"}, token.Data) {
		return nil, false

	for _, attr := range token.Attr {
		if contains([]string{"href", "src"}, attr.Key) {
			if u, err := src.Parse(attr.Val); err == nil && ensureCanonical(u) {
				return u, true

	return nil, false
Example #28
func (w *Worker) AskRobots(url *url.URL) (bool, *heroshi.FetchResult) {
	robots_url_str := fmt.Sprintf("%s://%s/robots.txt", url.Scheme, url.Host)
	robots_url, err := url.Parse(robots_url_str)
	if err != nil {
		return false, heroshi.ErrorResult(url, err.Error())

	fetch_result := w.Fetch(robots_url)

	if !fetch_result.Success {
		fetch_result.Status = "Robots download error: " + fetch_result.Status
		return false, fetch_result

	var robots *robotstxt.RobotsData
	robots, err = robotstxt.FromStatusAndBytes(fetch_result.StatusCode, fetch_result.Body)
	if err != nil {
		fetch_result.Status = "Robots parse error: " + err.Error()
		return false, fetch_result

	allow := robots.TestAgent(url.Path, w.UserAgent)
	if !allow {
		return allow, heroshi.ErrorResult(url, "Robots disallow")

	return allow, nil
Example #29
// processRun requests a run from "run/request/" under baseURL, decodes it
// from the JSON response body, and grades it via gradeAndUploadResults with
// "run/<AttemptID>/results/" as the upload location. It returns the value
// received on the finished channel, or the first error encountered.
//
// NOTE(review): closing braces, the body of the first error check and the
// arguments of gradeAndUploadResults appear to be missing from this excerpt.
func processRun(
	parentCtx *common.Context,
	client *http.Client,
	baseURL *url.URL,
) error {
	requestURL, err := baseURL.Parse("run/request/")
	if err != nil {
	resp, err := client.Get(requestURL.String())
	if err != nil {
		return err
	defer resp.Body.Close()
	ctx := parentCtx.DebugContext()
	// The Sync-ID response header must hold an unsigned decimal number.
	syncID, err := strconv.ParseUint(resp.Header.Get("Sync-ID"), 10, 64)
	if err != nil {
		return err

	decoder := json.NewDecoder(resp.Body)
	var run common.Run
	if err := decoder.Decode(&run); err != nil {
		return err
	uploadURL, err := baseURL.Parse(fmt.Sprintf("run/%d/results/", run.AttemptID))
	if err != nil {
		return err

	// Buffered with capacity one, so a single send does not block the sender.
	finished := make(chan error, 1)

	if err = gradeAndUploadResults(
	); err != nil {
		return err

	return <-finished
Example #30
func getTransportFieldURL(proxy_addr *string) (transport *http.Transport) {
	url_i := url.URL{}
	url_proxy, _ := url_i.Parse(*proxy_addr)
	transport = &http.Transport{
		Proxy: http.ProxyURL(url_proxy),

		Dial: func(netw, addr string) (net.Conn, error) {
			deadline := time.Now().Add(6 * time.Second)
			c, err := net.DialTimeout(netw, addr, time.Second*20)
			if err != nil {
				return nil, err
			return c, nil