Esempio n. 1
0
// ExtendURL returns a pointer to a copy of u whose Path has tail appended.
//
// When util.URLIsDir reports that u is not a directory, a "/" separator is
// inserted between the existing path and tail; otherwise tail is appended
// directly. The struct pointed to by u is not modified.
func ExtendURL(u *url.URL, tail string) *url.URL {
	result := *u
	suffix := tail
	if !util.URLIsDir(u) {
		// Non-directory paths need an explicit separator before the tail.
		suffix = "/" + tail
	}
	result.Path += suffix
	return &result
}
Esempio n. 2
0
// HandleURL processes a single task URL and then reports it as done.
//
// The URL is always tried as given. If util.URLIsDir reports that it is not a
// directory, two further steps follow: mangled variants are tried when the
// first attempt returned true, and, when the URL has no extension, each of
// w.settings.Extensions is appended as ".ext" and tried in turn (with mangled
// variants for every candidate whose attempt returned true).
func (w *Worker) HandleURL(task *url.URL) {
	logging.Logf(logging.LogDebug, "Trying Raw URL (unmangled): %s", task.String())
	rawWantsMangle := w.TryURL(task)
	isFile := !util.URLIsDir(task)

	if isFile && rawWantsMangle {
		w.TryMangleURL(task)
	}

	if isFile && !util.URLHasExtension(task) {
		for _, suffix := range w.settings.Extensions {
			// Work on a copy so the caller's URL keeps its original path.
			candidate := *task
			candidate.Path += "." + suffix
			if w.TryURL(&candidate) {
				w.TryMangleURL(&candidate)
			}
		}
	}

	// Signal that this one task has been fully processed.
	w.done(1)
}
Esempio n. 3
0
// TryURL requests task, publishes the outcome on w.rchan, and reports whether
// the caller should go on to try mangled variants of the URL.
//
// A request counts as failed when the client returns an error and no redirect
// was recorded in w.redir, or when no response is available at all. Failures
// are reported with the error (plus the status code if a response exists) and
// TryURL returns false.
//
// Otherwise the response is processed: a directory URL whose status passes
// w.KeepSpidering is handed back to w.adder, a recorded redirect target is
// queued through w.adder, and a body that w.pageWorker considers eligible is
// passed to it. The return value is then w.KeepSpidering(resp.StatusCode).
//
// If w.settings.SleepTime is non-zero, TryURL sleeps for that duration before
// returning.
func (w *Worker) TryURL(task *url.URL) bool {
	logging.Logf(logging.LogInfo, "Trying: %s", task.String())
	tryMangle := false
	// Clear any redirect left over from a previous request. w.redir is read
	// again right after RequestURL, so it is presumably filled in by a
	// redirect hook while the request runs — confirm against the client setup.
	w.redir = nil
	resp, err := w.client.RequestURL(task)
	if resp == nil || (err != nil && w.redir == nil) {
		// Either a genuine failure, or no response to inspect. The nil check
		// keeps the success path below from dereferencing a nil response when
		// an error arrives together with a recorded redirect.
		result := results.Result{URL: task, Error: err}
		if resp != nil {
			result.Code = resp.StatusCode
		}
		w.rchan <- result
	} else {
		defer resp.Body.Close()
		// Do we keep going?
		if util.URLIsDir(task) && w.KeepSpidering(resp.StatusCode) {
			logging.Logf(logging.LogDebug, "Referring %s back for spidering.", task.String())
			w.adder(task)
		}
		// Queue the redirect target and remember it for the result record.
		var redir *url.URL
		if w.redir != nil {
			logging.Logf(logging.LogDebug, "Referring redirect %s back.", w.redir.URL.String())
			w.adder(w.redir.URL)
			redir = w.redir.URL
		}
		if w.pageWorker != nil && w.pageWorker.Eligible(resp) {
			w.pageWorker.Handle(task, resp.Body)
		}
		w.rchan <- results.Result{
			URL:    task,
			Code:   resp.StatusCode,
			Redir:  redir,
			Length: resp.ContentLength,
		}
		tryMangle = w.KeepSpidering(resp.StatusCode)
	}
	// Optional delay between requests, applied after success and failure alike.
	if w.settings.SleepTime != 0 {
		time.Sleep(w.settings.SleepTime)
	}
	return tryMangle
}