func ExtendURL(u *url.URL, tail string) *url.URL { extended := *u if !util.URLIsDir(u) { extended.Path += "/" + tail } else { extended.Path += tail } return &extended }
func (w *Worker) HandleURL(task *url.URL) { logging.Logf(logging.LogDebug, "Trying Raw URL (unmangled): %s", task.String()) withMangle := w.TryURL(task) if !util.URLIsDir(task) { if withMangle { w.TryMangleURL(task) } if !util.URLHasExtension(task) { for _, ext := range w.settings.Extensions { task := *task task.Path += "." + ext if w.TryURL(&task) { w.TryMangleURL(&task) } } } } // Mark as done w.done(1) }
func (w *Worker) TryURL(task *url.URL) bool { logging.Logf(logging.LogInfo, "Trying: %s", task.String()) tryMangle := false w.redir = nil if resp, err := w.client.RequestURL(task); err != nil && w.redir == nil { result := results.Result{URL: task, Error: err} if resp != nil { result.Code = resp.StatusCode } w.rchan <- result } else { defer resp.Body.Close() // Do we keep going? if util.URLIsDir(task) && w.KeepSpidering(resp.StatusCode) { logging.Logf(logging.LogDebug, "Referring %s back for spidering.", task.String()) w.adder(task) } if w.redir != nil { logging.Logf(logging.LogDebug, "Referring redirect %s back.", w.redir.URL.String()) w.adder(w.redir.URL) } if w.pageWorker != nil && w.pageWorker.Eligible(resp) { w.pageWorker.Handle(task, resp.Body) } var redir *url.URL if w.redir != nil { redir = w.redir.URL } w.rchan <- results.Result{ URL: task, Code: resp.StatusCode, Redir: redir, Length: resp.ContentLength, } tryMangle = w.KeepSpidering(resp.StatusCode) } if w.settings.SleepTime != 0 { time.Sleep(w.settings.SleepTime) } return tryMangle }