// URLIsSubpath determines if one path is a subpath of another path.
// Only considers the host and scheme if they are non-empty in the parent.
// Identical paths are considered subpaths of each other.
func URLIsSubpath(parent, child *url.URL) bool {
	logging.Logf(logging.LogDebug, "Subpath check: Parent: %s, child %s.", parent.String(), child.String())
	if parent.Scheme != "" && child.Scheme != parent.Scheme {
		return false
	}
	if parent.Host != "" && child.Host != parent.Host {
		return false
	}
	if parent.Path == "/" {
		// Everything is in this path
		return true
	}
	// Now compare the cleaned paths
	pPath := path.Clean(parent.Path)
	cPath := path.Clean(child.Path)
	if len(cPath) < len(pPath) {
		return false
	}
	if cPath == pPath {
		return true
	}
	if !strings.HasPrefix(cPath, pPath) {
		logging.Logf(logging.LogDebug, "Reject for differing paths: %s, %s", cPath, pPath)
		return false
	}
	// The prefix matches; it is a true subpath only if the next byte is a
	// path separator ("/admin" contains "/admin/users" but not "/administrator").
	return cPath[len(pPath)] == slash
}
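// A minimal usage sketch for URLIsSubpath (hypothetical, not part of the
// scanner; assumes net/url and fmt are imported): a parent with an empty
// scheme and host acts as a path-only check, so only the cleaned paths are
// compared.
func exampleURLIsSubpath() {
	parent, _ := url.Parse("/admin")
	child, _ := url.Parse("https://example.com/admin/users")
	fmt.Println(URLIsSubpath(parent, child)) // true: /admin/users sits under /admin
	sibling, _ := url.Parse("https://example.com/administrator")
	fmt.Println(URLIsSubpath(parent, sibling)) // false: prefix match, but no path separator
}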
func (rm *HTMLResultsManager) writeFooter() {
	footer := `{{define "FOOTER"}}</table></body></html>{{end}}`
	t, err := template.New("htmlResultsManager").Parse(footer)
	if err != nil {
		logging.Logf(logging.LogWarning, "Error parsing a template: %s", err.Error())
		return
	}
	err = t.ExecuteTemplate(rm.writer, "FOOTER", nil)
	if err != nil {
		logging.Logf(logging.LogWarning, "Error writing template output: %s", err.Error())
	}
}
func (rm *HTMLResultsManager) writeResult(res *Result) {
	// TODO: don't rebuild the template with each row (one possible fix is
	// sketched below)
	tmpl := `{{define "ROW"}}<tr><td>{{.Code}}</td><td><a href="{{.URL.String}}">{{.URL.String}}</a></td><td>{{if ge .Length 0}}{{.Length}}{{end}}</td></tr>{{end}}`
	t, err := template.New("htmlResultsManager").Parse(tmpl)
	if err != nil {
		logging.Logf(logging.LogWarning, "Error parsing a template: %s", err.Error())
		return
	}
	err = t.ExecuteTemplate(rm.writer, "ROW", res)
	if err != nil {
		logging.Logf(logging.LogWarning, "Error writing template output: %s", err.Error())
	}
}
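// One way to address the TODO above (a sketch, not the repo's code): parse
// the ROW template once at package init via template.Must and reuse it for
// every result. The rowTemplate name is hypothetical; the cached template
// could equally live as a field on HTMLResultsManager.
var rowTemplate = template.Must(template.New("htmlResultsManager").Parse(
	`{{define "ROW"}}<tr><td>{{.Code}}</td><td><a href="{{.URL.String}}">{{.URL.String}}</a></td><td>{{if ge .Length 0}}{{.Length}}{{end}}</td></tr>{{end}}`))

func (rm *HTMLResultsManager) writeResultCached(res *Result) {
	if err := rowTemplate.ExecuteTemplate(rm.writer, "ROW", res); err != nil {
		logging.Logf(logging.LogWarning, "Error writing template output: %s", err.Error())
	}
}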
// AddRobotsFilter adds exclusions for the paths disallowed by each scope's
// robots.txt.
func (f *WorkFilter) AddRobotsFilter(scope []*url.URL, clientFactory client.ClientFactory) {
	for _, scopeURL := range scope {
		logging.Logf(logging.LogDebug, "Getting robots.txt exclusions for %s", scopeURL)
		robotsData, err := robots.GetRobotsForURL(scopeURL, clientFactory)
		if err != nil {
			logging.Logf(logging.LogWarning, "Unable to get robots.txt data: %s", err)
		} else {
			for _, disallowed := range robotsData.GetForUserAgent(f.settings.UserAgent) {
				// Copy the scope URL and substitute the disallowed path.
				disallowedURL := *scopeURL
				disallowedURL.Path = disallowed
				logging.Logf(logging.LogDebug, "Disallowing URL by robots: %s", &disallowedURL)
				f.FilterURL(&disallowedURL)
			}
		}
	}
}
func (rm *HTMLResultsManager) writeHeader() {
	header := `{{define "HEAD"}}<html><head><title>gobuster: {{.BaseURL}}</title></head><body><h2>Results for <a href="{{.BaseURL}}">{{.BaseURL}}</a></h2><table><tr><th>Code</th><th>URL</th><th>Size</th></tr>{{end}}`
	t, err := template.New("htmlResultsManager").Parse(header)
	if err != nil {
		logging.Logf(logging.LogWarning, "Error parsing a template: %s", err.Error())
		return
	}
	data := struct {
		BaseURL string
	}{
		BaseURL: rm.BaseURL,
	}
	err = t.ExecuteTemplate(rm.writer, "HEAD", data)
	if err != nil {
		logging.Logf(logging.LogWarning, "Error writing template output: %s", err.Error())
	}
}
// NewProxyClientFactory creates a ProxyClientFactory for the provided list
// of proxies.
func NewProxyClientFactory(proxies []string, timeout time.Duration, agent string) (*ProxyClientFactory, error) {
	factory := &ProxyClientFactory{timeout: timeout, userAgent: agent}
	for _, proxy := range proxies {
		u, err := url.Parse(proxy)
		if err != nil {
			logging.Logf(logging.LogWarning, "Unable to parse proxy: %s", proxy)
			return nil, err
		}
		if _, ok := proxyTypeMap[u.Scheme]; !ok {
			logging.Logf(logging.LogWarning, "Invalid proxy protocol: %s", u.Scheme)
			return nil, fmt.Errorf("Invalid proxy protocol: %s", u.Scheme)
		}
		if u.Host == "" {
			logging.Logf(logging.LogWarning, "Missing host for proxy: %s", proxy)
			return nil, fmt.Errorf("Missing host for proxy: %s", proxy)
		}
		factory.proxyURLs = append(factory.proxyURLs, u)
	}
	return factory, nil
}
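// Illustrative construction of a ProxyClientFactory; the proxy address and
// agent string here are made up, and this assumes proxyTypeMap contains an
// entry for the "socks5" scheme. Note that a bad scheme or a missing host
// fails here, at construction, rather than at request time.
func exampleProxyFactory() (*ProxyClientFactory, error) {
	return NewProxyClientFactory(
		[]string{"socks5://127.0.0.1:9050"}, 30*time.Second, "gobuster")
}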
func (w *Worker) TryURL(task *url.URL) bool {
	logging.Logf(logging.LogInfo, "Trying: %s", task.String())
	tryMangle := false
	w.redir = nil
	if resp, err := w.client.RequestURL(task); err != nil && w.redir == nil {
		// A genuine failure: report it, along with the status code if a
		// response came back.
		result := results.Result{URL: task, Error: err}
		if resp != nil {
			result.Code = resp.StatusCode
		}
		w.rchan <- result
	} else {
		// Either the request succeeded, or it "failed" only because the
		// redirect handler stopped it (w.redir is set in that case).
		defer resp.Body.Close()
		// Do we keep going?
		if util.URLIsDir(task) && w.KeepSpidering(resp.StatusCode) {
			logging.Logf(logging.LogDebug, "Referring %s back for spidering.", task.String())
			w.adder(task)
		}
		if w.redir != nil {
			logging.Logf(logging.LogDebug, "Referring redirect %s back.", w.redir.URL.String())
			w.adder(w.redir.URL)
		}
		if w.pageWorker != nil && w.pageWorker.Eligible(resp) {
			w.pageWorker.Handle(task, resp.Body)
		}
		var redir *url.URL
		if w.redir != nil {
			redir = w.redir.URL
		}
		w.rchan <- results.Result{
			URL:    task,
			Code:   resp.StatusCode,
			Redir:  redir,
			Length: resp.ContentLength,
		}
		tryMangle = w.KeepSpidering(resp.StatusCode)
	}
	if w.settings.SleepTime != 0 {
		time.Sleep(w.settings.SleepTime)
	}
	return tryMangle
}
// GetLinks extracts the links from an HTML body.
func (*HTMLWorker) GetLinks(body io.Reader) []string {
	tree, err := html.Parse(body)
	if err != nil {
		logging.Logf(logging.LogInfo, "Unable to parse HTML document: %s", err.Error())
		return nil
	}
	links := collectElementAttributes(tree, "a", "href")
	links = append(links, collectElementAttributes(tree, "img", "src")...)
	links = append(links, collectElementAttributes(tree, "script", "src")...)
	links = append(links, collectElementAttributes(tree, "style", "src")...)
	return util.DedupeStrings(links)
}
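// Quick sketch of GetLinks in isolation (hypothetical; assumes fmt and
// strings are imported): the receiver is unused, so a zero-value worker is
// enough, and any io.Reader works as the body.
func exampleGetLinks() {
	body := strings.NewReader(`<a href="/login">login</a><img src="/logo.png">`)
	for _, link := range (&HTMLWorker{}).GetLinks(body) {
		fmt.Println(link) // prints each discovered link
	}
}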
// EnableCPUProfiling is debug profiling support: it starts CPU profiling to
// gobuster.prof and returns a function that stops it, or nil if profiling
// could not be started.
func EnableCPUProfiling() func() {
	profFile, err := os.Create("gobuster.prof")
	if err != nil {
		logging.Logf(logging.LogError, "Unable to open gobuster.prof for profiling: %v", err)
		return nil
	}
	if err := pprof.StartCPUProfile(profFile); err != nil {
		logging.Logf(logging.LogError, "Unable to start CPU profiling: %v", err)
		return nil
	}
	sigintChan := make(chan os.Signal, 1)
	signal.Notify(sigintChan, os.Interrupt)
	cancelFunc := func() {
		logging.Logf(logging.LogWarning, "Stopping profiling...")
		pprof.StopCPUProfile()
		signal.Stop(sigintChan)
	}
	// Gracefully handle Ctrl+C when profiling.
	go func() {
		<-sigintChan
		cancelFunc()
	}()
	return cancelFunc
}
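// One way to wire EnableCPUProfiling into a caller (a sketch; main() below
// stores the returned function and calls it on the normal exit path
// instead): guard against the nil return on failure, and let the internal
// SIGINT handler cover interrupted runs.
func exampleProfiledRun() {
	if stop := EnableCPUProfiling(); stop != nil {
		defer stop()
	}
	// ... run the work to be profiled ...
}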
func NewWorkFilter(settings *ss.ScanSettings, counter workqueue.QueueDoneFunc) *WorkFilter {
	wf := &WorkFilter{done: make(map[string]bool), settings: settings, counter: counter}
	wf.exclusions = make([]*url.URL, 0, len(settings.ExcludePaths))
	for _, path := range settings.ExcludePaths {
		if u, err := url.Parse(path); err != nil {
			logging.Logf(logging.LogError, "Unable to parse exclusion path: %s (%s)", path, err.Error())
		} else {
			wf.FilterURL(u)
		}
	}
	return wf
}
// GetScopes converts the BaseURL strings to parsed URLs.
func (settings *ScanSettings) GetScopes() ([]*url.URL, error) {
	scopes := make([]*url.URL, len(settings.BaseURLs))
	for i, baseURL := range settings.BaseURLs {
		parsed, err := url.Parse(baseURL)
		if err != nil {
			return nil, fmt.Errorf("Unable to parse BaseURL (%s): %s", baseURL, err.Error())
		}
		scopes[i] = parsed
		if scopes[i].Path == "" {
			scopes[i].Path = "/"
		}
		logging.Logf(logging.LogDebug, "Added BaseURL: %s", scopes[i].String())
	}
	return scopes, nil
}
func (q *WorkQueue) SeedFromRobots(scope []*url.URL, clientFactory client.ClientFactory) {
	for _, scopeURL := range scope {
		robotsData, err := robots.GetRobotsForURL(scopeURL, clientFactory)
		if err != nil {
			logging.Logf(logging.LogWarning, "Unable to get robots.txt data: %s", err)
		} else {
			for _, path := range robotsData.GetAllPaths() {
				pathURL := *scopeURL
				pathURL.Path = path
				// Filter will handle if this is out of scope
				q.AddURLs(scopeURL.ResolveReference(&pathURL))
			}
		}
	}
}
func (ctr *WorkCounter) Done(done int64) {
	ctr.Lock()
	defer ctr.Unlock()
	ctr.done += done
	ctr.Stats()
	if ctr.done > ctr.todo {
		panic("Done exceeded todo in WorkCounter!")
	}
	if ctr.done == ctr.todo {
		// Mark done
		logging.Logf(logging.LogInfo, "Work counter thinks we're done.")
		ctr.L.Lock()
		defer ctr.L.Unlock()
		ctr.Broadcast()
	}
}
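// Counterpart sketch of the waiting side (hypothetical, not the repo's
// code; assumes WorkCounter embeds the sync.Cond whose L and Broadcast
// appear above): block until done catches up with todo.
func waitForCounter(ctr *WorkCounter) {
	ctr.L.Lock()
	defer ctr.L.Unlock()
	for ctr.done < ctr.todo {
		ctr.Cond.Wait()
	}
}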
// Handle works on a response body: it extracts the links and feeds them,
// along with their parent paths, back into the queue.
func (w *HTMLWorker) Handle(URL *url.URL, body io.Reader) {
	links := w.GetLinks(body)
	foundURLs := make([]*url.URL, 0, len(links))
	for _, l := range links {
		u, err := url.Parse(l)
		if err != nil {
			logging.Logf(logging.LogInfo, "Error parsing URL (%s): %s", l, err.Error())
			continue
		}
		resolved := URL.ResolveReference(u)
		foundURLs = append(foundURLs, resolved)
		// Include parents of the found URL.
		// Worker will remove duplicates
		foundURLs = append(foundURLs, util.GetParentPaths(resolved)...)
	}
	w.adder(foundURLs...)
}
func (w *Worker) HandleURL(task *url.URL) {
	logging.Logf(logging.LogDebug, "Trying Raw URL (unmangled): %s", task.String())
	withMangle := w.TryURL(task)
	if !util.URLIsDir(task) {
		if withMangle {
			w.TryMangleURL(task)
		}
		if !util.URLHasExtension(task) {
			for _, ext := range w.settings.Extensions {
				// Copy the task so each extension is appended to the
				// original path, not to the previous attempt.
				task := *task
				task.Path += "." + ext
				if w.TryURL(&task) {
					w.TryMangleURL(&task)
				}
			}
		}
	}
	// Mark as done
	w.done(1)
}
// reject discards a task that can't be used, but still counts it as
// terminated.
func (f *WorkFilter) reject(u *url.URL, reason string) {
	logging.Logf(logging.LogDebug, "Filter rejected %s: %s.", u.String(), reason)
	f.counter(1)
}
// This is the main runner for gobuster.
// TODO: separate the actual scanning from all of the setup steps
func main() {
	util.EnableStackTraces()

	// Load scan settings
	settings, err := ss.GetScanSettings()
	if err != nil {
		logging.Logf(logging.LogFatal, err.Error())
		return
	}
	logging.ResetLog(settings.LogfilePath, settings.LogLevel)
	logging.Logf(logging.LogInfo, "Flags: %s", settings)

	// Enable CPU profiling
	var cpuProfStop func()
	if settings.DebugCPUProf {
		cpuProfStop = util.EnableCPUProfiling()
	}

	// Set number of threads
	logging.Logf(logging.LogDebug, "Setting GOMAXPROCS to %d.", settings.Threads)
	runtime.GOMAXPROCS(settings.Threads)

	// Load wordlist
	var words []string
	words, err = wordlist.LoadWordlist(settings.WordlistPath)
	if err != nil {
		logging.Logf(logging.LogFatal, "Unable to load wordlist: %s", err.Error())
		return
	}

	// Build an HTTP Client Factory
	logging.Logf(logging.LogDebug, "Creating Client Factory...")
	clientFactory, err := client.NewProxyClientFactory(settings.Proxies, settings.Timeout, settings.UserAgent)
	if err != nil {
		logging.Logf(logging.LogFatal, "Unable to build client factory: %s", err.Error())
		return
	}

	// Starting point
	scope, err := settings.GetScopes()
	if err != nil {
		logging.Logf(logging.LogFatal, err.Error())
		return
	}

	// Setup the main workqueue
	logging.Logf(logging.LogDebug, "Starting work queue...")
	queue := workqueue.NewWorkQueue(settings.QueueSize, scope, settings.AllowHTTPSUpgrade)
	queue.RunInBackground()

	logging.Logf(logging.LogDebug, "Creating expander and filter...")
	expander := filter.Expander{Wordlist: &words, Adder: queue.GetAddCount()}
	expander.ProcessWordlist()
	filter := filter.NewWorkFilter(settings, queue.GetDoneFunc())

	// Check robots mode
	if settings.RobotsMode == ss.ObeyRobots {
		filter.AddRobotsFilter(scope, clientFactory)
	}
	work := filter.RunFilter(expander.Expand(queue.GetWorkChan()))

	logging.Logf(logging.LogDebug, "Creating results manager...")
	rchan := make(chan results.Result, settings.QueueSize)
	resultsManager, err := results.GetResultsManager(settings)
	if err != nil {
		logging.Logf(logging.LogFatal, "Unable to start results manager: %s", err.Error())
		return
	}

	logging.Logf(logging.LogDebug, "Starting %d workers...", settings.Workers)
	worker.StartWorkers(settings, clientFactory, work, queue.GetAddFunc(), queue.GetDoneFunc(), rchan)

	logging.Logf(logging.LogDebug, "Starting results manager...")
	resultsManager.Run(rchan)

	// Kick things off with the seed URLs
	logging.Logf(logging.LogDebug, "Adding starting URLs: %v", scope)
	queue.AddURLs(scope...)

	// Potentially seed from robots
	if settings.RobotsMode == ss.SeedRobots {
		queue.SeedFromRobots(scope, clientFactory)
	}

	// Wait for work to be done
	logging.Logf(logging.LogDebug, "Main goroutine waiting for work...")
	queue.WaitPipe()
	logging.Logf(logging.LogDebug, "Work done.")

	// Cleanup
	queue.InputFinished()
	close(rchan)
	resultsManager.Wait()
	if cpuProfStop != nil {
		cpuProfStop()
	}
	logging.Logf(logging.LogDebug, "Done!")
}
func (q *WorkQueue) reject(u *url.URL) {
	logging.Logf(logging.LogDebug, "Workqueue rejecting %s", u.String())
	q.ctr.Done(1)
}
func (ctr *WorkCounter) Stats() {
	logging.Logf(logging.LogDebug, "WorkCounter: %d/%d", ctr.done, ctr.todo)
}
// DumpStackTrace logs the stacks of all goroutines.
func DumpStackTrace() {
	buf := make([]byte, 1<<20)
	// Truncate to the bytes actually written so we don't log a megabyte of
	// trailing NULs.
	n := runtime.Stack(buf, true)
	logging.Logf(logging.LogDebug, "=== received SIGQUIT ===\n*** goroutine dump...\n%s\n*** end\n", buf[:n])
}
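// EnableStackTraces (called at the top of main) plausibly connects
// DumpStackTrace to SIGQUIT along these lines; this is a sketch under that
// assumption, not the repo's code, and assumes os/signal and syscall are
// imported.
func exampleEnableStackTraces() {
	sigquit := make(chan os.Signal, 1)
	signal.Notify(sigquit, syscall.SIGQUIT)
	go func() {
		for range sigquit {
			DumpStackTrace()
		}
	}()
}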