/
main.go
119 lines (95 loc) · 2.73 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package main
import (
"flag"
"fmt"
"io/ioutil"
"net/url"
"os"
log "github.com/cihub/seelog"
"github.com/mattheath/kraken/crawler"
"github.com/mattheath/kraken/sitemap"
)
// Command-line flags for the kraken crawler, parsed in main().
var (
	// Dedicated FlagSet named "kraken"; ExitOnError makes Parse terminate
	// the process itself on invalid input.
	flagSet        = flag.NewFlagSet("kraken", flag.ExitOnError)
	target         = flagSet.String("target", "", "target URL to crawl")
	depth          = flagSet.Int("depth", 4, "depth of pages to crawl")
	verboseLogging = flagSet.Bool("v", false, "enable verbose logging")
	outputDir      = flagSet.String("o", "", "directory to output to")
)
// main parses the command-line flags, crawls the target site to the
// requested depth, and writes XML and JSON sitemaps for the pages found.
func main() {
	// Process flags
	flagSet.Parse(os.Args[1:])

	// Flush logs before exit
	setLogger(*verboseLogging)
	defer log.Flush()

	// Do we have a target?
	if *target == "" {
		fmt.Println("Please specify a target domain, eg. kraken -target=\"http://example.com\"")
		os.Exit(1)
	}

	targetURL, err := url.Parse(*target)
	if err != nil {
		// Printf, not Println: the message contains format verbs.
		fmt.Printf("Could not parse target url '%s' - %v\n", *target, err)
		os.Exit(1)
	}

	// Directory to save output files; default to the working directory.
	out := *outputDir
	if out == "" {
		out, err = os.Getwd()
		if err != nil {
			log.Criticalf("Failed to get current working directory: %v", err)
			// Cannot continue without a valid output directory.
			os.Exit(1)
		}
	}

	// Use a HTTP based fetcher
	fetcher := &HttpFetcher{}

	// Fire!
	log.Infof("Unleashing the Kraken at %s", *target)

	// Crawl the specified site
	c := crawler.NewCrawler()
	c.Work(targetURL, *depth, fetcher)

	// Success
	log.Infof("%v pages found, %v requests attempted", len(c.Pages), c.TotalRequests())

	writeSitemaps(out, c)
}
// setLogger initialises the seelog logger at the desired verbosity level:
// "debug" when verbose is true, "info" otherwise. Output goes to the console.
func setLogger(verbose bool) {
	logLevel := "info"
	if verbose {
		logLevel = "debug"
	}

	// Console-only seelog configuration, filtered to the chosen level.
	logConfig := `
	<seelog>
	    <outputs>
	        <filter levels="%s">
	            <console />
	        </filter>
	    </outputs>
	</seelog>`

	logger, err := log.LoggerFromConfigAsBytes([]byte(fmt.Sprintf(logConfig, logLevel)))
	if err != nil {
		// The error was previously discarded, which could hand a nil logger
		// to log.UseLogger. Without a logger we can only report via stderr.
		fmt.Fprintf(os.Stderr, "Failed to initialise logger: %v\n", err)
		os.Exit(1)
	}
	log.UseLogger(logger)
}
// writeSitemaps renders the crawl results held by c as an XML sitemap and a
// JSON site-structure description, writing both into outdir. File names are
// derived from the target host, e.g. "example.com-sitemap.xml".
// Any failure is logged at critical level and terminates the process.
func writeSitemaps(outdir string, c crawler.Crawler) error {
	// Build XML sitemap and write to the output file.
	xmlout := fmt.Sprintf("%s/%s-sitemap.xml", outdir, c.Target().Host)
	xmlSitemap, err := sitemap.BuildXMLSitemap(c.AllPages())
	if err != nil {
		log.Criticalf("Failed to generate sitemap to %s: %v", xmlout, err)
		os.Exit(1)
	}
	if err := ioutil.WriteFile(xmlout, xmlSitemap, 0644); err != nil {
		log.Criticalf("Failed to write sitemap to %s: %v", xmlout, err)
		os.Exit(1)
	}
	log.Infof("Wrote XML sitemap to %s", xmlout)

	// Build JSON site description.
	siteout := fmt.Sprintf("%s/%s-sitemap.json", outdir, c.Target().Host)
	b, err := sitemap.BuildJSONSiteStructure(c.Target(), c.AllPages())
	if err != nil {
		// Fix: this error was previously ignored, so a failed build could
		// silently write garbage (or nothing) to the JSON file.
		log.Criticalf("Failed to generate JSON site structure: %v", err)
		os.Exit(1)
	}
	if err := ioutil.WriteFile(siteout, b, 0644); err != nil {
		log.Criticalf("Failed to write sitemap to %s: %v", siteout, err)
		os.Exit(1)
	}
	log.Infof("Wrote JSON sitemap to %s", siteout)

	return nil
}