func Init() { uc = urls.Init() if err := os.MkdirAll(*urlCacheDir, 0700); err != nil { logging.Fatal("Couldn't create URL cache dir: %v", err) } bot.Handle(urlScan, client.PRIVMSG) bot.Command(find, "urlfind", "urlfind <regex> -- "+ "searches for previously mentioned URLs matching <regex>") bot.Command(find, "url find", "url find <regex> -- "+ "searches for previously mentioned URLs matching <regex>") bot.Command(find, "urlsearch", "urlsearch <regex> -- "+ "searches for previously mentioned URLs matching <regex>") bot.Command(find, "url search", "url search <regex> -- "+ "searches for previously mentioned URLs matching <regex>") bot.Command(find, "randurl", "randurl -- displays a random URL") bot.Command(find, "random url", "random url -- displays a random URL") bot.Command(shorten, "shorten that", "shorten that -- "+ "shortens the last mentioned URL.") bot.Command(shorten, "shorten", "shorten <url> -- shortens <url>") bot.Command(cache, "cache that", "cache that -- "+ "caches the last mentioned URL.") bot.Command(cache, "cache", "cache <url> -- caches <url>") bot.Command(cache, "save that", "save that -- "+ "caches the last mentioned URL.") bot.Command(cache, "save", "save <url> -- caches <url>") // This serves "shortened" urls http.Handle(shortenPath, http.StripPrefix(shortenPath, http.HandlerFunc(shortenedServer))) // This serves "cached" urls http.Handle(cachePath, http.StripPrefix(cachePath, http.FileServer(http.Dir(*urlCacheDir)))) }
func main() { flag.Parse() logging.InitFromFlags() // Let's go find some mongo. db.Init() defer db.Close() uc := urls.Init() work := make(chan *urls.Url) quit := make(chan bool) urls := make(chan *urls.Url) rows := make(chan []interface{}) failed := 0 // If we're checking, spin up some workers if *check { for i := 1; i <= *workq; i++ { go func(n int) { count := 0 for u := range work { count++ logging.Debug("w%02d r%04d: Fetching '%s'", n, count, u.Url) res, err := http.Head(u.Url) logging.Debug("w%02d r%04d: Response '%s'", n, count, res.Status) if err == nil && res.StatusCode == 200 { urls <- u } else { failed++ } } quit <- true }(i) } } // Function to feed rows into the rows channel. row_feeder := func(sth *sqlite3.Statement, row ...interface{}) { rows <- row } // Function to execute a query on the SQLite db. db_query := func(dbh *sqlite3.Database) { n, err := dbh.Execute("SELECT * FROM urls;", row_feeder) if err == nil { logging.Info("Read %d rows from database.\n", n) } else { logging.Error("DB error: %s\n", err) } } // Open up the URL database in a goroutine and feed rows // in on the input_rows channel. go func() { sqlite3.Session(*file, db_query) // once we've done the query, close the channel to indicate this close(rows) }() // Another goroutine to munge the rows into Urls and optionally send // them to the pool of checker goroutines. go func() { for row := range rows { u := parseUrl(row) if *check { work <- u } else { urls <- u } } if *check { // Close work channel and wait for all workers to quit. close(work) for i := 0; i < *workq; i++ { <-quit } } close(urls) }() // And finally... count := 0 var err error for u := range urls { // ... push each url into mongo err = uc.Insert(u) if err != nil { logging.Error("Awww: %v\n", err) } else { if count%1000 == 0 { fmt.Printf("%d...", count) } count++ } } fmt.Println("done.") if *check { logging.Info("Dropped %d non-200 urls.", failed) } logging.Info("Inserted %d urls.", count) }