// UrlDriver constructs a urlDriver for the given database.
//
// The returned driver holds the collection produced by urls.Collection(db, l),
// an empty lastseen map keyed by string, and the supplied logger l.
func UrlDriver(db *db.Database, l logging.Logger) *urlDriver {
	ud := new(urlDriver)
	ud.UrlCollection = urls.Collection(db, l)
	ud.lastseen = make(map[string]bson.ObjectId)
	ud.l = l
	return ud
}
func main() { flag.Parse() log = logging.NewFromFlags() // Let's go find some mongo. mdb, err := db.Connect("localhost") if err != nil { log.Fatal("Oh no: %v", err) } defer mdb.Session.Close() uc := urls.Collection(mdb, log) work := make(chan *urls.Url) quit := make(chan bool) urls := make(chan *urls.Url) rows := make(chan []interface{}) failed := 0 // If we're checking, spin up some workers if *check { for i := 1; i <= *workq; i++ { go func(n int) { count := 0 for u := range work { count++ log.Debug("w%02d r%04d: Fetching '%s'", n, count, u.Url) res, err := http.Head(u.Url) log.Debug("w%02d r%04d: Response '%s'", n, count, res.Status) if err == nil && res.StatusCode == 200 { urls <- u } else { failed++ } } quit <- true }(i) } } // Function to feed rows into the rows channel. row_feeder := func(sth *sqlite3.Statement, row ...interface{}) { rows <- row } // Function to execute a query on the SQLite db. db_query := func(dbh *sqlite3.Database) { n, err := dbh.Execute("SELECT * FROM urls;", row_feeder) if err == nil { log.Info("Read %d rows from database.\n", n) } else { log.Error("DB error: %s\n", err) } } // Open up the URL database in a goroutine and feed rows // in on the input_rows channel. go func() { sqlite3.Session(*file, db_query) // once we've done the query, close the channel to indicate this close(rows) }() // Another goroutine to munge the rows into Urls and optionally send // them to the pool of checker goroutines. go func() { for row := range rows { u := parseUrl(row) if *check { work <- u } else { urls <- u } } if *check { // Close work channel and wait for all workers to quit. close(work) for i := 0; i < *workq; i++ { <-quit } } close(urls) }() // And finally... count := 0 for u := range urls { // ... 
push each url into mongo err = uc.Insert(u) if err != nil { log.Error("Awww: %v\n", err) } else { if count%1000 == 0 { fmt.Printf("%d...", count) } count++ } } fmt.Println("done.") if *check { log.Info("Dropped %d non-200 urls.", failed) } log.Info("Inserted %d urls.", count) }