func (w *wrapper) To(url string) (*crawler.URL, error) { uu, err := urlx.Parse(url) if err != nil { return nil, err } u := &crawler.URL{} u.URL = *uu u.Depth = w.Depth u.Done = w.Done u.Last = w.Last u.Status = w.Status u.NumVisit = w.NumVisit u.NumRetry = w.NumRetry return u, nil }
// Enqueue adds urls to queue. func (cw *Crawler) Enqueue(urls ...string) error { for _, u := range urls { uu, err := urlx.Parse(u, cw.normalize) if err != nil { return err } if ok, err := cw.store.PutNX(&URL{ URL: *uu, }); err != nil { return err } else if ok { cw.scheduler.NewIn <- uu } } return nil }
func (s *BoltStore) Recover(ch chan<- *url.URL) error { return s.DB.View(func(tx *bolt.Tx) error { b := tx.Bucket(bkURL) c := b.Cursor() for k, v := c.First(); k != nil; k, v = c.Next() { w := &wrapper{} if err := s.codec.Unmarshal(v, w); err != nil { return err } else if w.Done { continue } else if u, err := urlx.Parse(string(k)); err != nil { return err } else { ch <- u } } return nil }) }
func (cw *Crawler) addSeeds(seeds ...string) (n int, err error) { if len(seeds) == 0 { return 0, errors.New("crawler: no seed provided") } for _, seed := range seeds { var u *url.URL var ok bool if u, err = urlx.Parse(seed, cw.normalize); err != nil { return } if ok, err = cw.store.PutNX(&URL{ URL: *u, }); err != nil { return } else if ok { cw.scheduler.NewIn <- u n++ } } return }