func (ds *CqlModel) listDomainsImpl(seed string, limit int, working bool) ([]DomainInfo, error) { if limit <= 0 { return nil, fmt.Errorf("Bad value for limit parameter %d", limit) } db := ds.Db var itr *gocql.Iter if seed == "" && !working { itr = db.Query("SELECT dom, claim_tok, claim_time FROM domain_info LIMIT ?", limit).Iter() } else if seed == "" { itr = db.Query("SELECT dom, claim_tok, claim_time FROM domain_info WHERE dispatched = true LIMIT ?", limit).Iter() } else if !working { itr = db.Query("SELECT dom, claim_tok, claim_time FROM domain_info WHERE TOKEN(dom) > TOKEN(?) LIMIT ?", seed, limit).Iter() } else { //working==true AND seed != "" itr = db.Query("SELECT dom, claim_tok, claim_time FROM domain_info WHERE dispatched = true AND TOKEN(dom) > TOKEN(?) LIMIT ?", seed, limit).Iter() } var dinfos []DomainInfo var domain string var claim_tok gocql.UUID var claim_time time.Time for itr.Scan(&domain, &claim_tok, &claim_time) { dinfos = append(dinfos, DomainInfo{Domain: domain, UuidOfQueued: claim_tok, TimeQueued: claim_time}) } err := itr.Close() if err != nil { return dinfos, err } err = ds.annotateDomainInfo(dinfos) return dinfos, err }
// collectLinkInfos populates a []LinkInfo list given a cassandra iterator.
//
// Each scanned row is one crawl record for a link. rtimes deduplicates
// links across rows: it maps a link's URL string to the most recent crawl
// time seen so far (ctm) and that link's index in linfos (ind), so a newer
// record overwrites the existing slot rather than appending a duplicate.
// limit caps the number of distinct links collected. The (possibly grown)
// linfos slice is returned; on a URL-construction error the rows collected
// so far are returned with the error.
func (ds *CqlModel) collectLinkInfos(linfos []LinkInfo, rtimes map[string]rememberTimes, itr *gocql.Iter, limit int) ([]LinkInfo, error) {
	var domain, subdomain, path, protocol, anerror string
	var crawlTime time.Time
	var robotsExcluded bool
	var status int

	for itr.Scan(&domain, &subdomain, &path, &protocol, &crawlTime, &status, &anerror, &robotsExcluded) {
		u, err := walker.CreateURL(domain, subdomain, path, protocol, crawlTime)
		if err != nil {
			return linfos, err
		}
		urlString := u.String()

		qq, yes := rtimes[urlString]

		// Skip this row if we already hold a strictly newer record for
		// the same URL.
		if yes && qq.ctm.After(crawlTime) {
			continue
		}

		linfo := LinkInfo{
			Url:            urlString,
			Status:         status,
			Error:          anerror,
			RobotsExcluded: robotsExcluded,
			CrawlTime:      crawlTime,
		}

		// nindex records where linfo landed in linfos so rtimes can point
		// back at it for future overwrites.
		nindex := -1
		if yes {
			// Seen before and this record is at least as new: replace in place.
			nindex = qq.ind
			linfos[qq.ind] = linfo
		} else {
			// If you've reached the limit, then we're all done
			if len(linfos) >= limit {
				break
			}
			linfos = append(linfos, linfo)
			nindex = len(linfos) - 1
		}
		rtimes[urlString] = rememberTimes{ctm: crawlTime, ind: nindex}
	}

	return linfos, nil
}