示例#1
0
// listDomainsImpl reads up to limit rows from the domain_info table and
// returns them as DomainInfo values.
//
// seed, when non-empty, restricts the query to rows whose partition token is
// strictly greater than TOKEN(seed). working, when true, restricts the query
// to rows with dispatched = true. A non-positive limit is rejected with an
// error before any query is issued.
//
// After the rows are scanned the slice is handed to annotateDomainInfo; the
// error from that call (if any) is returned together with the slice. If
// closing the iterator fails, the rows scanned so far are returned alongside
// the iterator's error.
func (ds *CqlModel) listDomainsImpl(seed string, limit int, working bool) ([]DomainInfo, error) {
	if limit <= 0 {
		return nil, fmt.Errorf("Bad value for limit parameter %d", limit)
	}

	// Pick the statement and its bind values from the (seed, working) pair.
	var stmt string
	var args []interface{}
	switch {
	case seed == "" && !working:
		stmt = "SELECT dom, claim_tok, claim_time FROM domain_info LIMIT ?"
		args = []interface{}{limit}
	case seed == "":
		stmt = "SELECT dom, claim_tok, claim_time FROM domain_info WHERE dispatched = true LIMIT ?"
		args = []interface{}{limit}
	case !working:
		stmt = "SELECT dom, claim_tok, claim_time FROM domain_info WHERE TOKEN(dom) > TOKEN(?) LIMIT ?"
		args = []interface{}{seed, limit}
	default: // working == true AND seed != ""
		stmt = "SELECT dom, claim_tok, claim_time FROM domain_info WHERE dispatched = true AND TOKEN(dom) > TOKEN(?) LIMIT ?"
		args = []interface{}{seed, limit}
	}
	itr := ds.Db.Query(stmt, args...).Iter()

	var (
		dinfos    []DomainInfo
		dom       string
		claimTok  gocql.UUID
		claimTime time.Time
	)
	for itr.Scan(&dom, &claimTok, &claimTime) {
		dinfos = append(dinfos, DomainInfo{
			Domain:       dom,
			UuidOfQueued: claimTok,
			TimeQueued:   claimTime,
		})
	}
	if closeErr := itr.Close(); closeErr != nil {
		return dinfos, closeErr
	}

	annotateErr := ds.annotateDomainInfo(dinfos)
	return dinfos, annotateErr
}
示例#2
0
// collectLinkInfos scans rows from itr and merges them into linfos, returning
// the resulting slice.
//
// rtimes maps a URL string to the crawl time and linfos index of the entry
// currently recorded for that URL; it is updated as rows are consumed.
// For each row:
//   - if the URL is already recorded with a crawl time later than the row's,
//     the row is skipped;
//   - if the URL is already recorded otherwise, its entry in linfos is
//     overwritten in place;
//   - if the URL is new, it is appended, unless linfos already holds limit
//     entries, in which case scanning stops.
//
// A failure to build the URL from a row aborts the scan and returns the
// entries collected so far together with the error. The iterator is not
// closed here.
func (ds *CqlModel) collectLinkInfos(linfos []LinkInfo, rtimes map[string]rememberTimes, itr *gocql.Iter, limit int) ([]LinkInfo, error) {
	var (
		dom, subdom, path, proto, errText string
		crawled                           time.Time
		excluded                          bool
		status                            int
	)

	for itr.Scan(&dom, &subdom, &path, &proto, &crawled, &status, &errText, &excluded) {
		u, err := walker.CreateURL(dom, subdom, path, proto, crawled)
		if err != nil {
			return linfos, err
		}
		key := u.String()

		prev, seen := rtimes[key]
		if seen && prev.ctm.After(crawled) {
			// A newer crawl of this URL is already recorded.
			continue
		}
		if !seen && len(linfos) >= limit {
			// No room for another new URL: we're all done.
			break
		}

		entry := LinkInfo{
			Url:            key,
			Status:         status,
			Error:          errText,
			RobotsExcluded: excluded,
			CrawlTime:      crawled,
		}

		slot := prev.ind
		if seen {
			linfos[slot] = entry
		} else {
			slot = len(linfos)
			linfos = append(linfos, entry)
		}
		rtimes[key] = rememberTimes{ctm: crawled, ind: slot}
	}

	return linfos, nil
}