Beispiel #1
0
// updateAuctionsByIdList fetches every auction named in idList with a crawler
// and stores the fetched auctions together with their keyword associations.
//
// Failures are reported per target through target.Error:
//   - a target whose fetch fails gets the fetch error and is left out of the
//     write;
//   - when writing the auctions or the associations fails, the write error is
//     recorded on every target that was part of that write, so callers that
//     inspect target.Error do not treat unsaved auctions as updated.
//
// Nothing is written when no auction could be fetched.
func updateAuctionsByIdList(appCtx *lib.AppContext, idList []*backfillTarget) {
	aidriver := NewAuctionItemDriver(appCtx)
	assocdriver := NewAssocCrawlerKeywordAuctionItemDriver(appCtx)
	crawler := yauction.NewCrawler(appCtx.NewHttpClient())

	// fetched tracks the targets that take part in the batch write so that a
	// write failure can be attributed back to them.
	fetched := make([]*backfillTarget, 0, len(idList))
	auctionKeys := make([]*ds.Key, 0, len(idList))
	auctions := make([]*yauction.AuctionItem, 0, len(idList))
	assocKeys := make([]*ds.Key, 0, len(idList))
	assocs := make([]*yauction.AssocCrawlerKeywordAuctionItem, 0, len(idList))

	for _, target := range idList {
		auction, err := crawler.GetAuction(target.AuctionId)
		if err != nil {
			target.Error = err
			continue
		}
		fetched = append(fetched, target)
		auctionKeys = append(auctionKeys, aidriver.NewKey(auction.Id, 0, nil))
		auctions = append(auctions, auction)
		// The association key is "<keyword>-<auction id>".
		assocKeys = append(assocKeys, assocdriver.NewKey(
			fmt.Sprintf("%s-%s", target.Keyword, auction.Id),
			0,
			nil,
		))
		assocs = append(assocs, &yauction.AssocCrawlerKeywordAuctionItem{
			Keyword:   target.Keyword,
			AuctionId: auction.Id,
		})
	}

	if len(fetched) == 0 {
		return
	}

	// Write the associations only after the auctions themselves were stored.
	_, err := aidriver.PutMulti(auctionKeys, auctions)
	if err == nil {
		_, err = assocdriver.PutMulti(assocKeys, assocs)
	}
	if err != nil {
		for _, target := range fetched {
			target.Error = err
		}
	}
}
Beispiel #2
0
// crawlKeywords crawls the search results of every CrawlerKeyword whose EndAt
// is not before the current time and stores what it finds.
//
// Each keyword is handled in its own goroutine. A keyword counts as crawled
// only when its search iteration ends with yauction.ErrNoMoreIds; in that case
// the found auctions are written first and the keyword/auction associations
// second. Any other outcome of the iteration, or a failed write, is kept as
// that keyword's error.
//
// It returns the query error if the keywords cannot be loaded, a summary error
// if at least one keyword failed, and nil otherwise.
func crawlKeywords(appCtx *lib.AppContext) error {
	logger := appCtx.Logger
	cutoff := now()

	ckdriver := NewCrawlerKeywordDriver(appCtx)

	var keywords []yauction.CrawlerKeyword
	query := ckdriver.NewQuery().Filter("EndAt >=", cutoff)
	if _, err := query.GetAll(&keywords); err != nil {
		return err
	}

	// TODO: optimize query result to remove keywords crawled within an hour.
	total := len(keywords)
	if total == 0 {
		logger.Info("No keywords need to be checked")
		return nil
	}
	logger.Debug("%d Keywords start to crawl", total)

	// One result slot per keyword; each goroutine writes only to its own slot.
	auctionList := make([][]*yauction.AuctionItem, total)
	errorList := make([]error, total)

	var wg sync.WaitGroup
	for i, keyword := range keywords {
		wg.Add(1)
		iter := yauction.NewAuctionSearchIterator(
			yauction.NewCrawler(appCtx.NewHttpClient()),
			keyword.Keyword,
		)
		go func(slot int, it *yauction.AuctionSearchIterator) {
			defer wg.Done()

			found, err := iterateSearchKeyword(it, logger)
			if err != yauction.ErrNoMoreIds {
				// Anything but the end-of-results marker is passed through
				// unchanged and nothing is stored.
				errorList[slot] = err
				return
			}

			// Update Auctions
			aidriver := NewAuctionItemDriver(appCtx)
			assocdriver := NewAssocCrawlerKeywordAuctionItemDriver(appCtx)
			count := len(found)
			auctionKeys := make([]*ds.Key, count)
			assocKeys := make([]*ds.Key, count)
			assocs := make([]*yauction.AssocCrawlerKeywordAuctionItem, count)
			for k, auction := range found {
				auctionKeys[k] = aidriver.NewKey(auction.Id, 0, nil)
				assocKeys[k] = assocdriver.NewKey(
					fmt.Sprintf("%s-%s", it.Keyword, auction.Id),
					0,
					nil,
				)
				assocs[k] = &yauction.AssocCrawlerKeywordAuctionItem{
					Keyword:   it.Keyword,
					AuctionId: auction.Id,
				}
			}

			if _, err = aidriver.PutMulti(auctionKeys, found); err != nil {
				errorList[slot] = err
				return
			}
			if _, err = assocdriver.PutMulti(assocKeys, assocs); err != nil {
				errorList[slot] = err
				return
			}
			auctionList[slot] = found
		}(i, iter)
	}
	wg.Wait()

	failed, updated := 0, 0
	for i, crawlErr := range errorList {
		if crawlErr != nil {
			logger.Error(
				"An error occurred while crawling the keyword %q: %v",
				keywords[i].Keyword,
				crawlErr,
			)
			failed++
			continue
		}
		updated += len(auctionList[i])
	}
	logger.Info(
		"Finished to crawl %d keywords. Found %d AuctionItems, %d errors.",
		total, updated, failed,
	)
	if failed > 0 {
		return fmt.Errorf("Keyword Crawling failed with %d errors", failed)
	}
	return nil
}
Beispiel #3
0
// crawlClosedKeyword backfills auctions for keyword from the closed-auction
// search.
//
// The keyword must be loadable through the CrawlerKeyword driver; otherwise the
// lookup error is returned. The auction ids produced by the closed search are
// split into batches of up to 20, and each batch is handed to
// updateAuctionsByIdList in its own goroutine.
//
// It returns the ids of all targets that ended without an error. When at least
// one target carries an error, those ids are returned together with a summary
// error.
func crawlClosedKeyword(appCtx *lib.AppContext, keyword string) ([]string, error) {
	const batchSize = 20

	logger := appCtx.Logger
	ckdriver := NewCrawlerKeywordDriver(appCtx)

	// The loaded entity is not used further; the lookup only has to succeed.
	var ckeyword yauction.CrawlerKeyword
	key := ckdriver.NewKey(keyword, 0, nil)
	if err := ckdriver.Get(key, &ckeyword); err != nil {
		return nil, err
	}

	iter := yauction.NewAuctionClosedSearchIterator(
		yauction.NewCrawler(appCtx.NewHttpClient()),
		keyword,
	)
	logger.Info("Start Backfilling for %q", keyword)
	ids, err := iterateClosedSearchKeyword(iter, logger)
	if err != nil {
		return nil, err
	}
	logger.Info("Found %d AuctionIds", len(ids))

	// One target per id, in search order.
	targets := make([]*backfillTarget, 0, len(ids))
	for _, id := range ids {
		targets = append(targets, &backfillTarget{
			AuctionId: id,
			Keyword:   keyword,
		})
	}

	// Group by 20
	updateGroups := make([][]*backfillTarget, 0)
	for start := 0; start < len(targets); start += batchSize {
		end := start + batchSize
		if end > len(targets) {
			end = len(targets)
		}
		updateGroups = append(updateGroups, targets[start:end])
	}

	var wg sync.WaitGroup
	for _, group := range updateGroups {
		wg.Add(1)
		go func(batch []*backfillTarget) {
			defer wg.Done()
			updateAuctionsByIdList(appCtx, batch)
		}(group)
	}
	wg.Wait()

	// The batches are consecutive windows of targets, so walking targets
	// visits every target in batch order.
	result := make([]string, 0)
	failed := 0
	for _, target := range targets {
		if target.Error != nil {
			failed++
			continue
		}
		result = append(result, target.AuctionId)
	}
	logger.Info(
		"Finished to update %d auctions, %d errors.",
		len(result), failed,
	)
	if failed > 0 {
		return result, fmt.Errorf("Keyword Crawling failed with %d errors", failed)
	}
	return result, nil
}
Beispiel #4
0
// crawlExpiredAuctions re-fetches auctions that are stored with
// yauction.STATUS_OPEN although their EndAt is already in the past, and writes
// the refreshed state back under the key the query returned for them.
//
// At most 50 auctions are handled per call (the query limit). Every auction is
// refreshed in its own goroutine: an auction reported as
// yauction.STATUS_DELETED only has its status changed, any other result
// overwrites the stored fields and stamps UpdatedAt with the current time.
//
// It returns the query error if the auctions cannot be loaded, a summary error
// if at least one auction could not be fetched or stored, and nil otherwise.
func crawlExpiredAuctions(appCtx *lib.AppContext) error {
	var auctions []yauction.AuctionItem
	var logger = appCtx.Logger
	_now := now()

	aidriver := NewAuctionItemDriver(appCtx)

	q := aidriver.NewQuery()
	q = q.Filter("EndAt <", _now).Filter("Status =", yauction.STATUS_OPEN).Limit(50)
	keys, err := q.GetAll(&auctions)
	if err != nil {
		return err
	}
	if len(auctions) == 0 {
		logger.Info("No auctions need to be checked")
		return nil
	}

	logger.Debug("Found %d expired auctions", len(auctions))

	var wg sync.WaitGroup
	// One error slot per auction; each goroutine writes only to its own slot.
	errorList := make([]error, len(auctions))
	for i := range auctions {
		wg.Add(1)
		// Hand each goroutine a pointer to its own slice element. Taking the
		// address of a range variable (or reading it from the closure) makes
		// all goroutines share one variable on Go versions before 1.22, so
		// they would race and refresh the wrong auction.
		go func(j int, origAuction *yauction.AuctionItem) {
			defer wg.Done()

			crawler := yauction.NewCrawler(appCtx.NewHttpClient())
			latest, err := crawler.GetAuction(origAuction.Id)
			if err != nil {
				errorList[j] = err
				return
			}
			if latest.Status == yauction.STATUS_DELETED {
				// Keep the stored fields of a deleted auction and only flip
				// its status.
				origAuction.Status = yauction.STATUS_DELETED
			} else {
				origAuction.Title = latest.Title
				origAuction.Url = latest.Url
				origAuction.ImageUrl = latest.ImageUrl
				origAuction.Seller = latest.Seller
				origAuction.InitPrice = latest.InitPrice
				origAuction.CurrentPrice = latest.CurrentPrice
				origAuction.BidOrBuy = latest.BidOrBuy
				origAuction.Quantity = latest.Quantity
				origAuction.Bids = latest.Bids
				origAuction.StartAt = latest.StartAt
				origAuction.EndAt = latest.EndAt
				origAuction.UpdatedAt = now()
				origAuction.Status = latest.Status
			}
			_, err = aidriver.Put(keys[j], origAuction)
			errorList[j] = err
		}(i, &auctions[i])
	}
	wg.Wait()

	numErrors := 0
	numUpdates := 0
	for i, updateErr := range errorList {
		if updateErr != nil {
			logger.Error(
				"An error occurred while updating an auction %q: %v",
				auctions[i].Id,
				updateErr,
			)
			numErrors++
			continue
		}
		numUpdates++
	}
	logger.Info(
		"Finished to update %d auctions, %d errors.",
		numUpdates, numErrors,
	)
	if numErrors > 0 {
		return fmt.Errorf("Crawling expired auctions failed with %d errors", numErrors)
	}
	return nil
}