func updateAuctionsByIdList(appCtx *lib.AppContext, idList []*backfillTarget) { aidriver := NewAuctionItemDriver(appCtx) assocdriver := NewAssocCrawlerKeywordAuctionItemDriver(appCtx) crawler := yauction.NewCrawler(appCtx.NewHttpClient()) auctionKeys := make([]*ds.Key, 0) auctions := make([]*yauction.AuctionItem, 0) assocKeys := make([]*ds.Key, 0) assocs := make([]*yauction.AssocCrawlerKeywordAuctionItem, 0) for _, target := range idList { auction, err := crawler.GetAuction(target.AuctionId) if err != nil { target.Error = err continue } auctionKeys = append(auctionKeys, aidriver.NewKey( auction.Id, 0, nil, )) auctions = append(auctions, auction) assocKeys = append(assocKeys, assocdriver.NewKey( fmt.Sprintf("%s-%s", target.Keyword, auction.Id), 0, nil, )) assocs = append(assocs, &yauction.AssocCrawlerKeywordAuctionItem{ Keyword: target.Keyword, AuctionId: auction.Id, }) } aidriver.PutMulti(auctionKeys, auctions) assocdriver.PutMulti(assocKeys, assocs) }
func crawlKeywords(appCtx *lib.AppContext) error { var keywords []yauction.CrawlerKeyword var logger = appCtx.Logger _now := now() ckdriver := NewCrawlerKeywordDriver(appCtx) q := ckdriver.NewQuery() q = q.Filter("EndAt >=", _now) _, err := q.GetAll(&keywords) if err != nil { return err } // TODO: optimize query result to remove keywords crawled within an hour. if len(keywords) == 0 { logger.Info("No keywords need to be checked") return nil } logger.Debug("%d Keywords start to crawl", len(keywords)) var wg sync.WaitGroup auctionList := make([][]*yauction.AuctionItem, len(keywords)) errorList := make([]error, len(keywords)) for i, keyword := range keywords { wg.Add(1) iter := yauction.NewAuctionSearchIterator( yauction.NewCrawler(appCtx.NewHttpClient()), keyword.Keyword, ) go func(j int, iter *yauction.AuctionSearchIterator) { list, err := iterateSearchKeyword(iter, logger) if err == yauction.ErrNoMoreIds { err = nil // Update Auctions aidriver := NewAuctionItemDriver(appCtx) assocdriver := NewAssocCrawlerKeywordAuctionItemDriver(appCtx) auctionKeys := make([]*ds.Key, len(list)) assocKeys := make([]*ds.Key, len(list)) assocs := make([]*yauction.AssocCrawlerKeywordAuctionItem, len(list)) for k, auction := range list { auctionKeys[k] = aidriver.NewKey(auction.Id, 0, nil) assocKeys[k] = assocdriver.NewKey( fmt.Sprintf("%s-%s", iter.Keyword, auction.Id), 0, nil, ) assocs[k] = &yauction.AssocCrawlerKeywordAuctionItem{ Keyword: iter.Keyword, AuctionId: auction.Id, } } _, err = aidriver.PutMulti(auctionKeys, list) if err == nil { _, err = assocdriver.PutMulti(assocKeys, assocs) if err == nil { auctionList[j] = list } } } errorList[j] = err wg.Done() }(i, iter) } wg.Wait() numErrors := 0 numUpdates := 0 for i, list := range auctionList { err = errorList[i] if err != nil { logger.Error( "An error occurred while crawling the keyword %q: %v", keywords[i].Keyword, errorList[i], ) numErrors += 1 continue } else { numUpdates += len(list) } } logger.Info( "Finished to crawl %d keywords. Found %d AuctionItems, %d errors.", len(keywords), numUpdates, numErrors, ) if numErrors > 0 { return fmt.Errorf("Keyword Crawling failed with %d errors", numErrors) } else { return nil } }
func crawlClosedKeyword(appCtx *lib.AppContext, keyword string) ([]string, error) { var ckeyword yauction.CrawlerKeyword var logger = appCtx.Logger ckdriver := NewCrawlerKeywordDriver(appCtx) if err := ckdriver.Get( ckdriver.NewKey(keyword, 0, nil), &ckeyword, ); err != nil { return nil, err } iter := yauction.NewAuctionClosedSearchIterator( yauction.NewCrawler(appCtx.NewHttpClient()), keyword, ) logger.Info("Start Backfilling for %q", keyword) ids, err := iterateClosedSearchKeyword(iter, logger) if err != nil { return nil, err } logger.Info("Found %d AuctionIds", len(ids)) // Group by 20 updateGroups := make([][]*backfillTarget, 0) updateIdList := make([]*backfillTarget, 0) for _, id := range ids { updateIdList = append(updateIdList, &backfillTarget{ AuctionId: id, Keyword: keyword, }) if len(updateIdList) == 20 { updateGroups = append(updateGroups, updateIdList) updateIdList = make([]*backfillTarget, 0) } } if len(updateIdList) > 0 { updateGroups = append(updateGroups, updateIdList) } var wg sync.WaitGroup for i, group := range updateGroups { wg.Add(1) go func(j int, idList []*backfillTarget) { updateAuctionsByIdList(appCtx, idList) wg.Done() }(i, group) } wg.Wait() result := make([]string, 0) numErrors := 0 for _, group := range updateGroups { for _, target := range group { if target.Error != nil { numErrors += 1 } else { result = append(result, target.AuctionId) } } } logger.Info( "Finished to update %d auctions, %d errors.", len(result), numErrors, ) if numErrors > 0 { return result, fmt.Errorf("Keyword Crawling failed with %d errors", numErrors) } else { return result, nil } }
func crawlExpiredAuctions(appCtx *lib.AppContext) error { var auctions []yauction.AuctionItem var logger = appCtx.Logger _now := now() aidriver := NewAuctionItemDriver(appCtx) q := aidriver.NewQuery() q = q.Filter("EndAt <", _now).Filter("Status =", yauction.STATUS_OPEN).Limit(50) keys, err := q.GetAll(&auctions) if err != nil { return err } if len(auctions) == 0 { logger.Info("No auctions need to be checked") return nil } logger.Debug("Found %d expired auctions", len(auctions)) var wg sync.WaitGroup errorList := make([]error, len(auctions)) for i, auction := range auctions { wg.Add(1) go func(j int, origAuction *yauction.AuctionItem) { crawler := yauction.NewCrawler(appCtx.NewHttpClient()) _auction, err := crawler.GetAuction(auction.Id) if err == nil { if _auction.Status == yauction.STATUS_DELETED { origAuction.Status = yauction.STATUS_DELETED } else { origAuction.Title = _auction.Title origAuction.Url = _auction.Url origAuction.ImageUrl = _auction.ImageUrl origAuction.Seller = _auction.Seller origAuction.InitPrice = _auction.InitPrice origAuction.CurrentPrice = _auction.CurrentPrice origAuction.BidOrBuy = _auction.BidOrBuy origAuction.Quantity = _auction.Quantity origAuction.Bids = _auction.Bids origAuction.StartAt = _auction.StartAt origAuction.EndAt = _auction.EndAt origAuction.UpdatedAt = now() origAuction.Status = _auction.Status } _, err = aidriver.Put(keys[j], origAuction) } errorList[j] = err wg.Done() }(i, &auction) } wg.Wait() numErrors := 0 numUpdates := 0 for i, auction := range auctions { err = errorList[i] if err != nil { logger.Error( "An error occurred while updating an auction %q: %v", auction.Id, errorList[i], ) numErrors += 1 continue } else { numUpdates += 1 } } logger.Info( "Finished to update %d auctions, %d errors.", numUpdates, numErrors, ) if numErrors > 0 { return fmt.Errorf("Crawling expired auctions failed with %d errors", numErrors) } else { return nil } }