// Update the entry indexes and stats, and not update crawling result. func updateIndexes(appCtx *lib.AppContext, entries []*ameblo.AmebloEntry) (err error) { var d = NewAmebloEntryDriver(appCtx) oldentries := make([]ameblo.AmebloEntry, len(entries)) keys := make([]*ds.Key, len(entries)) for i, ent := range entries { keys[i] = d.NewKey(ent.Url, 0, nil) } if err := d.GetMulti(keys, oldentries); datastore.IsDatastoreError(err) { return fmt.Errorf("GetMulti Error in UpdateIndexes: %v", err) } now := time.Now() for i, oldent := range oldentries { if oldent.Content != "" { entries[i].Content = oldent.Content entries[i].CrawledAt = oldent.CrawledAt } entries[i].UpdatedAt = now } if _, err := d.PutMulti(keys, entries); datastore.IsDatastoreError(err) { return fmt.Errorf("PutMulti Error in UpdateIndexes: %v", err) } return nil }
func NewEventShowList(appCtx *lib.AppContext, showList []event.Show) ([]EventShow, error) { var logger = appCtx.Logger d := NewEventDriver(appCtx) list := make([]EventShow, 0) keys := make([]*ds.Key, len(showList)) eventList := make([]event.Event, len(showList)) for i, s := range showList { eventList[i] = event.Event{} // dummy keys[i] = d.NewKey(s.EventId, 0, nil) } logger.Debug("lengths of showList: %d", len(keys)) if err := d.GetMulti(keys, eventList); datastore.IsDatastoreError(err) { return nil, err } for i, e := range eventList { if e.Id != "" { logger.Debug("Found Event entity for Show %q:", showList[i].Id) list = append(list, EventShow{ Show: showList[i], Event: e, }) } else { // TODO remove orphans. logger.Debug("Orphan Show entity is found: %q:", showList[i].Id) } } return list, nil }
func (d *IEpgDriver) BulkUpdate(list []*tv.IEpg) ([]*ds.Key, error) { found := make([]tv.IEpg, len(list)) keys := make([]*ds.Key, len(list)) putKeys := make([]*ds.Key, 0) putEnts := make([]*tv.IEpg, 0) d.Logger.Info("Bulkupdate %d IEpg entries...", len(list)) for i := range list { keys[i] = d.keyFromEntity(list[i]) } d.Logger.Info("Checking existing IEpgs ...") if err := d.GetMulti(keys, found); datastore.IsDatastoreError(err) { return nil, err } for i := range list { newent := list[i] oldent := &found[i] if newent.StationId == "" { d.Logger.Debug("StationId of %q is empty. Ignore this record.", newent.ProgramTitle) continue } if oldent.Optout { d.Logger.Debug("%q (Id: %q...) is opted-out.", oldent.Id[:8], oldent.ProgramTitle) continue } if !d.hasDiff(oldent, newent) { d.Logger.Debug("%q (%q) does not have any diffs, skipped.", oldent.Id, oldent.ProgramTitle) continue } newent.CreatedAt = oldent.CreatedAt newent.UpdatedAt = time.Now() putKeys = append(putKeys, d.keyFromEntity(newent)) putEnts = append(putEnts, newent) } if len(putEnts) == 0 { d.Logger.Info("No entries are needed to update.") return make([]*ds.Key, 0), nil } d.Logger.Info("Updating %d/%d entries.", len(putEnts), len(list)) if keys, err := d.PutMulti(putKeys, putEnts); err != nil { d.Logger.Error("Failed to update entries: %v", len(putEnts), err) return nil, err } else { return keys, nil } }
func setupApi(app *App) { app.Api.Get("/ameblo/insights/:member/history.json", func(res *wcg.Response, req *wcg.Request) { historyInsights(res, req, app) }) app.Api.Get("/ameblo/indexes/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { indexAllMembers(res, req, app) })) app.Api.Get("/ameblo/indexes/:member.json", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { indexSpecifiedMember(res, req, app) })) app.Api.Get("/ameblo/contents/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { crawlAllMembers(res, req, app) })) app.Api.Get("/ameblo/contents/:member.json", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { crawlSpecifiedMembers(res, req, app) })) app.Api.Delete("/ameblo/contents/:member.json", lib.Admin.Required( func(res *wcg.Response, req *wcg.Request) { var appCtx = lib.NewAppContextFromRequest(req) member, ok := app.Members[req.Param("member")] if !ok { lib.NotFound(res, req) return } PER_ENT := 100 offset := 0 d := NewAmebloEntryDriver(appCtx) for { var list []*ameblo.AmebloEntry var q = d.NewQuery().Filter("Owner =", member.Name).Offset(offset).Limit(PER_ENT) if keys, err := q.GetAll(&list); err != nil { lib.InternalError(res, req, err) return } else { for _, ent := range list { ent.CrawledAt = time.Time{} } if _, err = d.PutMulti(keys, list); datastore.IsDatastoreError(err) { lib.InternalError(res, req, err) } offset = offset + len(list) if len(list) < PER_ENT { break } } } app.Api.Ok(res, req) }, )) }
func generateYACrawlerKeywords(appCtx *lib.AppContext) ([]*yauction.CrawlerKeyword, error) { var showList []event.Show var logger = appCtx.Logger q := NewShowDriver(appCtx).NewQuery() q = q.Filter("StartAt >=", util.Date(now())).Order("StartAt") _, err := q.GetAll(&showList) if err != nil { return nil, err } logger.Debug("Found %d EventShow entities to be checked.", len(showList)) keywordDriver := yauctionapp.NewCrawlerKeywordDriver(appCtx) crawlerKeywrodKeys := make([]*ds.Key, 0) for _, show := range showList { logger.Debug("Show: %q", show.Id) if show.YAKeyword == "" { logger.Debug("EventShow %q doesn't have YAKeyword, skipped.", show.Id) continue } crawlerKeywrodKeys = append( crawlerKeywrodKeys, keywordDriver.NewKey(show.YAKeyword, 0, nil), ) } crawlerKeywords := make([]*yauction.CrawlerKeyword, len(crawlerKeywrodKeys)) err = keywordDriver.GetMulti(crawlerKeywrodKeys, crawlerKeywords) if err != nil && datastore.IsDatastoreError(err) { return nil, err } updateKeys := make([]*ds.Key, 0) updates := make([]*yauction.CrawlerKeyword, 0) for i, ck := range crawlerKeywords { if ck == nil { logger.Debug( "No CrawlerKeyword entity is registered for EventShow %q, creating.", showList[i].Id, ) keyword := yauctionapp.NormalizeKeyword(showList[i].YAKeyword) updateKeys = append(updateKeys, keywordDriver.NewKey(keyword, 0, nil)) updates = append(updates, &yauction.CrawlerKeyword{ Keyword: keyword, StartAt: showList[i].OpenAt.AddDate(0, -3, 0), // start monitoring 3 months before EndAt: showList[i].OpenAt, CreatedAt: now(), }, ) } else { if ck.StartAt != showList[i].OpenAt.AddDate(0, -3, 0) || ck.EndAt != showList[i].OpenAt { logger.Debug( "CrawlerKeyword for %q already exisits and needs updating.", showList[i].Id, ) ck.StartAt = showList[i].OpenAt.AddDate(0, -3, 0) ck.EndAt = showList[i].OpenAt updateKeys = append(updateKeys, keywordDriver.NewKey(ck.Keyword, 0, nil)) updates = append(updates, ck) } else { logger.Debug( "CrawlerKeyword for %q already exisits but no need to update, skipped.", showList[i].Id, ) } } } _, err = keywordDriver.PutMulti(updateKeys, updates) return updates, err }
func updateContents(appCtx *lib.AppContext, entries []*ameblo.AmebloEntry, memberList []*ameblo.Member) error { var d = NewAmebloEntryDriver(appCtx) // Analyze the references refs := make([]*ameblo.AmebloRef, 0) for _, ent := range entries { for _, m := range memberList { if ent.Owner != m.Name && m.IsMentionedIn(ent.Content) { refs = append(refs, &ameblo.AmebloRef{ent.Url, ent.Owner, m.Name}) } } } // Then update content and refs. refd := NewAmebloRefDriver(appCtx) irefkeys := make([]*ds.Key, len(refs)) drefkeys := make([]*ds.Key, 0) keys := make([]*ds.Key, len(entries)) // preparing related keys for i, ent := range entries { ent.CrawledAt = time.Now() keys[i] = d.NewKey(ent.Url, 0, nil) if _drefkeys, err := refd.NewQuery().Ancestor(keys[i]).KeysOnly().GetAll(nil); datastore.IsDatastoreError(err) { return fmt.Errorf("Could not retrieve deletion keys for AmebloRef entities: %v", err) } else { drefkeys = append(drefkeys, _drefkeys...) } } for i, ref := range refs { uid, _ := wcg.UUID() irefkeys[i] = refd.NewKey(uid, 0, d.NewKey(ref.Url, 0, nil)) } err := d.RunInTransaction(func(d *datastore.Driver) error { // remove all reference data under entries if len(drefkeys) > 0 { d.Logger.Debug("Deleting old %d AmebloRef entities...", len(drefkeys)) if err := refd.DeleteMulti(drefkeys); err != nil { return fmt.Errorf("Could not delete old AmebloRef entities: %v", err) } } // insert refs if len(irefkeys) > 0 { d.Logger.Debug("Inserting new %d AmebloRef entities...", len(irefkeys)) if _, err := refd.PutMulti(irefkeys, refs); datastore.IsDatastoreError(err) { return fmt.Errorf("Could not insert new AmebloRef entities: %v", err) } } return nil }, nil) if err != nil { return err } // update all entries d.Logger.Debug("Updating %d AmebloEntry entities...", len(keys)) if _, err := d.PutMulti(keys, entries); datastore.IsDatastoreError(err) { return fmt.Errorf("Could not update AmebloEntry entities: %v", err) } else { return nil } }
func setupYAuctionApi(app *App) { app.Api.Get("/yauction/admin/closed/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) { var appCtx = lib.NewAppContextFromRequest(req) keyword := req.Query("q") if keyword == "" { app.Api.BadRequest(res, req, fmt.Errorf("Missing 'q' parameter.")) return } ids, err := crawlClosedKeyword(appCtx, keyword) if err != nil && ids == nil { lib.Error(res, req, err) return } if err != nil { res.WriteJson(map[string]interface{}{ "num_updates": len(ids), "auction_ids": ids, "error": err.Error(), }) } else { res.WriteJson(map[string]interface{}{ "num_updates": len(ids), "auction_ids": ids, }) } })) app.Api.Get("/yauction/keywords/", func(res *wcg.Response, req *wcg.Request) { keyword, _ := url.QueryUnescape(req.Query("q")) var appCtx = lib.NewAppContextFromRequest(req) var ckeyword yauction.CrawlerKeyword var assocs []yauction.AssocCrawlerKeywordAuctionItem ckdriver := NewCrawlerKeywordDriver(appCtx) assocdriver := NewAssocCrawlerKeywordAuctionItemDriver(appCtx) aidriver := NewAuctionItemDriver(appCtx) if err := ckdriver.Get(ckdriver.NewKey(keyword, 0, nil), &ckeyword); err != nil { if err == ds.ErrNoSuchEntity { app.Api.NotFound(res, req) } else { app.Api.InternalError(res, req, err) } return } if _, err := assocdriver.NewQuery().Filter("Keyword =", keyword).GetAll(&assocs); err != nil { app.Api.InternalError(res, req, err) return } itemKeys := make([]*datastore.Key, len(assocs)) items := make([]yauction.AuctionItem, len(assocs)) for i, assoc := range assocs { itemKeys[i] = aidriver.NewKey(assoc.AuctionId, 0, nil) } err := aidriver.GetMulti(itemKeys, items) if ds.IsDatastoreError(err) { app.Api.InternalError(res, req, err) return } res.WriteJson(map[string]interface{}{ "keyword": ckeyword, "auctions": items, }) }) }