Example #1
0
// updateIndexes writes the given entries to the datastore while keeping any
// crawl result that is already stored for them.
//
// Each entry is looked up by a key built from its Url. When the stored entity
// has a non-empty Content, its Content and CrawledAt are copied onto the
// incoming entry before it is written. UpdatedAt of every entry is set to the
// same timestamp taken once per call.
//
// An error is returned only when datastore.IsDatastoreError reports true for
// the error coming back from GetMulti or PutMulti.
func updateIndexes(appCtx *lib.AppContext, entries []*ameblo.AmebloEntry) (err error) {
	driver := NewAmebloEntryDriver(appCtx)
	count := len(entries)

	keys := make([]*ds.Key, count)
	for i := range entries {
		keys[i] = driver.NewKey(entries[i].Url, 0, nil)
	}

	// stored[i] receives whatever is currently persisted for entries[i].
	stored := make([]ameblo.AmebloEntry, count)
	if getErr := driver.GetMulti(keys, stored); datastore.IsDatastoreError(getErr) {
		return fmt.Errorf("GetMulti Error in UpdateIndexes: %v", getErr)
	}

	updatedAt := time.Now()
	for i := range stored {
		entry := entries[i]
		// A non-empty Content means this entry has been crawled before;
		// carry the crawl result over instead of overwriting it.
		if prev := &stored[i]; prev.Content != "" {
			entry.Content = prev.Content
			entry.CrawledAt = prev.CrawledAt
		}
		entry.UpdatedAt = updatedAt
	}

	if _, putErr := driver.PutMulti(keys, entries); datastore.IsDatastoreError(putErr) {
		return fmt.Errorf("PutMulti Error in UpdateIndexes: %v", putErr)
	}
	return nil
}
Example #2
0
// NewEventShowList pairs every Show in showList with the Event entity that
// its EventId refers to.
//
// The events are fetched in one GetMulti call. A Show whose event comes back
// with an empty Id is treated as an orphan: it is logged and left out of the
// result. The returned slice is never nil on success and keeps the order of
// showList. An error is returned only when datastore.IsDatastoreError reports
// true for the GetMulti error.
func NewEventShowList(appCtx *lib.AppContext, showList []event.Show) ([]EventShow, error) {
	logger := appCtx.Logger
	driver := NewEventDriver(appCtx)

	keys := make([]*ds.Key, len(showList))
	for i := range showList {
		keys[i] = driver.NewKey(showList[i].EventId, 0, nil)
	}
	// make zero-fills the slice, so every slot starts as an empty Event.
	events := make([]event.Event, len(showList))

	logger.Debug("lengths of showList: %d", len(keys))
	if err := driver.GetMulti(keys, events); datastore.IsDatastoreError(err) {
		return nil, err
	}

	result := make([]EventShow, 0)
	for i, ev := range events {
		show := showList[i]
		if ev.Id == "" {
			// TODO remove orphans.
			logger.Debug("Orphan Show entity is found: %q:", show.Id)
			continue
		}
		logger.Debug("Found Event entity for Show %q:", show.Id)
		result = append(result, EventShow{
			Show:  show,
			Event: ev,
		})
	}
	return result, nil
}
Example #3
0
// BulkUpdate writes the entries of list that need updating and returns the
// keys reported by PutMulti for the written entities.
//
// For every entry the currently stored entity is fetched first. An entry is
// skipped when
//   - its StationId is empty,
//   - the stored entity has Optout set, or
//   - hasDiff reports no difference between the stored and the new entity.
//
// Entries that are written get CreatedAt copied from the stored entity and
// UpdatedAt set to the current time. When nothing needs writing an empty,
// non-nil key slice is returned.
func (d *IEpgDriver) BulkUpdate(list []*tv.IEpg) ([]*ds.Key, error) {
	found := make([]tv.IEpg, len(list))
	keys := make([]*ds.Key, len(list))
	putKeys := make([]*ds.Key, 0)
	putEnts := make([]*tv.IEpg, 0)

	d.Logger.Info("Bulkupdate %d IEpg entries...", len(list))

	for i := range list {
		keys[i] = d.keyFromEntity(list[i])
	}

	d.Logger.Info("Checking existing IEpgs ...")
	if err := d.GetMulti(keys, found); datastore.IsDatastoreError(err) {
		return nil, err
	}

	for i := range list {
		newent := list[i]
		oldent := &found[i]

		if newent.StationId == "" {
			d.Logger.Debug("StationId of %q is empty. Ignore this record.", newent.ProgramTitle)
			continue
		}

		if oldent.Optout {
			// Only a short prefix of the Id is logged. Guard the slice so an
			// Id shorter than 8 bytes cannot panic, and pass the operands in
			// the order the format expects (title first, then Id).
			shortID := oldent.Id
			if len(shortID) > 8 {
				shortID = shortID[:8]
			}
			d.Logger.Debug("%q (Id: %q...) is opted-out.", oldent.ProgramTitle, shortID)
			continue
		}
		if !d.hasDiff(oldent, newent) {
			d.Logger.Debug("%q (%q) does not have any diffs, skipped.", oldent.Id, oldent.ProgramTitle)
			continue
		}
		// NOTE(review): if no stored entity was loaded into found[i], oldent
		// is the zero value and CreatedAt is copied as the zero time —
		// confirm that new entities get their CreatedAt elsewhere.
		newent.CreatedAt = oldent.CreatedAt
		newent.UpdatedAt = time.Now()

		putKeys = append(putKeys, d.keyFromEntity(newent))
		putEnts = append(putEnts, newent)
	}
	if len(putEnts) == 0 {
		d.Logger.Info("No entries are needed to update.")
		return make([]*ds.Key, 0), nil
	}

	d.Logger.Info("Updating %d/%d entries.", len(putEnts), len(list))
	written, err := d.PutMulti(putKeys, putEnts)
	if err != nil {
		// The format needs one verb per operand; with a single %v the error
		// itself was rendered as an %!(EXTRA ...) suffix.
		d.Logger.Error("Failed to update %d entries: %v", len(putEnts), err)
		return nil, err
	}
	return written, nil
}
Example #4
0
// setupApi registers the ameblo API routes on app.Api.
//
// The history insights route is registered directly; the index, crawl and
// delete routes are wrapped with lib.Admin.Required. Apart from the DELETE
// route every handler simply forwards to a function defined elsewhere,
// passing app along.
//
// DELETE /ameblo/contents/:member.json resets CrawledAt to the zero time on
// every AmebloEntry whose Owner is the given member, working through the
// entries in pages of 100.
func setupApi(app *App) {
	app.Api.Get("/ameblo/insights/:member/history.json", func(res *wcg.Response, req *wcg.Request) {
		historyInsights(res, req, app)
	})
	app.Api.Get("/ameblo/indexes/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		indexAllMembers(res, req, app)
	}))
	app.Api.Get("/ameblo/indexes/:member.json", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		indexSpecifiedMember(res, req, app)
	}))
	app.Api.Get("/ameblo/contents/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		crawlAllMembers(res, req, app)
	}))
	app.Api.Get("/ameblo/contents/:member.json", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		crawlSpecifiedMembers(res, req, app)
	}))
	app.Api.Delete("/ameblo/contents/:member.json", lib.Admin.Required(
		func(res *wcg.Response, req *wcg.Request) {
			var appCtx = lib.NewAppContextFromRequest(req)

			member, ok := app.Members[req.Param("member")]
			if !ok {
				lib.NotFound(res, req)
				return
			}

			// Page size for each query/put round trip.
			const perEnt = 100
			offset := 0
			d := NewAmebloEntryDriver(appCtx)

			for {
				var list []*ameblo.AmebloEntry
				q := d.NewQuery().Filter("Owner =", member.Name).Offset(offset).Limit(perEnt)
				keys, err := q.GetAll(&list)
				if err != nil {
					lib.InternalError(res, req, err)
					return
				}
				for _, ent := range list {
					ent.CrawledAt = time.Time{}
				}
				if _, err = d.PutMulti(keys, list); datastore.IsDatastoreError(err) {
					lib.InternalError(res, req, err)
					// Stop here: without this return the loop kept going and
					// an Ok response was written after the error response.
					return
				}
				offset += len(list)
				// A short page means the query has been exhausted.
				if len(list) < perEnt {
					break
				}
			}
			app.Api.Ok(res, req)
		},
	))
}
Example #5
0
// generateYACrawlerKeywords creates or refreshes the CrawlerKeyword entity
// for every upcoming Show that has a YAKeyword and returns the entities that
// were written.
//
// Shows are selected with StartAt >= util.Date(now()), ordered by StartAt.
// For each selected show the monitoring window is
// [OpenAt - 3 months, OpenAt]:
//   - when no CrawlerKeyword is loaded for the show, a new one is created;
//   - when one exists with a different window, its window is updated;
//   - otherwise the show is skipped.
//
// The returned slice is empty (non-nil) when nothing had to be written.
func generateYACrawlerKeywords(appCtx *lib.AppContext) ([]*yauction.CrawlerKeyword, error) {
	var showList []event.Show
	var logger = appCtx.Logger
	q := NewShowDriver(appCtx).NewQuery()
	q = q.Filter("StartAt >=", util.Date(now())).Order("StartAt")
	_, err := q.GetAll(&showList)
	if err != nil {
		return nil, err
	}
	logger.Debug("Found %d EventShow entities to be checked.", len(showList))

	keywordDriver := yauctionapp.NewCrawlerKeywordDriver(appCtx)
	// targets[i] is the show that crawlerKeywordKeys[i] was built from. Shows
	// without a YAKeyword are skipped, so indexes into showList do NOT line
	// up with the GetMulti results; always go through targets.
	targets := make([]event.Show, 0, len(showList))
	crawlerKeywordKeys := make([]*ds.Key, 0, len(showList))
	for _, show := range showList {
		logger.Debug("Show: %q", show.Id)
		if show.YAKeyword == "" {
			logger.Debug("EventShow %q doesn't have YAKeyword, skipped.", show.Id)
			continue
		}
		// NOTE(review): the lookup key uses the raw YAKeyword while new
		// entities below are keyed by NormalizeKeyword(YAKeyword) — confirm
		// both produce the same key, otherwise existing entities are missed.
		targets = append(targets, show)
		crawlerKeywordKeys = append(
			crawlerKeywordKeys,
			keywordDriver.NewKey(show.YAKeyword, 0, nil),
		)
	}

	crawlerKeywords := make([]*yauction.CrawlerKeyword, len(crawlerKeywordKeys))
	err = keywordDriver.GetMulti(crawlerKeywordKeys, crawlerKeywords)
	if err != nil && datastore.IsDatastoreError(err) {
		return nil, err
	}

	updateKeys := make([]*ds.Key, 0)
	updates := make([]*yauction.CrawlerKeyword, 0)
	for i, ck := range crawlerKeywords {
		show := targets[i]
		startAt := show.OpenAt.AddDate(0, -3, 0) // start monitoring 3 months before
		endAt := show.OpenAt

		if ck == nil {
			logger.Debug(
				"No CrawlerKeyword entity is registered for EventShow %q, creating.",
				show.Id,
			)
			keyword := yauctionapp.NormalizeKeyword(show.YAKeyword)
			updateKeys = append(updateKeys, keywordDriver.NewKey(keyword, 0, nil))
			updates = append(updates,
				&yauction.CrawlerKeyword{
					Keyword:   keyword,
					StartAt:   startAt,
					EndAt:     endAt,
					CreatedAt: now(),
				},
			)
			continue
		}

		// Compare instants with Equal: != on time values also compares the
		// location and monotonic reading, so the same instant can look
		// different and trigger a needless write.
		if ck.StartAt.Equal(startAt) && ck.EndAt.Equal(endAt) {
			logger.Debug(
				"CrawlerKeyword for %q already exisits but no need to update, skipped.",
				show.Id,
			)
			continue
		}

		logger.Debug(
			"CrawlerKeyword for %q already exisits and needs updating.",
			show.Id,
		)
		ck.StartAt = startAt
		ck.EndAt = endAt
		updateKeys = append(updateKeys, keywordDriver.NewKey(ck.Keyword, 0, nil))
		updates = append(updates, ck)
	}

	// Nothing changed: skip the datastore round trip.
	if len(updates) == 0 {
		return updates, nil
	}
	_, err = keywordDriver.PutMulti(updateKeys, updates)
	return updates, err
}
Example #6
0
// updateContents stores crawled entries together with the member references
// found in their content.
//
// The work is done in three steps:
//  1. For every entry, each member other than the entry's owner for whom
//     IsMentionedIn(entry.Content) is true yields an AmebloRef.
//  2. Inside RunInTransaction the AmebloRef keys found under each entry key
//     are deleted and the freshly built refs are inserted under new keys.
//  3. The entries themselves, with CrawledAt set to the current time, are
//     written with PutMulti.
//
// The first failing step aborts the function and its error is returned.
func updateContents(appCtx *lib.AppContext, entries []*ameblo.AmebloEntry, memberList []*ameblo.Member) error {
	entryDriver := NewAmebloEntryDriver(appCtx)

	// Step 1: collect the references from the crawled content.
	refs := make([]*ameblo.AmebloRef, 0)
	for _, entry := range entries {
		for _, member := range memberList {
			if entry.Owner == member.Name || !member.IsMentionedIn(entry.Content) {
				continue
			}
			refs = append(refs, &ameblo.AmebloRef{entry.Url, entry.Owner, member.Name})
		}
	}

	// Step 2a: build the entry keys and gather the keys of the references
	// currently stored under each entry.
	refDriver := NewAmebloRefDriver(appCtx)
	entryKeys := make([]*ds.Key, len(entries))
	staleRefKeys := make([]*ds.Key, 0)
	for i, entry := range entries {
		entry.CrawledAt = time.Now()
		entryKeys[i] = entryDriver.NewKey(entry.Url, 0, nil)
		found, err := refDriver.NewQuery().Ancestor(entryKeys[i]).KeysOnly().GetAll(nil)
		if datastore.IsDatastoreError(err) {
			return fmt.Errorf("Could not retrieve deletion keys for AmebloRef entities: %v", err)
		}
		staleRefKeys = append(staleRefKeys, found...)
	}

	// Step 2b: one new key per reference, parented by its entry's key.
	newRefKeys := make([]*ds.Key, len(refs))
	for i, ref := range refs {
		// NOTE(review): the second result of wcg.UUID is discarded — confirm
		// it cannot signal a failure that leaves uid unusable.
		uid, _ := wcg.UUID()
		newRefKeys[i] = refDriver.NewKey(uid, 0, entryDriver.NewKey(ref.Url, 0, nil))
	}

	// Step 2c: swap the old references for the new ones.
	// NOTE(review): the writes go through refDriver rather than the driver
	// handed to the callback — verify they really take part in the
	// transaction.
	txErr := entryDriver.RunInTransaction(func(tx *datastore.Driver) error {
		if len(staleRefKeys) > 0 {
			tx.Logger.Debug("Deleting old %d AmebloRef entities...", len(staleRefKeys))
			if err := refDriver.DeleteMulti(staleRefKeys); err != nil {
				return fmt.Errorf("Could not delete old AmebloRef entities: %v", err)
			}
		}
		if len(newRefKeys) > 0 {
			tx.Logger.Debug("Inserting new %d AmebloRef entities...", len(newRefKeys))
			if _, err := refDriver.PutMulti(newRefKeys, refs); datastore.IsDatastoreError(err) {
				return fmt.Errorf("Could not insert new AmebloRef entities: %v", err)
			}
		}
		return nil
	}, nil)
	if txErr != nil {
		return txErr
	}

	// Step 3: persist the entries themselves.
	entryDriver.Logger.Debug("Updating %d AmebloEntry entities...", len(entryKeys))
	if _, err := entryDriver.PutMulti(entryKeys, entries); datastore.IsDatastoreError(err) {
		return fmt.Errorf("Could not update AmebloEntry entities: %v", err)
	}
	return nil
}
Example #7
0
// setupYAuctionApi registers the yauction API routes on app.Api.
//
// GET /yauction/admin/closed/?q=KEYWORD (wrapped with lib.Admin.Required)
// runs crawlClosedKeyword for the keyword and responds with the auction ids
// it returned. When the crawl returns both ids and an error, the ids are
// still reported and the error text is added under "error".
//
// GET /yauction/keywords/?q=KEYWORD responds with the CrawlerKeyword stored
// for the keyword and the AuctionItem entities associated with it. A missing
// or malformed q yields a bad-request response, an unknown keyword a
// not-found response.
func setupYAuctionApi(app *App) {
	app.Api.Get("/yauction/admin/closed/", lib.Admin.Required(func(res *wcg.Response, req *wcg.Request) {
		var appCtx = lib.NewAppContextFromRequest(req)
		keyword := req.Query("q")
		if keyword == "" {
			app.Api.BadRequest(res, req, fmt.Errorf("Missing 'q' parameter."))
			return
		}
		ids, err := crawlClosedKeyword(appCtx, keyword)
		if err != nil && ids == nil {
			lib.Error(res, req, err)
			return
		}
		// Partial success: report what was collected plus the error text.
		body := map[string]interface{}{
			"num_updates": len(ids),
			"auction_ids": ids,
		}
		if err != nil {
			body["error"] = err.Error()
		}
		res.WriteJson(body)
	}))

	app.Api.Get("/yauction/keywords/", func(res *wcg.Response, req *wcg.Request) {
		// Reject a malformed escape sequence instead of silently continuing
		// with the empty string QueryUnescape returns on failure.
		keyword, err := url.QueryUnescape(req.Query("q"))
		if err != nil {
			app.Api.BadRequest(res, req, fmt.Errorf("Invalid 'q' parameter: %v", err))
			return
		}
		if keyword == "" {
			app.Api.BadRequest(res, req, fmt.Errorf("Missing 'q' parameter."))
			return
		}

		var appCtx = lib.NewAppContextFromRequest(req)
		var ckeyword yauction.CrawlerKeyword
		var assocs []yauction.AssocCrawlerKeywordAuctionItem

		ckdriver := NewCrawlerKeywordDriver(appCtx)
		assocdriver := NewAssocCrawlerKeywordAuctionItemDriver(appCtx)
		aidriver := NewAuctionItemDriver(appCtx)

		if err := ckdriver.Get(ckdriver.NewKey(keyword, 0, nil), &ckeyword); err != nil {
			if err == ds.ErrNoSuchEntity {
				app.Api.NotFound(res, req)
			} else {
				app.Api.InternalError(res, req, err)
			}
			return
		}

		if _, err := assocdriver.NewQuery().Filter("Keyword =", keyword).GetAll(&assocs); err != nil {
			app.Api.InternalError(res, req, err)
			return
		}

		// Resolve every association to its AuctionItem in one GetMulti call.
		itemKeys := make([]*datastore.Key, len(assocs))
		items := make([]yauction.AuctionItem, len(assocs))
		for i, assoc := range assocs {
			itemKeys[i] = aidriver.NewKey(assoc.AuctionId, 0, nil)
		}

		if err := aidriver.GetMulti(itemKeys, items); ds.IsDatastoreError(err) {
			app.Api.InternalError(res, req, err)
			return
		}

		res.WriteJson(map[string]interface{}{
			"keyword":  ckeyword,
			"auctions": items,
		})
	})
}