Пример #1
0
// ProcessTracks is a crawl worker for tracks. It receives batch IDs from the
// package-level track_ids channel until that channel is closed. For every
// batch it lists the track IDs stored in the "trackMeta:<batch>" hash, then
// for each track refreshes the stored track JSON, stores the owner's user
// metadata the first time that user is seen, and stores the comma-separated
// IDs of the track's commenters and favoriters. Once a batch has been walked
// it is removed from the "crawlTracksTodo" set.
//
// wg.Done is called exactly once when the worker exits. Redis and crawl
// failures are printed and the affected record or batch is skipped, so the
// returned error is always nil.
func (c *Crawler) ProcessTracks(wg *sync.WaitGroup) error {
	r := c.RedisClient.Get()
	defer r.Close()
	// Deferred calls run last-in-first-out, so the WaitGroup is released
	// first and the connection is closed afterwards.
	defer wg.Done()

	// do issues a Redis command whose reply is not needed and prints any
	// failure instead of silently dropping it.
	do := func(cmd string, args ...interface{}) {
		if _, err := r.Do(cmd, args...); err != nil {
			fmt.Println(err)
		}
	}

	// Ranging over the channel ends the loop as soon as it is closed.
	for batchID := range track_ids {
		// Grab the batch of tracks to be crawled (up to 1,000)
		ids, err := redis.Strings(r.Do("HKEYS", fmt.Sprintf("trackMeta:%d", batchID)))
		if err != nil {
			fmt.Println(err)
			continue
		}
		for _, id := range ids {
			// Atoi parses strictly in base 10 and reports out-of-range values,
			// so an ID is never reinterpreted as octal/hex or truncated.
			trackID, err := strconv.Atoi(id)
			if err != nil {
				// Not a usable track ID; skip it.
				continue
			}

			key, hkey := crawler.RedisKey("trackMeta", trackID)
			exists, err := redis.Bool(r.Do("HEXISTS", key, hkey))
			if err != nil {
				fmt.Println(err)
				continue
			}
			if !exists {
				// We can't crawl this record because it was deleted in a past crawl.
				continue
			}
			track, err := c.GetTrack(trackID)
			if err != nil {
				// We can't crawl this record. Make sure we delete it from our database.
				do("HDEL", key, hkey)
				continue
			}
			j, err := json.Marshal(track)
			if err != nil {
				do("HDEL", key, hkey)
				continue
			}
			do("HSET", key, hkey, string(j))

			if track.User.Id > 0 {
				// Store the user meta data if available
				userKey, userHkey := crawler.RedisKey("userMeta", track.User.Id)
				// The error is deliberately ignored: an empty result is treated
				// as "not stored yet" below.
				s, _ := redis.String(r.Do("HGET", userKey, userHkey))
				if s == "null" || s == "" {
					// Only update the user data if this is the first time that we have seen them.
					// A failure to encode the optional user data is reported but does not
					// stop the comments and favoriters of this track from being crawled.
					if userJSON, err := json.Marshal(track.User); err != nil {
						fmt.Println(err)
					} else {
						do("HSET", userKey, userHkey, string(userJSON))
					}
				}
			}

			// Get all of the comments on this track
			comments := c.GetTrackComments(track.Id)
			// Transform all of the User Ids from ints into strings
			trackCommenters := []string{}
			for _, comment := range comments {
				// AppendSlice will only append to the slice if the user id does not already exist
				trackCommenters = helpers.AppendSlice(trackCommenters, fmt.Sprintf("%d", comment.UserId))
			}
			if len(trackCommenters) > 0 {
				comKey, comHkey := crawler.RedisKey("trackCommenters", trackID)
				// If this is the first time that we have seen this track (empty string or "null") then we
				// want to add the number of commenters to the track's commenter counter.
				s, _ := redis.String(r.Do("HGET", comKey, comHkey))
				if s == "null" || s == "" {
					// One HINCRBY by the total instead of one round trip per commenter.
					k, h := crawler.RedisKey("trackCountCommenters", trackID)
					do("HINCRBY", k, h, len(trackCommenters))
				}
				do("HSET", comKey, comHkey, strings.Join(trackCommenters, ","))
			}

			// Get all of the users who have favorited this track
			favoriters := c.GetTrackFavoriters(track.Id)
			// Transform the User IDs from a slice of ints to a slice of strings
			trackFavoriters := make([]string, 0, len(favoriters))
			for _, favorite := range favoriters {
				trackFavoriters = append(trackFavoriters, fmt.Sprintf("%d", favorite.Id))
			}
			if len(trackFavoriters) > 0 {
				favKey, favHkey := crawler.RedisKey("trackFavoriters", trackID)
				// If this is the first time that we have seen this track (empty string or "null") then we
				// want to add the number of favoriters to the track's favoriter counter.
				s, _ := redis.String(r.Do("HGET", favKey, favHkey))
				if s == "null" || s == "" {
					// One HINCRBY by the total instead of one round trip per favoriter.
					k, h := crawler.RedisKey("trackCountFavoriters", trackID)
					do("HINCRBY", k, h, len(trackFavoriters))
				}
				do("HSET", favKey, favHkey, strings.Join(trackFavoriters, ","))
			}
		}
		// The whole batch has been walked; take it off the todo set.
		do("SREM", "crawlTracksTodo", batchID)
	}
	return nil
}
Пример #2
0
// ProcessPlaylists is a crawl worker for playlists. It receives batch IDs
// from the package-level playlist_ids channel until that channel is closed.
// For every batch it lists the playlist IDs stored in the
// "playlistTracks:<batch>" hash, fetches each playlist, and stores the
// comma-separated IDs of its tracks. The first time a playlist is seen, the
// "trackCountPlaylist" counter of each of its tracks is incremented.
// Playlists that cannot be fetched or that have no tracks are deleted from
// the hash. Once a batch has been walked it is removed from the
// "crawlPlaylistsTodo" set.
//
// wg.Done is called exactly once when the worker exits. Redis and crawl
// failures are printed and the affected record or batch is skipped, so the
// returned error is always nil.
func (c *Crawler) ProcessPlaylists(wg *sync.WaitGroup) error {
	r := c.RedisClient.Get()
	defer r.Close()
	// Deferred calls run last-in-first-out, so the WaitGroup is released
	// first and the connection is closed afterwards.
	defer wg.Done()

	// do issues a Redis command whose reply is not needed and prints any
	// failure instead of silently dropping it.
	do := func(cmd string, args ...interface{}) {
		if _, err := r.Do(cmd, args...); err != nil {
			fmt.Println(err)
		}
	}

	// Ranging over the channel ends the loop as soon as it is closed.
	for batchID := range playlist_ids {
		// Get all of the IDs in this batch of 1,000 playlists
		ids, err := redis.Strings(r.Do("HKEYS", fmt.Sprintf("playlistTracks:%d", batchID)))
		if err != nil {
			fmt.Println(err)
			continue
		}
		for _, id := range ids {
			// Atoi parses strictly in base 10 and reports out-of-range values,
			// so an ID is never reinterpreted as octal/hex or truncated.
			playlistID, err := strconv.Atoi(id)
			if err != nil {
				// Not a usable playlist ID; skip it.
				continue
			}

			key, hkey := crawler.RedisKey("playlistTracks", playlistID)
			exists, err := redis.Bool(r.Do("HEXISTS", key, hkey))
			if err != nil {
				fmt.Println(err)
				continue
			}
			if !exists {
				// We can't crawl this record because it was deleted in a past crawl.
				continue
			}
			playlist, err := c.GetPlaylist(playlistID)
			if err != nil {
				// We can't crawl this record. Make sure we delete it from our database.
				do("HDEL", key, hkey)
				continue
			}

			// Named trackIDs so that it does not shadow the package-level
			// track_ids channel.
			trackIDs := []string{}
			for _, track := range playlist.Tracks {
				// AppendSlice keeps a unique slice in case the playlist has the same track multiple times
				trackIDs = helpers.AppendSlice(trackIDs, fmt.Sprintf("%d", track.Id))
			}
			if len(trackIDs) == 0 {
				// This playlist doesn't have any tracks associated with it
				do("HDEL", key, hkey)
				continue
			}

			// If this is the first time that we have seen this playlist (empty string or "null") then we
			// want to increment the counter for each track in the playlist.
			// The HGET error is deliberately ignored: an empty result is treated as "not seen yet".
			s, _ := redis.String(r.Do("HGET", key, hkey))
			if s == "null" || s == "" {
				// NOTE(review): this walks playlist.Tracks, not the de-duplicated
				// trackIDs, so a track listed twice is counted twice — confirm
				// that this is intended.
				for _, track := range playlist.Tracks {
					// Increment the counter for the tracks, not the playlist
					k, h := crawler.RedisKey("trackCountPlaylist", track.Id)
					do("HINCRBY", k, h, 1)
				}
			}
			do("HSET", key, hkey, strings.Join(trackIDs, ","))
		}
		// The whole batch has been walked; take it off the todo set.
		do("SREM", "crawlPlaylistsTodo", batchID)
	}
	return nil
}