func (c *Crawler) ProcessTracks(wg *sync.WaitGroup) error { r := c.RedisClient.Get() defer r.Close() T: for { select { case batch_id, open := <-track_ids: if !open { break T } // Grab the batch of tracks to be crawled (up to 1,000) ids, err := redis.Strings(r.Do("HKEYS", fmt.Sprintf("trackMeta:%d", batch_id))) if err != nil { fmt.Println(err) continue } for _, id := range ids { tid, err := strconv.ParseInt(id, 0, 64) if err != nil { continue } track_id := int(tid) key, hkey := crawler.RedisKey("trackMeta", track_id) exists, _ := redis.Bool(r.Do("HEXISTS", key, hkey)) if exists == false { // we can't crawl this record because it was deleted before in a past crawl. continue } track, err := c.GetTrack(track_id) if err != nil { // We can't crawl this record. Make sure we delete it from our database. r.Do("HDEL", key, hkey) continue } j, err := json.Marshal(track) if err != nil { r.Do("HDEL", key, hkey) continue } r.Do("HSET", key, hkey, string(j)) if track.User.Id > 0 { // Store the user meta data if available userKey, userHkey := crawler.RedisKey("userMeta", track.User.Id) s, _ := redis.String(r.Do("HGET", userKey, userHkey)) if s == "null" || s == "" { // Only update the user data if this is the first time that we have seen them j, err := json.Marshal(track.User) if err != nil { continue } r.Do("HSET", userKey, userHkey, string(j)) } } // Get all of the comments on this track comments := c.GetTrackComments(track.Id) // Transform all of the User Ids from ints into strings track_commenters := []string{} for _, comment := range comments { // AppendSlice will only append to the slice if the user id does not already exist track_commenters = helpers.AppendSlice(track_commenters, fmt.Sprintf("%d", comment.UserId)) } if len(track_commenters) > 0 { comKey, comHkey := crawler.RedisKey("trackCommenters", track_id) // If this is the first time that we have seen this track (empty string or "null") then we // want to increment the counter of the number of user commenters that we have seen s, _ := redis.String(r.Do("HGET", comKey, comHkey)) if s == "null" || s == "" { for range track_commenters { k, h := crawler.RedisKey("trackCountCommenters", track_id) r.Do("HINCRBY", k, h, 1) } } r.Do("HSET", comKey, comHkey, strings.Join(track_commenters, ",")) } // Get all of the users who have favorited this track favoriters := c.GetTrackFavoriters(track.Id) // Transform the User IDs from a slice of ints to a slice of strings track_favoriters := []string{} for _, favorite := range favoriters { track_favoriters = append(track_favoriters, fmt.Sprintf("%d", favorite.Id)) } if len(track_favoriters) > 0 { favKey, favHkey := crawler.RedisKey("trackFavoriters", track_id) // If this is the first time that we have seen this track (empty string or "null") then we // want to increment the counter of the number of user favorites that we have seen s, _ := redis.String(r.Do("HGET", favKey, favHkey)) if s == "null" || s == "" { for range track_favoriters { k, h := crawler.RedisKey("trackCountFavoriters", track_id) r.Do("HINCRBY", k, h, 1) } } r.Do("HSET", favKey, favHkey, strings.Join(track_favoriters, ",")) } } r.Do("SREM", "crawlTracksTodo", batch_id) } } wg.Done() return nil }
func (c *Crawler) ProcessPlaylists(wg *sync.WaitGroup) error { r := c.RedisClient.Get() defer r.Close() P: for { select { case batch_id, open := <-playlist_ids: if !open { break P } // Get all of the IDs in this batch of 1,000 playlists ids, err := redis.Strings(r.Do("HKEYS", fmt.Sprintf("playlistTracks:%d", batch_id))) if err != nil { fmt.Println(err) continue } for _, id := range ids { // CLEANUP Go's string to int is string to int64 and then we are turning the int64 into an int pid, err := strconv.ParseInt(id, 0, 64) if err != nil { continue } playlist_id := int(pid) key, hkey := crawler.RedisKey("playlistTracks", playlist_id) exists, _ := redis.Bool(r.Do("HEXISTS", key, hkey)) if exists == false { // we can't crawl this record because it was deleted before in a past crawl. continue } playlist, err := c.GetPlaylist(playlist_id) if err != nil { // We can't crawl this record. Make sure we delete it from our database. r.Do("HDEL", key, hkey) continue } track_ids := []string{} for _, track := range playlist.Tracks { // AppendSlice keeps a unique slice in case the playlist has the same track multiple times track_ids = helpers.AppendSlice(track_ids, fmt.Sprintf("%d", track.Id)) } if len(track_ids) == 0 { // This playlist doesn't have any tracks associated with it r.Do("HDEL", key, hkey) continue } // If this is the first time that we have seen this playlist (empty string or "null") then we // want to increment the counter for each track in the playlist s, _ := redis.String(r.Do("HGET", key, hkey)) if s == "null" || s == "" { for _, track := range playlist.Tracks { // Increment the counter for the tracks, not the playlist k, h := crawler.RedisKey("trackCountPlaylist", track.Id) r.Do("HINCRBY", k, h, 1) } } r.Do("HSET", key, hkey, strings.Join(track_ids, ",")) } r.Do("SREM", "crawlPlaylistsTodo", batch_id) } } wg.Done() return nil }