// Simplified and altered PageRank.
func WeightTags(s *session.Session) {
	s.IncrementState()
	tags := s.GetHashtagCursor()
	// Normalise each vote by the size of the corresponding corpus.
	goodWeight := 1.0 / s.GetHashtagSize(true)
	badWeight := -1.0 / s.GetHashtagSize(false)
	for {
		key, err := tags.Next(nil)
		if err != nil {
			log.Println(err)
			break // No further entities match the query.
		}
		hashtag := s.Hashtag(key.StringID())
		log.Println(hashtag.Name)
		processTags(s, goodWeight, hashtag.Beneficiaries)
		processTags(s, badWeight, hashtag.Victims)
	}
	s.SetTopTags()
	// Move on to the next step.
	s.IncrementStep()
	s.StopProcessing()
}
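// processTags is called above but not defined in this file; a minimal
// sketch of the assumed behaviour: spread the per-tag weight across every
// hashtag related to the current one. The Weight field is an assumption,
// while s.Hashtag and s.SaveHashtag are used the same way as in CrawlTags
// below.
func processTags(s *session.Session, weight float64, related []string) {
	for _, name := range related {
		if name == "" {
			continue // skip tags blanked out during deduplication
		}
		h := s.Hashtag(name)
		h.Weight += weight // assumed accumulator, read later by SetTopTags
		s.SaveHashtag(h)
	}
}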
// Might be worth putting a defer in here in case a job breaks,
// i.e. to clean up the processing flag.
func Learn(s *session.Session) string {
	if s.SetLearning() { // New session: fetch and record the account's limits.
		log.Println("Set up learning")
		status := http.GetStatus(s)
		s.SetLimits(int(status.Follows), int(status.Followed_by))
	}
	if !s.SetProcessing() { // Show we're still working.
		return " *"
	}
	switch s.GetLearningStep() {
	case utils.APPRAISE:
		jobs.MinePeople(s)
		return StatusBar(s, "Mining Followers")
	case utils.SCORN:
		jobs.MinePeople(s)
		return StatusBar(s, "Mining Following")
	case utils.BUILD:
		// Logistic regression: get the records and run.
		go jobs.LogisticRegression(s)
		s.IncrementStep()
		return "* Running Logistic Regression"
	case utils.GOODTAGS:
		go jobs.CrawlTags(s, true)
		return StatusBar(s, "Finding Good Tags")
	case utils.BADTAGS:
		go jobs.CrawlTags(s, false)
		return StatusBar(s, "Finding Bad Tags")
	case utils.COMPUTETAGS:
		go jobs.WeightTags(s)
		return "* Ranking Tags"
	case utils.SHARE:
		go s.Share()
		s.IncrementStep()
		s.StopProcessing()
		return "Sharing"
	}
	return "Stop"
}
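// StatusBar is called from Learn above but not defined in this file; a
// minimal sketch, assuming it prefixes the label with a marker and a rough
// progress indicator derived from the session's paging state. The exact
// rendering is an assumption.
func StatusBar(s *session.Session, label string) string {
	ticks := s.GetState() % 10 // hypothetical: one tick per processed page, cycling
	return fmt.Sprintf("* %s [%s%s]", label,
		strings.Repeat("#", ticks), strings.Repeat("-", 10-ticks))
}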
func CrawlTags(s *session.Session, positive bool) {
	keys := s.GetPeopleCursor(positive, utils.MAXREQUESTS*s.GetState())
	s.IncrementState()
	total := 0
	size := 0
	for {
		key, err := keys.Next(nil)
		total++
		if err != nil {
			if total < utils.MAXREQUESTS {
				// The cursor ran dry before the request cap: this pass is done.
				s.IncrementStep()
			}
			break // No further entities match the query.
		}
		media := http.GetMedia(s, key.StringID()).Data
		captured := len(media) - 1
		for i := 0; i < 3 && i < captured; i++ {
			tagCount := len(media[i].Tags)
			lim := 5
			for j, tag := range media[i].Tags {
				if tag == "" {
					continue // Already consumed as a duplicate below.
				}
				if j >= lim {
					break
				}
				h := s.Hashtag(tag)
				for k := 0; k < lim && k < tagCount; k++ {
					if j == k {
						continue
					}
					if tag == media[i].Tags[k] {
						// Duplicate tag: blank it out and widen the window
						// so it doesn't cost a slot.
						lim++
						media[i].Tags[k] = ""
						continue
					}
					if positive {
						h.Beneficiaries = append(h.Beneficiaries, media[i].Tags[k])
					} else {
						h.Victims = append(h.Victims, media[i].Tags[k])
					}
				}
				s.SaveHashtag(h)
				size++
			}
		}
	}
	s.IncrementSize(size, positive)
	s.StopProcessing()
}
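// The Hashtag entity itself is not defined in this file; from its use in
// WeightTags and CrawlTags it is assumed to look roughly like this. The
// Weight field matches the processTags sketch above and is an assumption.
type Hashtag struct {
	Name          string
	Weight        float64  // assumed: accumulated score used by SetTopTags
	Beneficiaries []string // tags co-occurring on accounts scored positive
	Victims       []string // tags co-occurring on accounts scored negative
}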
func MinePeople(s *session.Session) {
	// Set up channel
	next := make(chan *http.Users)
	var batch http.Users
	if s.GetNext() == "" {
		if s.GetState() > 0 {
			s.IncrementStep()
		}
		if s.GetLearningStep() == utils.APPRAISE {
			batch = http.GetFollowers(s)
		} else {
			batch = http.GetFollowing(s)
		}
	} else {
		batch = http.GetNext(s, s.GetNext())
	}
	go listen(s, next, 0, s.IncrementState())
	next <- &batch
}
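// The http.Users payload handled above and in listen below is defined in
// the http package, not here; from the fields accessed (Data,
// Pagination.Next_url) its shape is assumed to be roughly:
type Users struct {
	Data []struct {
		Id       string // assumed fields; only len(Data) is read in this file
		Username string
	}
	Pagination struct {
		Next_url string // empty once the final page has been fetched
	}
}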
// Asynchronously chain the batched user calls.
func listen(s *session.Session, next chan *http.Users, calls int, follows float64) {
	users := <-next
	i := len(users.Data) - 1
	s.IncrementCount()
	go process(s, users, i, follows)
	close(next)
	if calls == utils.MAXPEOPLEGRAB {
		// Hit the per-run cap: save the cursor so the next run resumes here.
		s.SetNext(users.Pagination.Next_url)
		return
	}
	var batch http.Users
	nxt := make(chan *http.Users)
	if users.Pagination.Next_url != "" {
		log.Println("Getting another batch")
		batch = http.GetNext(s, users.Pagination.Next_url)
	} else if follows == 0 {
		// If follows == float64(s.GetLearningStep()) we could switch on an
		// array of step functions instead of branching here.
		log.Println("Proceeding to next Step")
		s.IncrementStep()
		s.IncrementState()
		batch = http.GetFollowing(s)
		follows = float64(s.GetLearningStep())
	} else {
		s.SetNext("")
		return
	}
	go listen(s, nxt, calls+1, follows)
	nxt <- &batch
}
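// process is launched above but not defined in this file; a minimal sketch
// of the assumed behaviour: walk the batch from index i down to 0 and
// persist each user for the later regression step. SavePerson is a
// hypothetical helper, not part of the session API shown here; follows is
// assumed to label which list (followers vs. following) the batch came from.
func process(s *session.Session, users *http.Users, i int, follows float64) {
	for ; i >= 0; i-- {
		s.SavePerson(users.Data[i].Id, follows) // hypothetical persistence call
	}
}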