// Simplified and altered PageRank.
func WeightTags(s *session.Session) {
	s.IncrementState()
	tags := s.GetHashtagCursor()
	goodWeight := 1.0 / s.GetHashtagSize(true)
	badWeight := -1.0 / s.GetHashtagSize(false)
	for {
		key, err := tags.Next(nil)
		if err != nil {
			log.Println(err)
			break // No further entities match the query.
		}
		hashtag := s.Hashtag(key.StringID())
		log.Println(hashtag.Name)
		processTags(s, goodWeight, hashtag.Beneficiaries)
		processTags(s, badWeight, hashtag.Victims)
	}
	s.SetTopTags()
	// Move on to the next step.
	s.IncrementStep()
	s.StopProcessing()
}
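
// processTags isn't shown here; a minimal sketch, assuming it just adds the
// step's weight to each related hashtag's running score (Weight as an
// accumulator field on the hashtag entity is an assumption):
func processTags(s *session.Session, weight float64, related []string) {
	for _, name := range related {
		if name == "" {
			continue // CrawlTags blanks out duplicate tags.
		}
		h := s.Hashtag(name)
		h.Weight += weight // Assumed accumulator, read later by SetTopTags.
		s.SaveHashtag(h)
	}
}
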
// Might be worth putting a defer in here in case a job breaks,
// i.e. to clean up the processing flag.
func Learn(s *session.Session) string {
	if s.SetLearning() { // A fresh session: set up learning.
		log.Println("Set up learning")
		status := http.GetStatus(s)
		s.SetLimits(int(status.Follows), int(status.Followed_by))
	}
	if !s.SetProcessing() {
		// A job is still running; show we're still working.
		return " *"
	}
	switch s.GetLearningStep() {
	case utils.APPRAISE:
		jobs.MinePeople(s)
		return StatusBar(s, "Mining Followers")
	case utils.SCORN:
		jobs.MinePeople(s)
		return StatusBar(s, "Mining Following")
	case utils.BUILD:
		// Logistic regression: get the records and run in the background.
		go jobs.LogisticRegression(s)
		s.IncrementStep()
		return "* Running Logistic Regression"
	case utils.GOODTAGS:
		go jobs.CrawlTags(s, true)
		return StatusBar(s, "Finding Good Tags")
	case utils.BADTAGS:
		go jobs.CrawlTags(s, false)
		return StatusBar(s, "Finding Bad Tags")
	case utils.COMPUTETAGS:
		go jobs.WeightTags(s)
		return "* Ranking Tags"
	case utils.SHARE:
		go s.Share()
		s.IncrementStep()
		s.StopProcessing()
		return "Sharing"
	}
	return "Stop"
}
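
// The steps above are plain integer constants; a sketch of how utils might
// declare them (the names come from the switch above; the iota ordering is
// an assumption, though Learn's flow implies APPRAISE runs before SCORN,
// and so on). In package utils:
const (
	APPRAISE    = iota // Mine the accounts that follow us.
	SCORN              // Mine the accounts we follow.
	BUILD              // Fit the logistic regression.
	GOODTAGS           // Crawl tags from positive accounts.
	BADTAGS            // Crawl tags from negative accounts.
	COMPUTETAGS        // Weight and rank the collected tags.
	SHARE              // Publish the results.
)
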
func CrawlTags(s *session.Session, positive bool) {
	keys := s.GetPeopleCursor(positive, utils.MAXREQUESTS*s.GetState())
	s.IncrementState()
	total := 0
	size := 0
	for {
		key, err := keys.Next(nil)
		total++
		if err != nil {
			if total < utils.MAXREQUESTS {
				s.IncrementStep()
			}
			break // No further entities match the query.
		}
		media := http.GetMedia(s, key.StringID()).Data
		captured := len(media) - 1
		for i := 0; i < 3 && i < captured; i++ {
			tagCount := len(media[i].Tags)
			lim := 5
			for j, tag := range media[i].Tags {
				if tag == "" {
					continue
				}
				if j >= lim {
					break
				}
				h := s.Hashtag(tag)
				for k := 0; k < lim && k < tagCount; k++ {
					if j == k {
						continue
					}
					if tag == media[i].Tags[k] {
						// Duplicate tag: blank it out and widen the window.
						lim++
						media[i].Tags[k] = ""
						continue
					}
					if positive {
						h.Beneficiaries = append(h.Beneficiaries, media[i].Tags[k])
					} else {
						h.Victims = append(h.Victims, media[i].Tags[k])
					}
				}
				s.SaveHashtag(h)
				size++
			}
		}
	}
	s.IncrementSize(size, positive)
	s.StopProcessing()
}
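
// The nested loops above record, for each of a post's leading tags, which
// other tags it appeared alongside, blanking duplicates as it goes. A
// standalone sketch of the same idea, simplified to dedupe with a set
// instead of blanking in place:
func coOccurring(tags []string, window int) map[string][]string {
	seen := make(map[string]bool)
	var uniq []string
	for _, t := range tags {
		if t != "" && !seen[t] {
			seen[t] = true
			uniq = append(uniq, t)
		}
	}
	if len(uniq) > window {
		uniq = uniq[:window] // Keep only the leading tags.
	}
	out := make(map[string][]string, len(uniq))
	for j, tag := range uniq {
		for k, other := range uniq {
			if j != k {
				out[tag] = append(out[tag], other)
			}
		}
	}
	return out
}

// For example, coOccurring([]string{"go", "golang", "go", "code"}, 5)
// yields {"go": ["golang", "code"], "golang": ["go", "code"], ...}.
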
// The math looks right, but it hasn't been rigorously tested.
func LogisticRegression(s *session.Session) {
	// Grab all people up front, since minimization makes many passes.
	people := s.GetPeople()
	objective := Objective{
		People: people,
		Lambda: s.GetLambda(),
		Alpha:  s.GetAlpha(),
		Size:   float64(len(people)),
	}
	start := []float64{1, 1, 1, 0} // Bias, Following, Followers, Posts
	minimum := Minimize(objective, start)
	log.Println(minimum)
	s.SetTheta(minimum)
	s.StopProcessing()
}
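
// Objective and Minimize aren't shown here; a minimal sketch of the cost
// such an objective typically evaluates: an L2-regularized logistic
// (cross-entropy) loss over theta = (bias, following, followers, posts).
// Assumes "math" is imported and that Person.Followed serves as the
// training label:
func cost(theta []float64, people []session.Person, lambda float64) float64 {
	m := float64(len(people))
	var j float64
	for _, p := range people {
		x := []float64{1, p.Following, p.Followers, p.Posts}
		var z float64
		for i := range theta {
			z += theta[i] * x[i]
		}
		h := 1 / (1 + math.Exp(-z)) // Sigmoid hypothesis.
		y := 0.0
		if p.Followed { // Assumed label: did they follow back?
			y = 1
		}
		j += -y*math.Log(h) - (1-y)*math.Log(1-h)
	}
	j /= m
	for i := 1; i < len(theta); i++ { // Don't regularize the bias term.
		j += lambda / (2 * m) * theta[i] * theta[i]
	}
	return j
}
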
func process(s *session.Session, users *http.Users, i int, follows float64) {
	for i >= 0 {
		id := users.Data[i].Id
		user := http.GetUser(s, id)
		log.Println(user)
		if user.Data.Counts.Followed_by+user.Data.Counts.Follows > 0 {
			// Check the follower records: if we follow them and they appear
			// in the other records, don't change anything.
			// Note: Followed_by is the follower count in the API payload.
			person := session.Person{
				Followers: float64(user.Data.Counts.Followed_by),
				Following: float64(user.Data.Counts.Follows),
				Posts:     float64(user.Data.Counts.Media),
				Follows:   !s.CheckCache(id),
			}
			// Because unset properties won't change, this should be fine.
			if int(follows) == utils.SCORN {
				person.Followed = true
				person.Follows = !person.Follows
			}
			// Add to the batch and to the keys.
			s.PutPerson(person, id)
		}
		i--
	}
	// The count has caught up, so we're done.
	if s.FinishedCount() {
		s.SavePeople()
		s.StopProcessing()
	}
}
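
// No caller of process is shown; a minimal sketch of how MinePeople might
// drive it, one page of users per tick. GetFollowers is an assumed helper
// wrapping the paginated followers endpoint; everything else mirrors the
// calls above. Note the learning step doubles as the follows flag:
func MinePeople(s *session.Session) {
	users := http.GetFollowers(s, s.GetState()) // Assumed: fetch one page.
	s.IncrementState()
	// Walk the page from the back, in the background.
	go process(s, users, len(users.Data)-1, float64(s.GetLearningStep()))
}
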