// Add a slice of stories to the stories FIFO func addStories(s []*story.Story) { stories.mutex.Lock() defer stories.mutex.Unlock() for _, story := range s { stories.add(story) } config.Debug("Added ", len(s), "new stories") config.Debug("start =", stories.start, ", end =", stories.end) }
// Indicate that a user has browsed up to the given storyid func markBrowsed(session *Session, storyid int64) { // We are about to access stories stories.mutex.RLock() defer stories.mutex.RUnlock() if storyid < stories.start || storyid < session.haveBrowsed { config.Debug("Not marking browsed ", storyid, ", ", stories.start, ", ", session.haveBrowsed) session.filtered = session.filtered[0:0] session.unfiltered = session.unfiltered[0:0] return } if storyid > stories.end { log.Println("User has browsed past the last story", storyid, ">", stories.end) session.filtered = session.filtered[0:0] session.unfiltered = session.unfiltered[0:0] return } // Loop through the filtered and unfiltered stories and mark // the unread ones as uninteresting for _, s := range session.filtered { i := s.Id if i < session.haveBrowsed { config.Debug("Not uninteresting: ", i) continue } if !session.haveRead[i] && !session.haveIgnored[i] { session.classifyStory(i, Uninteresting) session.haveIgnored[i] = true } } for _, s := range session.unfiltered { i := s.Id if i < session.haveBrowsed { config.Debug("Not uninteresting: ", i) continue } if !session.haveRead[i] && !session.haveIgnored[i] { session.classifyStory(i, Uninteresting) } } // Clear the filtered and unfiltered stories session.filtered = session.filtered[0:0] session.unfiltered = session.unfiltered[0:0] // Keep track of how far the user has browsed session.haveBrowsed = storyid - 1 }
// Add a story to the fifo func (f *fifo) add(s *story.Story) { // The fifo relies on the assumption that storyids always // increase if s.Id < f.end { log.Panicln("Non-increasing story id added to fifo: ", s.Id, ", ", f.start, ", ", f.end) } if len(f.index) == 0 { f.start = s.Id f.index = append(f.index, s) f.end = s.Id } else if len(f.index) == cap(f.index) { f.index[f.head] = s f.head = (f.head + 1) % cap(f.index) f.start = f.index[f.head].Id f.end = s.Id } else { f.index = append(f.index, s) f.end = s.Id } config.Debug("Added story: ", s.Id, ", ", f.start, "-", f.end) }
// Classify the given word list, returns the id of the class func (c *Classifier) Classify(words []string) int { // Prefilter the words filtered := c.Prefilter(words) weights := make([]float64, len(c.Classes)) // Loop through the classes for i := range c.Classes { config.Debug("Weighting class ", i) weights[i] = c.weight(c.Classes[i], filtered) } // Find the heaviest class h := 0 hw := weights[h] for j, w := range weights { if w > hw { hw = w h = j } } return h }
// Get the session from the cookie in the request // Creates a new session and cookie if none can be found func getSession(w http.ResponseWriter, req *http.Request) (*Session, bool) { cookie, err := req.Cookie("id") if err == nil { s, ok := sessionSync(cookie.Value) if ok { return s, ok } log.Println("Invalid session cookie presented: ", cookie.Value) } // Create a session s := newSession() config.Debug("Creating new session ", s.id) sessions.Create(s) // Write the session to the HTTP response newcookie := http.Cookie{ Name: "id", Value: s.id, Path: "/", MaxAge: math.MaxInt32, HttpOnly: true} w.Header().Add("Set-Cookie", newcookie.String()) s.mutex.Lock() return s, true }
// Get interesting stories starting at the given story func interesting(session *Session, start int64) []*story.Story { // We are about to access stories stories.mutex.RLock() defer stories.mutex.RUnlock() ret := make([]*story.Story, 0, interestingPerPage) if start > stories.end { return ret } for i := start; i < stories.end; i++ { if session.haveRead[i] || session.haveIgnored[i] { continue } story, ok := stories.get(i) if !ok { break } class := session.classifier.Classify(story.Wordlist) if class == Interesting { config.Debug("Interesting:", story.Rss.Title) ret = append(ret, story) if len(ret) == interestingPerPage { break } } } return ret }
// Calculate the weight of the given wordlist for the given class func (c *Classifier) weight(class Class, words []string) float64 { // Handle pathological cases if c.Total == 0 { return 0 } else if class.Count == 0 { return -500 } //log.Println("Vocabulary:", class.vocabulary) // Calculate the prior, log(P(class)) //prior := math.Log2(float64(class.Count) / float64(c.Total)) //config.Debug("prior ", prior) prior := class.Prior // TODO: improve the smoothing - currently wholly unknown titles get // classified as interesting // Calculate the log likelihood log(P(words|class)) ll := prior k := float64(c.Words) for _, w := range words { occurs := class.Vocabulary[w] // Calculate the probability of the word appearing in this class // but add smoothing to avoid overfitting // P(w|class) = N(w,class) + 1 // -------------- // N(class) + k // where: // k = number of words in the training set pw := (float64(occurs) + 1) / (float64(class.Count) + k) config.Debug("P(", w, "|class) = ", pw) ll += math.Log2(pw) } config.Debug("ll =", ll) return ll }
// Save a session to the db func saveSession(c cache.Entry) { // Convert from the cache format to a session session, ok := c.(*Session) if !ok { log.Fatal("Cannot convert cache.Entry to *Session") } config.Debug("Saving session ", session.id) config.Debug("Writing classifier:") config.Debug(session.classifier) config.Debug("Writing classified index: ", session.haveClassified) config.Debug("Writing browsed index: ", session.haveBrowsed) // Serialize the session cbytes, err := session.classifier.Serialise() if err != nil { return } ibytes, err := serialiseStoryMap(session.haveIgnored) if err != nil { return } // Write the session dbs := db.Session{ Id: session.id, Classifier: cbytes, HaveIgnored: ibytes, HaveClassified: session.haveClassified, HaveBrowsed: session.haveBrowsed} if session.isNew { db.CreateSession(&dbs) } else { db.WriteSession(&dbs) } }
func readFeed(filename string) { config.Debug("Reading feed: ", filename) stories, err := rss.Decode(filename) if err != nil { log.Println("Cannot decode ", filename, " :", err) return } todo := make([]*story.Story, 0, 64) for _, s := range stories { todo = addStory(todo, s) } // Add any new stories session.AddStories(todo) }
// Get the interesting stories starting at the given story func FilteredStories(w http.ResponseWriter, req *http.Request, storyid int64) *StoryIndex { // Get the users session if possible session, session_ok := getSession(w, req) if session_ok { defer session.release() } // We are about to access stories stories.mutex.RLock() defer stories.mutex.RUnlock() // Try and start the index at the latest browsed story if no story is specified start := storyid if session_ok && storyid == 0 { if session.haveBrowsed > 0 { start = session.haveBrowsed + 1 } else { start = stories.end - DefaultStart } config.Debug("Building story index starting from", start) } start = max(start, stories.start) ret := NewStoryIndex() // Get stories if !session_ok { ret.Unfiltered = unfiltered(nil, false, start, storiesPerPage, nil) } else if len(session.filtered) > 0 || len(session.unfiltered) > 0 { ret.Filtered = session.filtered ret.Unfiltered = session.unfiltered } else { ret.Filtered = interesting(session, start) todo := storiesPerPage - len(ret.Filtered) ret.Unfiltered = unfiltered(session, true, start, todo, storyIdMap(ret.Filtered)) session.filtered = ret.Filtered session.unfiltered = ret.Unfiltered } previousNext(ret, start) return ret }
// Read a session from the db func readSession(key string) (cache.Entry, bool) { dbs, ok := db.GetSession(key) if !ok { log.Println("Failed to read session ", key, " from db") return nil, false } // Convert the session from db format // Deserialize the classsifier classifier, err := nbc.Deserialise(dbs.Classifier) if err != nil { return nil, false } // Get the read stories read := getReadMap(key) // Deserialise ignored stories ignored, err := deserialiseStoryMap(dbs.HaveIgnored) if err != nil { return nil, false } config.Debug("Deserialised classifier: ", classifier) // Return the deserialized session ret := &Session{ id: dbs.Id, classifier: classifier, haveRead: read, haveIgnored: ignored, haveClassified: dbs.HaveClassified, haveBrowsed: dbs.HaveBrowsed} ret.filtered = make([]*story.Story, 0, storiesPerPage) ret.unfiltered = make([]*story.Story, 0, storiesPerPage) return ret, true }