Пример #1
0
// addStories pushes every story in s onto the shared stories FIFO.
// The FIFO's write lock is held for the whole batch.
func addStories(s []*story.Story) {
	stories.mutex.Lock()
	defer stories.mutex.Unlock()

	// Iterate by index so no loop variable shadows the story package
	for i := range s {
		stories.add(s[i])
	}

	added := len(s)
	config.Debug("Added ", added, "new stories")
	config.Debug("start =", stories.start, ", end =", stories.end)
}
Пример #2
0
// markBrowsed records that the user behind session has browsed up to storyid.
//
// Stories still held in the session's filtered and unfiltered page lists
// that were neither read nor ignored are classified as Uninteresting.
// Both lists are emptied on every path, including the early exits taken
// when storyid lies outside the acceptable range.
func markBrowsed(session *Session, storyid int64) {

	// Hold the read lock while stories.start and stories.end are consulted
	stories.mutex.RLock()
	defer stories.mutex.RUnlock()

	// resetPage empties both page lists but keeps their backing arrays
	resetPage := func() {
		session.filtered = session.filtered[:0]
		session.unfiltered = session.unfiltered[:0]
	}

	switch {
	case storyid < stories.start || storyid < session.haveBrowsed:
		config.Debug("Not marking browsed ", storyid, ", ",
			stories.start, ", ", session.haveBrowsed)
		resetPage()
		return
	case storyid > stories.end:
		log.Println("User has browsed past the last story", storyid, ">", stories.end)
		resetPage()
		return
	}

	// unseen reports whether the story with the given id was on the page
	// but neither read nor ignored. Ids below the browse mark are skipped.
	unseen := func(id int64) bool {
		if id < session.haveBrowsed {
			config.Debug("Not uninteresting: ", id)
			return false
		}
		return !session.haveRead[id] && !session.haveIgnored[id]
	}

	// NOTE(review): only the filtered list records the story in
	// haveIgnored; the unfiltered list does not. Confirm this asymmetry
	// is intended.
	for _, st := range session.filtered {
		if unseen(st.Id) {
			session.classifyStory(st.Id, Uninteresting)
			session.haveIgnored[st.Id] = true
		}
	}

	for _, st := range session.unfiltered {
		if unseen(st.Id) {
			session.classifyStory(st.Id, Uninteresting)
		}
	}

	// The page has been dealt with, so drop it
	resetPage()

	// Remember how far the user has browsed
	session.haveBrowsed = storyid - 1
}
Пример #3
0
// add stores s in the fifo and updates the start and end story ids.
//
// While the index has spare capacity the story is appended. Once the
// index is full, the slot at f.head is overwritten, head advances
// (wrapping at the capacity) and start becomes the id of the story now
// at head. In every case end becomes s.Id.
func (f *fifo) add(s *story.Story) {

	// The fifo depends on story ids never going backwards
	if s.Id < f.end {
		log.Panicln("Non-increasing story id added to fifo: ",
			s.Id, ", ", f.start, ", ", f.end)
	}

	size := len(f.index)
	if size > 0 && size == cap(f.index) {
		// Full: reuse the slot at head and move the window forward
		f.index[f.head] = s
		f.head = (f.head + 1) % cap(f.index)
		f.start = f.index[f.head].Id
	} else {
		// Room left: the very first story also defines start
		if size == 0 {
			f.start = s.Id
		}
		f.index = append(f.index, s)
	}
	f.end = s.Id

	config.Debug("Added story: ", s.Id, ", ", f.start, "-", f.end)
}
Пример #4
0
// Classify scores the given word list against every class and returns the
// index of the class with the greatest weight. On a tie the lowest index
// wins, because only a strictly greater weight replaces the current best.
//
// The words are passed through c.Prefilter before weighting. The function
// panics with an index-out-of-range error when c.Classes is empty, since
// the first weight is read unconditionally.
func (c *Classifier) Classify(words []string) int {

	tokens := c.Prefilter(words)

	// Weight every class against the prefiltered words
	scores := make([]float64, len(c.Classes))
	for idx := range c.Classes {
		config.Debug("Weighting class ", idx)
		scores[idx] = c.weight(c.Classes[idx], tokens)
	}

	// Start from class 0 and look for anything strictly heavier
	best := 0
	top := scores[best]
	for idx := 1; idx < len(scores); idx++ {
		if scores[idx] > top {
			top, best = scores[idx], idx
		}
	}

	return best
}
Пример #5
0
// getSession returns the session named by the request's "id" cookie.
//
// When the cookie is missing, or sessionSync does not recognise its value,
// a new session is created, registered with sessions.Create and announced
// to the client through a Set-Cookie header on w. A newly created session
// is returned with its mutex locked.
// NOTE(review): presumably sessionSync also returns its session locked,
// since callers release the session either way. Confirm.
func getSession(w http.ResponseWriter, req *http.Request) (*Session, bool) {

	if cookie, err := req.Cookie("id"); err == nil {
		if existing, ok := sessionSync(cookie.Value); ok {
			return existing, ok
		}

		log.Println("Invalid session cookie presented: ", cookie.Value)
	}

	// No usable session: make a fresh one
	fresh := newSession()
	config.Debug("Creating new session ", fresh.id)
	sessions.Create(fresh)

	// Hand the session id back to the client
	http.SetCookie(w, &http.Cookie{
		Name:     "id",
		Value:    fresh.id,
		Path:     "/",
		MaxAge:   math.MaxInt32,
		HttpOnly: true,
	})

	fresh.mutex.Lock()
	return fresh, true
}
Пример #6
0
// interesting returns up to interestingPerPage stories, starting at story
// id start, that the session's classifier labels Interesting.
//
// Stories the session has already read or ignored are skipped. The scan
// stops at the first id that stories.get cannot supply, or once the page
// is full. The returned slice is empty (never nil) when nothing matches
// or when start lies beyond the newest story.
func interesting(session *Session, start int64) []*story.Story {

	// We are about to access stories
	stories.mutex.RLock()
	defer stories.mutex.RUnlock()

	ret := make([]*story.Story, 0, interestingPerPage)

	// stories.end is the id of the newest story in the fifo, so the range
	// is inclusive. The previous "<" bound never considered the newest
	// story. A start beyond end simply yields no iterations.
	for id := start; id <= stories.end; id++ {
		if session.haveRead[id] || session.haveIgnored[id] {
			continue
		}
		st, ok := stories.get(id)
		if !ok {
			break
		}

		class := session.classifier.Classify(st.Wordlist)
		if class == Interesting {
			config.Debug("Interesting:", st.Rss.Title)
			ret = append(ret, st)
			if len(ret) == interestingPerPage {
				break
			}
		}
	}

	return ret
}
Пример #7
0
// weight returns the log2 score of the given word list for the given class:
// the class prior plus, for every word, log2 of its smoothed probability.
//
// Special cases: 0 is returned when the classifier has no training data
// (c.Total == 0), and -500 when this particular class has none
// (class.Count == 0).
func (c *Classifier) weight(class Class, words []string) float64 {

	// Degenerate cases first
	switch {
	case c.Total == 0:
		return 0
	case class.Count == 0:
		return -500
	}

	// TODO: improve the smoothing - currently wholly unknown titles get
	//       classified as interesting

	// Add-one smoothing:
	//   P(w|class) = (N(w,class) + 1) / (N(class) + k)
	// where k is the number of words in the training set. The denominator
	// does not depend on the word, so it is computed once.
	denominator := float64(class.Count) + float64(c.Words)

	// class.Prior is the starting point of the sum
	score := class.Prior
	for _, word := range words {
		numerator := float64(class.Vocabulary[word]) + 1
		p := numerator / denominator
		config.Debug("P(", word, "|class) = ", p)
		score += math.Log2(p)
	}

	config.Debug("ll =", score)
	return score
}
Пример #8
0
// saveSession persists a cached session to the db.
//
// c must hold a *Session; any other type terminates the process through
// log.Fatal. The classifier and the ignored-story map are serialised
// first. If either step fails, the error is logged and nothing is
// written. New sessions are created in the db, existing ones are
// overwritten.
func saveSession(c cache.Entry) {
	// Convert from the cache format to a session
	session, ok := c.(*Session)
	if !ok {
		log.Fatal("Cannot convert cache.Entry to *Session")
	}

	config.Debug("Saving session ", session.id)
	config.Debug("Writing classifier:")
	config.Debug(session.classifier)

	config.Debug("Writing classified index: ", session.haveClassified)
	config.Debug("Writing browsed index: ", session.haveBrowsed)

	// Serialise the session. Failures are reported rather than dropped
	// silently, because they mean the session is not saved.
	cbytes, err := session.classifier.Serialise()
	if err != nil {
		log.Println("Cannot serialise classifier for session", session.id, ":", err)
		return
	}
	ibytes, err := serialiseStoryMap(session.haveIgnored)
	if err != nil {
		log.Println("Cannot serialise ignored stories for session", session.id, ":", err)
		return
	}

	// Write the session
	dbs := db.Session{
		Id:             session.id,
		Classifier:     cbytes,
		HaveIgnored:    ibytes,
		HaveClassified: session.haveClassified,
		HaveBrowsed:    session.haveBrowsed}

	if session.isNew {
		db.CreateSession(&dbs)
	} else {
		db.WriteSession(&dbs)
	}
}
Пример #9
0
// readFeed decodes the RSS feed stored in filename, runs every decoded
// item through addStory to build up a batch, and hands that batch to
// session.AddStories. A feed that cannot be decoded is logged and skipped.
func readFeed(filename string) {
	config.Debug("Reading feed: ", filename)

	items, err := rss.Decode(filename)
	if err != nil {
		log.Println("Cannot decode ", filename, " :", err)
		return
	}

	// addStory returns the batch to carry into the next iteration
	batch := make([]*story.Story, 0, 64)
	for _, item := range items {
		batch = addStory(batch, item)
	}

	// Publish whatever was collected
	session.AddStories(batch)
}
Пример #10
0
// FilteredStories builds the story index for a request, starting at storyid.
//
// With a session and storyid == 0, the index starts just after the last
// story the session browsed, or DefaultStart stories before the newest one
// if nothing has been browsed yet. The start is never earlier than
// stories.start. A page already cached on the session is reused; otherwise
// a new page of interesting and unfiltered stories is built and cached.
// Without a session only unfiltered stories are returned.
func FilteredStories(w http.ResponseWriter, req *http.Request, storyid int64) *StoryIndex {

	// Get the user's session if possible; release it when we are done
	session, haveSession := getSession(w, req)
	if haveSession {
		defer session.release()
	}

	// We are about to access stories.
	// NOTE(review): interesting() takes stories.mutex.RLock again while
	// this read lock is still held. sync.RWMutex prohibits recursive read
	// locking: a writer such as addStories calling Lock in between can
	// deadlock both. Narrowing this lock looks right, but first confirm
	// whether unfiltered and previousNext lock on their own.
	stories.mutex.RLock()
	defer stories.mutex.RUnlock()

	// Pick a starting point when the caller did not name one
	start := storyid
	if haveSession && storyid == 0 {
		if session.haveBrowsed > 0 {
			start = session.haveBrowsed + 1
		} else {
			start = stories.end - DefaultStart
		}
		config.Debug("Building story index starting from", start)
	}
	start = max(start, stories.start)

	index := NewStoryIndex()

	switch {
	case !haveSession:
		// Anonymous request: nothing to filter with
		index.Unfiltered = unfiltered(nil, false, start, storiesPerPage, nil)
	case len(session.filtered) > 0 || len(session.unfiltered) > 0:
		// The session still holds a page: serve it again
		index.Filtered = session.filtered
		index.Unfiltered = session.unfiltered
	default:
		// Build a new page and remember it on the session
		index.Filtered = interesting(session, start)
		remaining := storiesPerPage - len(index.Filtered)
		index.Unfiltered = unfiltered(session, true, start, remaining, storyIdMap(index.Filtered))
		session.filtered = index.Filtered
		session.unfiltered = index.Unfiltered
	}

	previousNext(index, start)
	return index
}
Пример #11
0
// readSession loads the session stored under key from the db and converts
// it to the in-memory Session format.
//
// It returns (nil, false) when the db has no such session, or when the
// stored classifier or ignored-story map cannot be deserialised. Every
// failure is logged. On success the session's filtered and unfiltered page
// lists are empty, with capacity storiesPerPage.
func readSession(key string) (cache.Entry, bool) {
	dbs, ok := db.GetSession(key)
	if !ok {
		log.Println("Failed to read session ", key, " from db")
		return nil, false
	}

	// Convert the session from db format
	// Deserialise the classifier
	classifier, err := nbc.Deserialise(dbs.Classifier)
	if err != nil {
		log.Println("Failed to deserialise classifier for session ", key, ": ", err)
		return nil, false
	}

	// Get the read stories
	read := getReadMap(key)

	// Deserialise ignored stories
	ignored, err := deserialiseStoryMap(dbs.HaveIgnored)
	if err != nil {
		log.Println("Failed to deserialise ignored stories for session ", key, ": ", err)
		return nil, false
	}

	config.Debug("Deserialised classifier: ", classifier)

	// Return the deserialised session
	ret := &Session{
		id:             dbs.Id,
		classifier:     classifier,
		haveRead:       read,
		haveIgnored:    ignored,
		haveClassified: dbs.HaveClassified,
		haveBrowsed:    dbs.HaveBrowsed}

	ret.filtered = make([]*story.Story, 0, storiesPerPage)
	ret.unfiltered = make([]*story.Story, 0, storiesPerPage)
	return ret, true
}