Пример #1
0
// reads a file and stores trigrams in database
func ParseFile(filename string) {
	content := ReadFile(filename)
	news_lines := ExtractNewsLine(content)
	Connect(database_name, true)
	for _, line := range news_lines {
		id := PostNews(line)
		for _, trigram := range ngram.BuildNGram(line, 3) {
			PutTrigram(trigram, id)
		}
	}
}
Пример #2
0
// GetBestMatches returns the titles of at most count news entries that share
// trigrams with title.
//
// The search string is split into trigrams, the news ids stored for each
// trigram are collected, and the ids are ranked by SortByFrequency using the
// number of times each id was hit. The titles are returned in the order
// produced by SortByFrequency. Fewer than count titles are returned when there
// are fewer candidates; a non-positive count or a search without any match
// yields an empty (non-nil) slice.
func GetBestMatches(title string, count int) []string {
	var matches []int

	// Collect the news ids registered for every trigram of the search string.
	// An id appears once per trigram it shares with the title.
	for _, trigram := range ngram.BuildNGram(title, 3) {
		for _, id := range data.GetIdsOfTrigram(trigram) {
			matches = append(matches, id)
		}
	}

	// Count how often each news id was hit; a missing key reads as zero, so
	// a plain increment is enough.
	frequencies := map[int]int{}
	for _, id := range matches {
		frequencies[id]++
	}

	// Rank the candidates according to their hit count.
	// NOTE(review): whether the result still contains one entry per hit
	// (i.e. duplicates) depends on SortByFrequency — confirm.
	sorted := SortByFrequency(matches, frequencies)

	// Never take more than count entries and never read past the end of the
	// ranking (the previous "i <= count" loop did both).
	limit := count
	if limit > len(sorted) {
		limit = len(sorted)
	}
	if limit < 0 {
		limit = 0
	}

	// Look up the title of each selected id. The value of the ranking entry
	// is the news id; the loop index is only its rank.
	titles := make([]string, 0, limit)
	for i := 0; i < limit; i++ {
		titles = append(titles, data.GetNewsTitle(sorted[i]))
	}

	return titles
}