// reads a file and stores trigrams in database func ParseFile(filename string) { content := ReadFile(filename) news_lines := ExtractNewsLine(content) Connect(database_name, true) for _, line := range news_lines { id := PostNews(line) for _, trigram := range ngram.BuildNGram(line, 3) { PutTrigram(trigram, id) } } }
// returns n=count titles matching to the title func GetBestMatches(title string, count int) []string { var trigram_matches []int // first calculate ngrams of the search string for _, trigram := range ngram.BuildNGram(title, 3) { for _, match := range data.GetIdsOfTrigram(trigram) { trigram_matches = append(trigram_matches, match) } } // TODO: now get the *count* most frequent news ids frequencies := map[int]int{} for _, id := range trigram_matches { _, contains := frequencies[id] if contains { frequencies[id] += 1 } else { frequencies[id] = 1 } } // now sort according to the count sorted := SortByFrequency(trigram_matches, frequencies) return_val := []int{} for i := 0; i <= count; i += 1 { return_val = append(return_val, sorted[i]) } // get the according titles titles := []string{} for id := range return_val { titles = append(titles, data.GetNewsTitle(id)) } return titles }