// SetWords splits every entry of container.Sentences into whitespace-separated
// tokens, normalizes them, and appends one word slice per sentence to
// container.Words. Existing entries of container.Words are kept.
//
// Normalization of a token:
//   - every match of punctuationRegex is removed;
//   - a token that is empty after that removal is dropped;
//   - the remaining tokens are lower-cased and passed through stemmer.Stem.
//
// A sentence without any surviving token still contributes an (empty) slice,
// so container.Words stays index-aligned with container.Sentences for the
// sentences processed by this call.
func (container *PageContainer) SetWords() {
	for _, sentence := range container.Sentences {
		tokens := strings.Fields(sentence)

		// Filter in place: kept shares the backing array of tokens, and the
		// write position never overtakes the read position, so no token is
		// overwritten before it has been read. This replaces the previous
		// record-indices-then-splice approach, which was quadratic in the
		// number of removed tokens.
		kept := tokens[:0]
		for _, tok := range tokens {
			tok = punctuationRegex.ReplaceAllString(tok, "")
			if len(tok) == 0 {
				continue
			}
			kept = append(kept, string(stemmer.Stem([]byte(strings.ToLower(tok)))))
		}

		container.Words = append(container.Words, kept)
	}
}
示例#2
0
文件: data.go 项目: pombredanne/gcse
// NormWord returns the normalized form of word. The word is lower-cased
// first; if the lower-cased form is a key in stemBlackList, the mapped value
// is returned as-is, otherwise the result of stemmer.Stem on the lower-cased
// form is returned.
func NormWord(word string) string {
	lowered := strings.ToLower(word)

	// Blacklisted words bypass the stemmer and use their predefined mapping.
	if replacement, found := stemBlackList[lowered]; found {
		return replacement
	}

	stemmed := stemmer.Stem([]byte(lowered))
	return string(stemmed)
}