Esempio n. 1
0
// Stem returns the stem of word as a newly allocated byte slice.
//
// The language/encoding of word should match the algorithm/encoding the
// stemmer was created with. The word is copied into C memory, passed to
// sb_stemmer_stem together with its byte length, and the resulting stem is
// copied back into Go memory. ws.mutex is held for the duration of the call.
//
// An error is returned when sb_stemmer_stem yields a nil result, or when the
// length reported by sb_stemmer_length is not within 1..MaxAllowedStemLength.
func (ws *WordStemmer) Stem(word []byte) ([]byte, error) {
	ws.mutex.Lock()
	defer ws.mutex.Unlock()

	// C-side copy of the input; released when this method returns.
	cWord := C.CString(string(word))
	defer C.free(unsafe.Pointer(cWord))

	symbols := (*C.sb_symbol)(unsafe.Pointer(cWord))
	stem := C.sb_stemmer_stem(ws.stemmer, symbols, C.int(len(word)))
	if stem == nil {
		return nil, fmt.Errorf(
			"Stemmer for (%s;%s) cannot extract stem for word: '%s'",
			ws.algorithm, ws.encoding, word)
	}

	// The length must be read from the stemmer right after stemming; it
	// describes the buffer that stem points at.
	n := C.sb_stemmer_length(ws.stemmer)
	if lengthOK := n > 0 && n <= MaxAllowedStemLength; !lengthOK {
		return nil, fmt.Errorf(
			"Stemmer for (%s;%s) got incorrect stem length for word: '%s': len = '%d'",
			ws.algorithm, ws.encoding, word, n)
	}

	return C.GoBytes(unsafe.Pointer(stem), n), nil
}
Esempio n. 2
0
// Stem returns the stem of word (e.g. running -> run).
//
// word is copied into C memory, passed to sb_stemmer_stem together with its
// byte length, and the resulting stem is copied back into a Go string. The
// C copy of word is freed before returning.
//
// If sb_stemmer_stem returns a nil pointer (libstemmer documents a NULL
// return on failure), Stem returns the empty string instead of reading
// through the nil pointer.
func (stmr *Stemmer) Stem(word string) string {
	ptr := unsafe.Pointer(C.CString(word))
	defer C.free(ptr)

	w := (*C.sb_symbol)(ptr)
	res := C.sb_stemmer_stem(stmr.stmr, w, C.int(len(word)))
	if res == nil {
		// Nothing to copy; GoBytes on a nil pointer with a non-zero
		// length would fault.
		return ""
	}

	// The length refers to the buffer returned by the call just above.
	size := C.sb_stemmer_length(stmr.stmr)
	return string(C.GoBytes(unsafe.Pointer(res), size))
}