Пример #1
0
// Stem returns the stem of word as a new Go byte slice. The language and
// encoding of word should match the algorithm and encoding the stemmer was
// created with.
//
// An error is returned when the underlying stemmer yields no result, or when
// the reported stem length is not within 1..MaxAllowedStemLength.
func (ws *WordStemmer) Stem(word []byte) ([]byte, error) {
	// The mutex stays held until the result has been copied out below.
	ws.mutex.Lock()
	defer ws.mutex.Unlock()

	// Hand the C side its own copy of the word; released on return.
	input := unsafe.Pointer(C.CString(string(word)))
	defer C.free(input)

	stem := C.sb_stemmer_stem(ws.stemmer, (*C.sb_symbol)(input), C.int(len(word)))
	if stem == nil {
		return nil, fmt.Errorf("Stemmer for (%s;%s) cannot extract stem for word: '%s'",
			ws.algorithm, ws.encoding, word)
	}

	n := C.sb_stemmer_length(ws.stemmer)
	if n > 0 && n <= MaxAllowedStemLength {
		// Copy the stem into Go-managed memory before the lock is released.
		return C.GoBytes(unsafe.Pointer(stem), n), nil
	}

	return nil, fmt.Errorf("Stemmer for (%s;%s) got incorrect stem length for word: '%s': len = '%d'",
		ws.algorithm, ws.encoding, word, n)
}
Пример #2
0
// Stem returns the stem of word (e.g. running -> run).
//
// word is copied into C memory for the duration of the call, and the stem is
// copied into a Go string before returning. If the underlying stemmer returns
// NULL (libstemmer documents this for out-of-memory), word is returned
// unchanged instead of reading through a nil pointer.
//
// NOTE(review): no locking is done here; concurrent calls on the same Stemmer
// presumably need external synchronization — confirm against callers.
func (stmr *Stemmer) Stem(word string) string {
	ptr := unsafe.Pointer(C.CString(word))
	defer C.free(ptr)

	res := C.sb_stemmer_stem(stmr.stmr, (*C.sb_symbol)(ptr), C.int(len(word)))
	if res == nil {
		return word
	}

	// Build the string straight from the C buffer using the reported length,
	// avoiding an intermediate []byte copy.
	size := C.sb_stemmer_length(stmr.stmr)
	return C.GoStringN((*C.char)(unsafe.Pointer(res)), size)
}
Пример #3
0
// StemWord returns the stem of str.
//
// The lock is held from the stemmer call until the result has been copied
// into a Go string. sb_stemmer_stem returns a pointer into a buffer owned by
// the stemmer, so unlocking before the copy would let a concurrent call
// overwrite the stem before it is read.
//
// NOTE(review): the receiver is a value; if the lock field is a mutex value
// rather than a pointer, each call locks its own copy — confirm the field type.
func (s Stemmer) StemWord(str string) string {
	cstr := C.CString(str)
	defer C.free(unsafe.Pointer(cstr))

	// NOTE(review): sbs is freed separately from cstr, which is only correct
	// if str_to_sb_symbol returns its own allocation rather than a cast of
	// cstr — confirm against the C helper.
	sbs := C.str_to_sb_symbol(cstr)
	defer C.free(unsafe.Pointer(sbs))

	s.lock.Lock()
	// Deferred calls run last-in-first-out: unlock happens after the return
	// value is built and before the C buffers above are freed.
	defer s.lock.Unlock()

	stemmed := C.sb_stemmer_stem(s.stemmer, sbs, C.int(len(str)))
	return C.GoString(C.sb_symbol_to_char(stemmed))
}