Example #1
0
// DoubleMetaphone is a token filter that replaces each token with the codes
// returned by the C double_metaphone routine. The primary code is always
// emitted; the secondary code is emitted only when it differs from the
// primary one.
//
// The per-token callback is handed to start, which is defined elsewhere
// (presumably it invokes the callback for every token read from in and
// returns the channel the callback writes to — confirm against start).
func DoubleMetaphone(in nltk.TokenChan) nltk.TokenChan {
	encode := func(tok nltk.Token, out nltk.TokenChan) {
		// C.CString allocates a C copy of the token text; release it on exit.
		word := C.CString(tok.String())
		defer C.free(unsafe.Pointer(word))

		result := C.double_metaphone(word)
		// The result is released only when the callback returns, after both
		// codes have been copied into Go strings below.
		defer C.free_dm_result(result)

		first := C.GoString(result.primary)
		second := C.GoString(result.secondary)

		out <- nltk.Token(first)
		if second != first {
			out <- nltk.Token(second)
		}
	}
	return start(in, encode)
}
Example #2
0
// Ascii is a token filter that removes every rune above code point 127 from
// each token and emits what remains. A token consisting solely of such runes
// is emitted as an empty token.
func Ascii(in nltk.TokenChan) nltk.TokenChan {
	// keepASCII maps a rune to itself when it is at most 127 and to -1
	// otherwise; strings.Map drops runes that are mapped to a negative value.
	keepASCII := func(r rune) rune {
		if r <= 127 {
			return r
		}
		return -1
	}
	return start(in, func(tok nltk.Token, out nltk.TokenChan) {
		out <- nltk.Token(strings.Map(keepASCII, tok.String()))
	})
}
Example #3
0
// Simple splits each argument on whitespace with strings.Fields and sends the
// resulting words, in argument order, as tokens on the returned channel.
//
// The channel is created with a buffer of 10 and is filled by a background
// goroutine, which closes it after the last word has been sent.
func Simple(strs ...string) nltk.TokenChan {
	tokens := make(nltk.TokenChan, 10)
	go func() {
		defer close(tokens)
		for i := range strs {
			words := strings.Fields(strs[i])
			for j := range words {
				tokens <- nltk.Token(words[j])
			}
		}
	}()
	return tokens
}
Example #4
0
// Punctuation is a token filter that keeps only the ASCII digits and the
// ASCII upper- and lowercase letters of each token, dropping every other
// rune. The result is emitted even when nothing is left of the token.
func Punctuation(in nltk.TokenChan) nltk.TokenChan {
	// alnumOnly returns r unchanged for 0-9, A-Z and a-z, and -1 (which makes
	// strings.Map drop the rune) for everything else.
	alnumOnly := func(r rune) rune {
		isDigit := '0' <= r && r <= '9'
		isUpper := 'A' <= r && r <= 'Z'
		isLower := 'a' <= r && r <= 'z'
		if isDigit || isUpper || isLower {
			return r
		}
		return -1
	}
	return start(in, func(tok nltk.Token, out nltk.TokenChan) {
		out <- nltk.Token(strings.Map(alnumOnly, tok.String()))
	})
}
Example #5
0
// Superstrip is a token filter that reduces each token to its ASCII digits
// and letters, converting uppercase ASCII letters to lowercase and dropping
// every other rune. Tokens that end up empty are not emitted.
func Superstrip(in nltk.TokenChan) nltk.TokenChan {
	// normalize keeps digits and lowercase letters as they are, shifts A-Z
	// onto a-z, and returns -1 for any other rune so strings.Map removes it.
	normalize := func(r rune) rune {
		if '0' <= r && r <= '9' {
			return r
		}
		if 'a' <= r && r <= 'z' {
			return r
		}
		if 'A' <= r && r <= 'Z' {
			return r + ('a' - 'A')
		}
		return -1
	}
	return start(in, func(tok nltk.Token, out nltk.TokenChan) {
		stripped := strings.Map(normalize, tok.String())
		if stripped == "" {
			return
		}
		out <- nltk.Token(stripped)
	})
}
Example #6
0
// Lowercase is a token filter that emits the strings.ToLower form of each
// token it is given.
func Lowercase(in nltk.TokenChan) nltk.TokenChan {
	toLower := func(tok nltk.Token, out nltk.TokenChan) {
		lowered := strings.ToLower(tok.String())
		out <- nltk.Token(lowered)
	}
	return start(in, toLower)
}
Example #7
0
// stemmerFilter is a token filter that emits, for each token, whatever
// s.Stem returns for that token's string form.
func stemmerFilter(in nltk.TokenChan, s stemmer.Stemmer) nltk.TokenChan {
	stem := func(tok nltk.Token, out nltk.TokenChan) {
		stemmed := s.Stem(tok.String())
		out <- nltk.Token(stemmed)
	}
	return start(in, stem)
}