Esempio n. 1
0
// GOLUCY_RegexTokenizer_init initializes rt as an Analyzer, compiles the
// token pattern with Go's regexp package, stores the compiled regexp in the
// package registry, and records the registry ID in the token_re ivar.
//
// When pattern is nil, a built-in default is compiled instead: runs of word
// characters, optionally joined by an apostrophe or U+2019. In that case
// there is no Clownfish string to clone, so the pattern ivar is not assigned.
// NOTE(review): the tokenizer's error path reads the pattern ivar; consider
// storing a Clownfish copy of the default pattern here — confirm with the
// C-side allocator what the ivar holds when it is left unassigned.
//
// Panics with the regexp compile error if the pattern is invalid. Returns rt.
//
//export GOLUCY_RegexTokenizer_init
func GOLUCY_RegexTokenizer_init(rt *C.lucy_RegexTokenizer, pattern *C.cfish_String) *C.lucy_RegexTokenizer {
	C.lucy_Analyzer_init(((*C.lucy_Analyzer)(unsafe.Pointer(rt))))

	ivars := C.lucy_RegexTokenizer_IVARS(rt)

	var patternGo string
	if pattern == nil {
		patternGo = "\\w+(?:['\\x{2019}]\\w+)*"
	} else {
		// Clone only a real string: invoking a Clownfish method on a nil
		// object would fault before the default pattern could be applied.
		ivars.pattern = C.CFISH_Str_Clone(pattern)
		patternGo = clownfish.CFStringToGo(unsafe.Pointer(pattern))
	}
	rx, err := regexp.Compile(patternGo)
	if err != nil {
		panic(err)
	}

	// The C struct cannot hold a Go pointer, so the regexp lives in the Go
	// registry and only its numeric ID is stored in the void* ivar.
	rxID := registry.store(rx)
	ivars.token_re = unsafe.Pointer(rxID)

	return rt
}
Esempio n. 2
0
// GOLUCY_RegexTokenizer_Tokenize_Utf8 runs the tokenizer's compiled regexp
// over the stringLen bytes at str and hands every match to the C helper
// push_token together with inversion.
//
// The regexp is looked up in the package registry using the ID stored in the
// token_re ivar; if the registry entry is not a *regexp.Regexp the function
// panics with a Clownfish error naming the ID and the stored pattern.
//
//export GOLUCY_RegexTokenizer_Tokenize_Utf8
func GOLUCY_RegexTokenizer_Tokenize_Utf8(rt *C.lucy_RegexTokenizer, str *C.char,
	stringLen C.size_t, inversion *C.lucy_Inversion) {

	state := C.lucy_RegexTokenizer_IVARS(rt)
	handle := uintptr(state.token_re)
	compiled, isRegexp := registry.fetch(handle).(*regexp.Regexp)
	if !isRegexp {
		panic(clownfish.NewErr(fmt.Sprintf(
			"Failed to Fetch *RegExp with id %d and pattern %s",
			handle, clownfish.CFStringToGo(unsafe.Pointer(state.pattern)))))
	}

	// Match against a Go-owned copy of the input bytes; the offsets found
	// are then applied to the original C buffer by push_token.
	text := C.GoBytes(unsafe.Pointer(str), C.int(stringLen))

	// Each call receives the end offset of the previous match and the count
	// returned by the previous call (presumably a running code point count —
	// confirm against push_token's C definition).
	prevEnd, runningCount := 0, 0
	for _, span := range compiled.FindAllIndex(text, int(stringLen)) {
		begin, end := span[0], span[1]
		runningCount = int(C.push_token(str, C.int(begin), C.int(end),
			C.int(prevEnd), C.int(runningCount), inversion))
		prevEnd = end
	}
}
Esempio n. 3
0
// GOLUCY_RegexTokenizer_Destroy removes the tokenizer's entry from the Go
// registry, using the ID held in the token_re ivar, and then calls the
// superclass destructor via cfish_super_destroy.
//
//export GOLUCY_RegexTokenizer_Destroy
func GOLUCY_RegexTokenizer_Destroy(rt *C.lucy_RegexTokenizer) {
	// Drop the registry entry first, while the ivars are still readable.
	handle := uintptr(C.lucy_RegexTokenizer_IVARS(rt).token_re)
	registry.delete(handle)

	C.cfish_super_destroy(unsafe.Pointer(rt), C.LUCY_REGEXTOKENIZER)
}