//export GOLUCY_RegexTokenizer_init
func GOLUCY_RegexTokenizer_init(rt *C.lucy_RegexTokenizer,
    pattern *C.cfish_String) *C.lucy_RegexTokenizer {
    C.lucy_Analyzer_init((*C.lucy_Analyzer)(unsafe.Pointer(rt)))
    ivars := C.lucy_RegexTokenizer_IVARS(rt)
    ivars.pattern = C.CFISH_Str_Clone(pattern)

    // Fall back to the default word-matching pattern when none is supplied.
    var patternGo string
    if pattern == nil {
        patternGo = "\\w+(?:['\\x{2019}]\\w+)*"
    } else {
        patternGo = clownfish.CFStringToGo(unsafe.Pointer(pattern))
    }
    rx, err := regexp.Compile(patternGo)
    if err != nil {
        panic(err)
    }

    // Go pointers must not be stored in C-visible memory, so park the
    // compiled regexp in a registry and smuggle its integer ID through
    // the token_re field instead.
    rxID := registry.store(rx)
    ivars.token_re = unsafe.Pointer(rxID)

    return rt
}
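// The registry referenced above is defined elsewhere in this package. As a
// minimal sketch of the store/fetch/delete contract these exported functions
// assume (the name objRegistry and this layout are hypothetical, not the
// bindings' actual implementation; requires "sync" in the import list):

type objRegistry struct {
    lock    sync.Mutex
    nextID  uintptr
    objects map[uintptr]interface{}
}

func newObjRegistry() *objRegistry {
    return &objRegistry{objects: map[uintptr]interface{}{}}
}

// store parks obj under a fresh non-zero ID and returns the ID.
func (r *objRegistry) store(obj interface{}) uintptr {
    r.lock.Lock()
    defer r.lock.Unlock()
    r.nextID++ // IDs start at 1, so a zeroed token_re field is never a live handle
    r.objects[r.nextID] = obj
    return r.nextID
}

// fetch returns the object stored under id, or nil if it has been deleted.
func (r *objRegistry) fetch(id uintptr) interface{} {
    r.lock.Lock()
    defer r.lock.Unlock()
    return r.objects[id]
}

// delete drops the entry for id, allowing the Go object to be collected.
func (r *objRegistry) delete(id uintptr) {
    r.lock.Lock()
    defer r.lock.Unlock()
    delete(r.objects, id)
}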
//export GOLUCY_RegexTokenizer_Tokenize_Utf8
func GOLUCY_RegexTokenizer_Tokenize_Utf8(rt *C.lucy_RegexTokenizer, str *C.char,
    stringLen C.size_t, inversion *C.lucy_Inversion) {
    ivars := C.lucy_RegexTokenizer_IVARS(rt)

    // Recover the compiled regexp stashed in the registry by init.
    rxID := uintptr(ivars.token_re)
    rx, ok := registry.fetch(rxID).(*regexp.Regexp)
    if !ok {
        mess := fmt.Sprintf("Failed to fetch *regexp.Regexp with id %d and pattern %s",
            rxID, clownfish.CFStringToGo(unsafe.Pointer(ivars.pattern)))
        panic(clownfish.NewErr(mess))
    }

    buf := C.GoBytes(unsafe.Pointer(str), C.int(stringLen))
    found := rx.FindAllIndex(buf, int(stringLen))

    // Each match is a [start, end) pair of byte offsets. Threading the
    // running code-point count and the previous match's end through
    // push_token lets the C helper convert byte offsets to code-point
    // offsets incrementally instead of rescanning the string per token.
    lastEnd := 0
    cpCount := 0
    for _, startEnd := range found {
        cpCount = int(C.push_token(str, C.int(startEnd[0]), C.int(startEnd[1]),
            C.int(lastEnd), C.int(cpCount), inversion))
        lastEnd = startEnd[1]
    }
}
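// For reference, FindAllIndex reports byte offsets, not code-point offsets,
// which is why the loop above must thread cpCount through push_token. A
// hypothetical standalone illustration (not part of the bindings):

func exampleTokenOffsets() {
    rx := regexp.MustCompile(`\w+`)
    found := rx.FindAllIndex([]byte("Hello, world"), -1)
    fmt.Println(found) // [[0 5] [7 12]]: "Hello" and "world" as [start, end) byte pairs
}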
//export GOLUCY_RegexTokenizer_Destroy
func GOLUCY_RegexTokenizer_Destroy(rt *C.lucy_RegexTokenizer) {
    ivars := C.lucy_RegexTokenizer_IVARS(rt)

    // Release the registry entry holding the compiled regexp, then hand
    // off to the parent class destructor.
    rxID := uintptr(ivars.token_re)
    registry.delete(rxID)
    C.cfish_super_destroy(unsafe.Pointer(rt), C.LUCY_REGEXTOKENIZER)
}