func (this *MITIE) Process(body string) *list.List { tokens := C.mitie_tokenize(C.CString(body)) defer C.mitie_free(unsafe.Pointer(tokens)) dets := C.mitie_extract_entities(this.ner, tokens) defer C.mitie_free(unsafe.Pointer(dets)) num_dets := C.mitie_ner_get_num_detections(dets) duplicates := set.New() entites := list.New() for i := 0; i < int(num_dets); i++ { centity := C.get_entity(tokens, dets, C.ulong(i)) model := C.GoString(centity.model) score := float64(centity.score) value := C.GoString(centity.value) key := fmt.Sprintf("%s:%s", value, model) if duplicates.Has(key) { continue } duplicates.Add(key) if score > 0.5 { entity := models.NewEntity(model, score, value) entites.PushBack(entity) } } return entites }
// Tokenize returns a slice that contains a tokenized copy of the input text. func Tokenize(text string) []string { cs := C.CString(text) defer C.free(unsafe.Pointer(cs)) ctokens := C.mitie_tokenize(cs) defer C.mitie_free(unsafe.Pointer(ctokens)) i := 0 // a hack since mitie arrays are NULL terminated. p := (*[1 << 30]*C.char)(unsafe.Pointer(ctokens)) tokens := make([]string, 0, 20) for p[i] != nil { tokens = append(tokens, C.GoString(p[i])) i++ } return tokens }