func (this *MITIE) Process(body string) *list.List { tokens := C.mitie_tokenize(C.CString(body)) defer C.mitie_free(unsafe.Pointer(tokens)) dets := C.mitie_extract_entities(this.ner, tokens) defer C.mitie_free(unsafe.Pointer(dets)) num_dets := C.mitie_ner_get_num_detections(dets) duplicates := set.New() entites := list.New() for i := 0; i < int(num_dets); i++ { centity := C.get_entity(tokens, dets, C.ulong(i)) model := C.GoString(centity.model) score := float64(centity.score) value := C.GoString(centity.value) key := fmt.Sprintf("%s:%s", value, model) if duplicates.Has(key) { continue } duplicates.Add(key) if score > 0.5 { entity := models.NewEntity(model, score, value) entites.PushBack(entity) } } return entites }
// Extract runs the extractor and returns a slice of Entities found in the // given tokens. func (ext *Extractor) Extract(tokens []string) ([]Entity, error) { ctokens := C.ner_arr_make(C.int(len(tokens)) + 1) // NULL termination defer C.ner_arr_free(ctokens, C.int(len(tokens))+1) for i, t := range tokens { cs := C.CString(t) // released by ner_arr_free C.ner_arr_set(ctokens, cs, C.int(i)) } dets := C.mitie_extract_entities(ext.ner, ctokens) defer C.mitie_free(unsafe.Pointer(dets)) if dets == nil { return nil, ErrMemory } n := int(C.mitie_ner_get_num_detections(dets)) entities := make([]Entity, n, n) for i := 0; i < n; i++ { pos := int(C.mitie_ner_get_detection_position(dets, C.ulong(i))) len := int(C.mitie_ner_get_detection_length(dets, C.ulong(i))) entities[i] = Entity{ Tag: int(C.mitie_ner_get_detection_tag(dets, C.ulong(i))), Score: float64(C.mitie_ner_get_detection_score(dets, C.ulong(i))), Name: strings.Join(tokens[pos:pos+len], " "), Range: Range{pos, pos + len}, } } return entities, nil }
// Tokenize returns a slice that contains a tokenized copy of the input text. func Tokenize(text string) []string { cs := C.CString(text) defer C.free(unsafe.Pointer(cs)) ctokens := C.mitie_tokenize(cs) defer C.mitie_free(unsafe.Pointer(ctokens)) i := 0 // a hack since mitie arrays are NULL terminated. p := (*[1 << 30]*C.char)(unsafe.Pointer(ctokens)) tokens := make([]string, 0, 20) for p[i] != nil { tokens = append(tokens, C.GoString(p[i])) i++ } return tokens }
// Free frees the underlying used C memory. func (ext *Extractor) Free() { C.mitie_free(unsafe.Pointer(ext.ner)) }
// Release frees the C memory held by the extractor handle.
//
// The handle is cleared after it is released, so calling Release again is a
// no-op instead of a double free.
func (this *MITIE) Release() {
	if this.ner == nil {
		return
	}
	C.mitie_free(unsafe.Pointer(this.ner))
	this.ner = nil
}